From 6c5ed4d8625cbb7df485bb8640ef9f68e62fc3b8 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 28 Jun 2026 09:55:34 +0300 Subject: [PATCH 1/2] gh-85320: Use UTF-8 for IDLE configuration and breakpoint files They were read and written using the locale encoding, which could corrupt non-ASCII paths and made them non-portable. Co-Authored-By: Claude Opus 4.8 --- Lib/idlelib/News3.txt | 3 +++ Lib/idlelib/config.py | 9 +++++---- Lib/idlelib/pyshell.py | 8 +++++--- .../IDLE/2026-06-28-06-46-46.gh-issue-85320.Hq2vKn.rst | 4 ++++ 4 files changed, 17 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/IDLE/2026-06-28-06-46-46.gh-issue-85320.Hq2vKn.rst diff --git a/Lib/idlelib/News3.txt b/Lib/idlelib/News3.txt index 0f61da8368f211..5e17d93c134f64 100644 --- a/Lib/idlelib/News3.txt +++ b/Lib/idlelib/News3.txt @@ -4,6 +4,9 @@ Released on 2026-10-01 ========================= +gh-85320: IDLE now reads and writes its configuration files and the +breakpoints file using UTF-8 instead of the locale encoding. + gh-143774: Better explain the operation of Format / Format Paragraph. Patch by Terry J. Reedy. diff --git a/Lib/idlelib/config.py b/Lib/idlelib/config.py index 1cabe479450015..82afd6c49269d2 100644 --- a/Lib/idlelib/config.py +++ b/Lib/idlelib/config.py @@ -73,8 +73,9 @@ def GetOptionList(self, section): def Load(self): "Load the configuration file from disk." - if self.file: - self.read(self.file) + if self.file and os.path.exists(self.file): + with open(self.file, encoding='utf-8', errors='replace') as f: + self.read_file(f) class IdleUserConfParser(IdleConfParser): """ @@ -133,10 +134,10 @@ def Save(self): if fname and fname[0] != '#': if not self.IsEmpty(): try: - cfgFile = open(fname, 'w') + cfgFile = open(fname, 'w', encoding='utf-8') except OSError: os.unlink(fname) - cfgFile = open(fname, 'w') + cfgFile = open(fname, 'w', encoding='utf-8') with cfgFile: self.write(cfgFile) elif os.path.exists(self.file): diff --git a/Lib/idlelib/pyshell.py b/Lib/idlelib/pyshell.py index b80c8e56c92810..b1662491935e4a 100755 --- a/Lib/idlelib/pyshell.py +++ b/Lib/idlelib/pyshell.py @@ -242,12 +242,13 @@ def store_file_breaks(self): breaks = self.breakpoints filename = self.io.filename try: - with open(self.breakpointPath) as fp: + with open(self.breakpointPath, + encoding='utf-8', errors='replace') as fp: lines = fp.readlines() except OSError: lines = [] try: - with open(self.breakpointPath, "w") as new_file: + with open(self.breakpointPath, "w", encoding='utf-8') as new_file: for line in lines: if not line.startswith(filename + '='): new_file.write(line) @@ -272,7 +273,8 @@ def restore_file_breaks(self): if filename is None: return if os.path.isfile(self.breakpointPath): - with open(self.breakpointPath) as fp: + with open(self.breakpointPath, + encoding='utf-8', errors='replace') as fp: lines = fp.readlines() for line in lines: if line.startswith(filename + '='): diff --git a/Misc/NEWS.d/next/IDLE/2026-06-28-06-46-46.gh-issue-85320.Hq2vKn.rst b/Misc/NEWS.d/next/IDLE/2026-06-28-06-46-46.gh-issue-85320.Hq2vKn.rst new file mode 100644 index 00000000000000..ec34bf8ead9eb9 --- /dev/null +++ b/Misc/NEWS.d/next/IDLE/2026-06-28-06-46-46.gh-issue-85320.Hq2vKn.rst @@ -0,0 +1,4 @@ +IDLE now reads and writes its configuration files and the breakpoints file +using UTF-8 instead of the locale encoding. This keeps non-ASCII data (such +as non-ASCII paths) from being corrupted and makes the files portable between +environments. From eb54eaf41a8396286d64d3f3238d61f0f9d62f33 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 29 Jun 2026 10:47:29 +0300 Subject: [PATCH 2/2] Update Lib/idlelib/News3.txt Co-authored-by: Terry Jan Reedy --- Lib/idlelib/News3.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/idlelib/News3.txt b/Lib/idlelib/News3.txt index 5e17d93c134f64..97becb858fea33 100644 --- a/Lib/idlelib/News3.txt +++ b/Lib/idlelib/News3.txt @@ -6,6 +6,8 @@ Released on 2026-10-01 gh-85320: IDLE now reads and writes its configuration files and the breakpoints file using UTF-8 instead of the locale encoding. +Files with non-ASCII characters and non-UTF-8 encoding may need +to be opened in an editor and resaved with UTF-8 encoding. gh-143774: Better explain the operation of Format / Format Paragraph. Patch by Terry J. Reedy.