Skip to content

Commit e9fd219

Browse files
[3.14] gh-150880: Normalize paths on Windows before appending wildcard (GH-152906) (GH-152964)
gh-150880: Normalize paths on Windows before appending wildcard (GH-152906)

This ensures that we don't turn a "valid" path with trailing spaces into an invalid path with embedded spaces.

(cherry picked from commit 1b4135a)

Co-authored-by: Zain Nadeem <zainnadeemzainnadeem80@gmail.com>
1 parent 45f7ac0 commit e9fd219

3 files changed

Lines changed: 78 additions & 29 deletions

File tree

Lib/test/test_os.py

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5284,6 +5284,30 @@ def test_current_directory(self):
52845284
finally:
52855285
os.chdir(old_dir)
52865286

5287+
@unittest.skipIf(sys.platform != 'win32', "Win32 specific test")
5288+
def test_windows_trailing_space_path(self):
5289+
import pathlib
5290+
5291+
filename = self.create_file("file.txt")
5292+
path = self.path + " "
5293+
5294+
self.assertTrue(os.path.exists(path))
5295+
os.stat(path)
5296+
with open(filename + " ", "rb") as file:
5297+
self.assertEqual(file.read(), b"python")
5298+
5299+
self.assertEqual(os.listdir(path), ["file.txt"])
5300+
with os.scandir(path) as entries:
5301+
self.assertEqual([entry.name for entry in entries], ["file.txt"])
5302+
pathlib_entries = list(pathlib.Path(path).iterdir())
5303+
self.assertEqual([entry.name for entry in pathlib_entries], ["file.txt"])
5304+
del pathlib_entries
5305+
5306+
extended_path = "\\\\?\\" + path
5307+
self.assertFalse(os.path.exists(extended_path))
5308+
self.assertRaises(FileNotFoundError, os.listdir, extended_path)
5309+
self.assertRaises(FileNotFoundError, os.scandir, extended_path)
5310+
52875311
def test_repr(self):
52885312
entry = self.create_file_entry()
52895313
self.assertEqual(repr(entry), "<DirEntry 'file.txt'>")
Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
Normalize non-extended Windows paths before appending the wildcard used by
2+
``os.listdir()`` and ``os.scandir()``, making paths with trailing spaces
3+
behave consistently with other filesystem APIs.

Modules/posixmodule.c

Lines changed: 51 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -4458,42 +4458,34 @@ os_link_impl(PyObject *module, path_t *src, path_t *dst, int src_dir_fd,
44584458

44594459

44604460
#if defined(MS_WINDOWS) && !defined(HAVE_OPENDIR)
4461+
static wchar_t *
4462+
join_path_filenameW(const wchar_t *path_wide, const wchar_t *filename,
4463+
int normalize);
4464+
44614465
static PyObject *
44624466
_listdir_windows_no_opendir(path_t *path, PyObject *list)
44634467
{
44644468
PyObject *v;
44654469
HANDLE hFindFile = INVALID_HANDLE_VALUE;
44664470
BOOL result, return_bytes;
4467-
wchar_t namebuf[MAX_PATH+4]; /* Overallocate for "\*.*" */
4468-
/* only claim to have space for MAX_PATH */
4469-
Py_ssize_t len = Py_ARRAY_LENGTH(namebuf)-4;
44704471
wchar_t *wnamebuf = NULL;
44714472

44724473
WIN32_FIND_DATAW wFileData;
44734474
const wchar_t *po_wchars;
44744475

44754476
if (!path->wide) { /* Default arg: "." */
44764477
po_wchars = L".";
4477-
len = 1;
44784478
return_bytes = 0;
44794479
} else {
44804480
po_wchars = path->wide;
4481-
len = wcslen(path->wide);
44824481
return_bytes = PyBytes_Check(path->object);
44834482
}
4484-
/* The +5 is so we can append "\\*.*\0" */
4485-
wnamebuf = PyMem_New(wchar_t, len + 5);
4486-
if (!wnamebuf) {
4487-
PyErr_NoMemory();
4483+
4484+
wnamebuf = join_path_filenameW(po_wchars, L"*.*", 1);
4485+
if (wnamebuf == NULL) {
44884486
goto exit;
44894487
}
4490-
wcscpy(wnamebuf, po_wchars);
4491-
if (len > 0) {
4492-
wchar_t wch = wnamebuf[len-1];
4493-
if (wch != SEP && wch != ALTSEP && wch != L':')
4494-
wnamebuf[len++] = SEP;
4495-
wcscpy(wnamebuf + len, L"*.*");
4496-
}
4488+
44974489
if ((list = PyList_New(0)) == NULL) {
44984490
goto exit;
44994491
}
@@ -16082,13 +16074,19 @@ static PyType_Spec DirEntryType_spec = {
1608216074

1608316075
#ifdef MS_WINDOWS
1608416076

16077+
static int
16078+
is_extended_path(const wchar_t *path)
16079+
{
16080+
return wcsncmp(path, L"\\\\?\\", 4) == 0;
16081+
}
16082+
1608516083
static wchar_t *
16086-
join_path_filenameW(const wchar_t *path_wide, const wchar_t *filename)
16084+
join_path_filenameW(const wchar_t *path_wide, const wchar_t *filename,
16085+
int normalize)
1608716086
{
1608816087
Py_ssize_t path_len;
16089-
Py_ssize_t size;
1609016088
wchar_t *result;
16091-
wchar_t ch;
16089+
wchar_t *path_allocated = NULL;
1609216090

1609316091
if (!path_wide) { /* Default arg: "." */
1609416092
path_wide = L".";
@@ -16098,20 +16096,44 @@ join_path_filenameW(const wchar_t *path_wide, const wchar_t *filename)
1609816096
path_len = wcslen(path_wide);
1609916097
}
1610016098

16101-
/* The +1's are for the path separator and the NUL */
16102-
size = path_len + 1 + wcslen(filename) + 1;
16099+
if (path_len == 0) {
16100+
result = PyMem_New(wchar_t, 1);
16101+
if (result == NULL) {
16102+
PyErr_NoMemory();
16103+
return NULL;
16104+
}
16105+
result[0] = L'\0';
16106+
return result;
16107+
}
16108+
16109+
if (normalize && !is_extended_path(path_wide)) {
16110+
int err = _PyOS_getfullpathname(path_wide, &path_allocated);
16111+
if (err < 0) {
16112+
PyErr_SetFromWindowsErr(0);
16113+
return NULL;
16114+
}
16115+
if (path_allocated == NULL) {
16116+
PyErr_NoMemory();
16117+
return NULL;
16118+
}
16119+
path_wide = path_allocated;
16120+
path_len = wcslen(path_wide);
16121+
}
16122+
16123+
size_t size = (size_t)path_len + 1 + wcslen(filename) + 1;
1610316124
result = PyMem_New(wchar_t, size);
16104-
if (!result) {
16125+
if (result == NULL) {
16126+
PyMem_RawFree(path_allocated);
1610516127
PyErr_NoMemory();
1610616128
return NULL;
1610716129
}
1610816130
wcscpy(result, path_wide);
16109-
if (path_len > 0) {
16110-
ch = result[path_len - 1];
16111-
if (ch != SEP && ch != ALTSEP && ch != L':')
16112-
result[path_len++] = SEP;
16113-
wcscpy(result + path_len, filename);
16131+
wchar_t ch = result[path_len - 1];
16132+
if (ch != SEP && ch != ALTSEP && ch != L':') {
16133+
result[path_len++] = SEP;
1611416134
}
16135+
wcscpy(result + path_len, filename);
16136+
PyMem_RawFree(path_allocated);
1611516137
return result;
1611616138
}
1611716139

@@ -16143,7 +16165,7 @@ DirEntry_from_find_data(PyObject *module, path_t *path, WIN32_FIND_DATAW *dataW)
1614316165
goto error;
1614416166
}
1614516167

16146-
joined_path = join_path_filenameW(path->wide, dataW->cFileName);
16168+
joined_path = join_path_filenameW(path->wide, dataW->cFileName, 0);
1614716169
if (!joined_path)
1614816170
goto error;
1614916171

@@ -16575,7 +16597,7 @@ os_scandir_impl(PyObject *module, path_t *path)
1657516597
#ifdef MS_WINDOWS
1657616598
iterator->first_time = 1;
1657716599

16578-
path_strW = join_path_filenameW(iterator->path.wide, L"*.*");
16600+
path_strW = join_path_filenameW(iterator->path.wide, L"*.*", 1);
1657916601
if (!path_strW)
1658016602
goto error;
1658116603

0 commit comments

Comments
 (0)