Skip to content

Commit f3ef09e

Browse files
[3.15] gh-150880: Normalize paths on Windows before appending wildcard (GH-152906) (GH-152963)
gh-150880: Normalize paths on Windows before appending wildcard (GH-152906). This ensures that we don't turn a "valid" path with trailing spaces into an invalid path with embedded spaces. (Cherry picked from commit 1b4135a.) Co-authored-by: Zain Nadeem <zainnadeemzainnadeem80@gmail.com>
1 parent 8ad11e3 commit f3ef09e

3 files changed

Lines changed: 78 additions & 29 deletions

File tree

Lib/test/test_os/test_os.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5046,6 +5046,30 @@ def test_current_directory(self):
50465046
finally:
50475047
os.chdir(old_dir)
50485048

5049+
@unittest.skipIf(sys.platform != 'win32', "Win32 specific test")
def test_windows_trailing_space_path(self):
    # A directory path with one trailing space must be accepted by
    # os.path.exists(), os.stat(), open(), os.listdir(), os.scandir() and
    # pathlib alike.  The same path behind the "\\?\" prefix is expected
    # to be reported as missing.
    import pathlib

    expected_names = ["file.txt"]
    file_path = self.create_file("file.txt")
    spaced_dir = self.path + " "

    # Plain (non-prefixed) path: every API should see the directory.
    self.assertTrue(os.path.exists(spaced_dir))
    os.stat(spaced_dir)
    with open(file_path + " ", "rb") as fobj:
        self.assertEqual(fobj.read(), b"python")

    self.assertEqual(os.listdir(spaced_dir), expected_names)
    with os.scandir(spaced_dir) as it:
        self.assertEqual([item.name for item in it], expected_names)
    children = list(pathlib.Path(spaced_dir).iterdir())
    self.assertEqual([child.name for child in children], expected_names)
    del children

    # Prefixed path: the trailing space is expected to make lookups fail.
    prefixed_dir = "\\\\?\\" + spaced_dir
    self.assertFalse(os.path.exists(prefixed_dir))
    with self.assertRaises(FileNotFoundError):
        os.listdir(prefixed_dir)
    with self.assertRaises(FileNotFoundError):
        os.scandir(prefixed_dir)
5072+
50495073
def test_repr(self):
50505074
entry = self.create_file_entry()
50515075
self.assertEqual(repr(entry), "<DirEntry 'file.txt'>")
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Normalize non-extended Windows paths before appending the wildcard used by
2+
``os.listdir()`` and ``os.scandir()``, making paths with trailing spaces
3+
behave consistently with other filesystem APIs.

Modules/posixmodule.c

Lines changed: 51 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4905,42 +4905,34 @@ os_link_impl(PyObject *module, path_t *src, path_t *dst, int src_dir_fd,
49054905

49064906

49074907
#if defined(MS_WINDOWS) && !defined(HAVE_OPENDIR)
4908+
static wchar_t *
4909+
join_path_filenameW(const wchar_t *path_wide, const wchar_t *filename,
4910+
int normalize);
4911+
49084912
static PyObject *
49094913
_listdir_windows_no_opendir(path_t *path, PyObject *list)
49104914
{
49114915
PyObject *v;
49124916
HANDLE hFindFile = INVALID_HANDLE_VALUE;
49134917
BOOL result, return_bytes;
4914-
wchar_t namebuf[MAX_PATH+4]; /* Overallocate for "\*.*" */
4915-
/* only claim to have space for MAX_PATH */
4916-
Py_ssize_t len = Py_ARRAY_LENGTH(namebuf)-4;
49174918
wchar_t *wnamebuf = NULL;
49184919

49194920
WIN32_FIND_DATAW wFileData;
49204921
const wchar_t *po_wchars;
49214922

49224923
if (!path->wide) { /* Default arg: "." */
49234924
po_wchars = L".";
4924-
len = 1;
49254925
return_bytes = 0;
49264926
} else {
49274927
po_wchars = path->wide;
4928-
len = wcslen(path->wide);
49294928
return_bytes = PyBytes_Check(path->object);
49304929
}
4931-
/* The +5 is so we can append "\\*.*\0" */
4932-
wnamebuf = PyMem_New(wchar_t, len + 5);
4933-
if (!wnamebuf) {
4934-
PyErr_NoMemory();
4930+
4931+
wnamebuf = join_path_filenameW(po_wchars, L"*.*", 1);
4932+
if (wnamebuf == NULL) {
49354933
goto exit;
49364934
}
4937-
wcscpy(wnamebuf, po_wchars);
4938-
if (len > 0) {
4939-
wchar_t wch = wnamebuf[len-1];
4940-
if (wch != SEP && wch != ALTSEP && wch != L':')
4941-
wnamebuf[len++] = SEP;
4942-
wcscpy(wnamebuf + len, L"*.*");
4943-
}
4935+
49444936
if ((list = PyList_New(0)) == NULL) {
49454937
goto exit;
49464938
}
@@ -16579,13 +16571,19 @@ static PyType_Spec DirEntryType_spec = {
1657916571

1658016572
#ifdef MS_WINDOWS
1658116573

16574+
/* Return 1 if `path` starts with the four characters "\\?\", else 0.
   `path` must be a NUL-terminated wide string; scanning stops at the
   first mismatch, so a shorter string is never read past its NUL. */
static int
is_extended_path(const wchar_t *path)
{
    static const wchar_t prefix[] = L"\\\\?\\";

    for (size_t i = 0; prefix[i] != L'\0'; i++) {
        if (path[i] != prefix[i]) {
            return 0;
        }
    }
    return 1;
}
16579+
1658216580
static wchar_t *
16583-
join_path_filenameW(const wchar_t *path_wide, const wchar_t *filename)
16581+
join_path_filenameW(const wchar_t *path_wide, const wchar_t *filename,
16582+
int normalize)
1658416583
{
1658516584
Py_ssize_t path_len;
16586-
Py_ssize_t size;
1658716585
wchar_t *result;
16588-
wchar_t ch;
16586+
wchar_t *path_allocated = NULL;
1658916587

1659016588
if (!path_wide) { /* Default arg: "." */
1659116589
path_wide = L".";
@@ -16595,20 +16593,44 @@ join_path_filenameW(const wchar_t *path_wide, const wchar_t *filename)
1659516593
path_len = wcslen(path_wide);
1659616594
}
1659716595

16598-
/* The +1's are for the path separator and the NUL */
16599-
size = path_len + 1 + wcslen(filename) + 1;
16596+
if (path_len == 0) {
16597+
result = PyMem_New(wchar_t, 1);
16598+
if (result == NULL) {
16599+
PyErr_NoMemory();
16600+
return NULL;
16601+
}
16602+
result[0] = L'\0';
16603+
return result;
16604+
}
16605+
16606+
if (normalize && !is_extended_path(path_wide)) {
16607+
int err = _PyOS_getfullpathname(path_wide, &path_allocated);
16608+
if (err < 0) {
16609+
PyErr_SetFromWindowsErr(0);
16610+
return NULL;
16611+
}
16612+
if (path_allocated == NULL) {
16613+
PyErr_NoMemory();
16614+
return NULL;
16615+
}
16616+
path_wide = path_allocated;
16617+
path_len = wcslen(path_wide);
16618+
}
16619+
16620+
size_t size = (size_t)path_len + 1 + wcslen(filename) + 1;
1660016621
result = PyMem_New(wchar_t, size);
16601-
if (!result) {
16622+
if (result == NULL) {
16623+
PyMem_RawFree(path_allocated);
1660216624
PyErr_NoMemory();
1660316625
return NULL;
1660416626
}
1660516627
wcscpy(result, path_wide);
16606-
if (path_len > 0) {
16607-
ch = result[path_len - 1];
16608-
if (ch != SEP && ch != ALTSEP && ch != L':')
16609-
result[path_len++] = SEP;
16610-
wcscpy(result + path_len, filename);
16628+
wchar_t ch = result[path_len - 1];
16629+
if (ch != SEP && ch != ALTSEP && ch != L':') {
16630+
result[path_len++] = SEP;
1661116631
}
16632+
wcscpy(result + path_len, filename);
16633+
PyMem_RawFree(path_allocated);
1661216634
return result;
1661316635
}
1661416636

@@ -16640,7 +16662,7 @@ DirEntry_from_find_data(PyObject *module, path_t *path, WIN32_FIND_DATAW *dataW)
1664016662
goto error;
1664116663
}
1664216664

16643-
joined_path = join_path_filenameW(path->wide, dataW->cFileName);
16665+
joined_path = join_path_filenameW(path->wide, dataW->cFileName, 0);
1664416666
if (!joined_path)
1664516667
goto error;
1664616668

@@ -17076,7 +17098,7 @@ os_scandir_impl(PyObject *module, path_t *path)
1707617098
#ifdef MS_WINDOWS
1707717099
iterator->first_time = 1;
1707817100

17079-
path_strW = join_path_filenameW(iterator->path.wide, L"*.*");
17101+
path_strW = join_path_filenameW(iterator->path.wide, L"*.*", 1);
1708017102
if (!path_strW)
1708117103
goto error;
1708217104

0 commit comments

Comments
 (0)