Skip to content

Commit 82d5141

Browse files
committed
[3.10] bpo-46521: Fix codeop to use a new partial-input mode of the parser (GH-31010).
(cherry picked from commit 69e1097) Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
1 parent cbdcae5 commit 82d5141

7 files changed

Lines changed: 52 additions & 54 deletions

File tree

Include/cpython/compile.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@
1818
#define PyCF_IGNORE_COOKIE 0x0800
1919
#define PyCF_TYPE_COMMENTS 0x1000
2020
#define PyCF_ALLOW_TOP_LEVEL_AWAIT 0x2000
21+
#define PyCF_ALLOW_INCOMPLETE_INPUT 0x4000
2122
#define PyCF_COMPILE_MASK (PyCF_ONLY_AST | PyCF_ALLOW_TOP_LEVEL_AWAIT | \
22-
PyCF_TYPE_COMMENTS | PyCF_DONT_IMPLY_DEDENT)
23+
PyCF_TYPE_COMMENTS | PyCF_DONT_IMPLY_DEDENT | \
24+
PyCF_ALLOW_INCOMPLETE_INPUT)
2325

2426
typedef struct {
2527
int cf_flags; /* bitmask of CO_xxx flags relevant to future */

Include/errcode.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ extern "C" {
2626
#define E_TOODEEP 20 /* Too many indentation levels */
2727
#define E_DEDENT 21 /* No matching outer block for dedent */
2828
#define E_DECODE 22 /* Error in decoding into Unicode */
29+
#define E_EOFS 23 /* EOF in triple-quoted string */
30+
#define E_EOLS 24 /* EOL in single-quoted string */
2931
#define E_LINECONT 25 /* Unexpected characters after a line continuation */
3032
#define E_BADSINGLE 27 /* Ill-formed single statement input */
3133
#define E_INTERACT_STOP 28 /* Interactive mode stopped tokenization */

Lib/codeop.py

Lines changed: 11 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -10,30 +10,6 @@
1010
syntax error (OverflowError and ValueError can be produced by
1111
malformed literals).
1212
13-
Approach:
14-
15-
First, check if the source consists entirely of blank lines and
16-
comments; if so, replace it with 'pass', because the built-in
17-
parser doesn't always do the right thing for these.
18-
19-
Compile three times: as is, with \n, and with \n\n appended. If it
20-
compiles as is, it's complete. If it compiles with one \n appended,
21-
we expect more. If it doesn't compile either way, we compare the
22-
error we get when compiling with \n or \n\n appended. If the errors
23-
are the same, the code is broken. But if the errors are different, we
24-
expect more. Not intuitive; not even guaranteed to hold in future
25-
releases; but this matches the compiler's behavior from Python 1.4
26-
through 2.2, at least.
27-
28-
Caveat:
29-
30-
It is possible (but not likely) that the parser stops parsing with a
31-
successful outcome before reaching the end of the source; in this
32-
case, trailing symbols may be ignored instead of causing an error.
33-
For example, a backslash followed by two newlines may be followed by
34-
arbitrary garbage. This will be fixed once the API for the parser is
35-
better.
36-
3713
The two interfaces are:
3814
3915
compile_command(source, filename, symbol):
@@ -64,7 +40,11 @@
6440

6541
__all__ = ["compile_command", "Compile", "CommandCompiler"]
6642

67-
PyCF_DONT_IMPLY_DEDENT = 0x200 # Matches pythonrun.h.
43+
# The following flags match the values from Include/cpython/compile.h
44+
# Caveat emptor: These flags are undocumented on purpose and depending
45+
# on their effect outside the standard library is **unsupported**.
46+
PyCF_DONT_IMPLY_DEDENT = 0x200
47+
PyCF_ALLOW_INCOMPLETE_INPUT = 0x4000
6848

6949
def _maybe_compile(compiler, source, filename, symbol):
7050
# Check for source consisting of only blank lines and comments.
@@ -86,24 +66,12 @@ def _maybe_compile(compiler, source, filename, symbol):
8666
with warnings.catch_warnings():
8767
warnings.simplefilter("error")
8868

89-
code1 = err1 = err2 = None
90-
try:
91-
code1 = compiler(source + "\n", filename, symbol)
92-
except SyntaxError as e:
93-
err1 = e
94-
9569
try:
96-
code2 = compiler(source + "\n\n", filename, symbol)
70+
compiler(source + "\n", filename, symbol)
9771
except SyntaxError as e:
98-
err2 = e
99-
100-
try:
101-
if not code1 and _is_syntax_error(err1, err2):
102-
raise err1
103-
else:
104-
return None
105-
finally:
106-
err1 = err2 = None
72+
if "incomplete input" in str(e):
73+
return None
74+
raise
10775

10876
def _is_syntax_error(err1, err2):
10977
rep1 = repr(err1)
@@ -115,7 +83,7 @@ def _is_syntax_error(err1, err2):
11583
return False
11684

11785
def _compile(source, filename, symbol):
118-
return compile(source, filename, symbol, PyCF_DONT_IMPLY_DEDENT)
86+
return compile(source, filename, symbol, PyCF_DONT_IMPLY_DEDENT | PyCF_ALLOW_INCOMPLETE_INPUT)
11987

12088
def compile_command(source, filename="<input>", symbol="single"):
12189
r"""Compile a command and determine whether it is incomplete.
@@ -144,7 +112,7 @@ class Compile:
144112
statement, it "remembers" and compiles all subsequent program texts
145113
with the statement in force."""
146114
def __init__(self):
147-
self.flags = PyCF_DONT_IMPLY_DEDENT
115+
self.flags = PyCF_DONT_IMPLY_DEDENT | PyCF_ALLOW_INCOMPLETE_INPUT
148116

149117
def __call__(self, source, filename, symbol):
150118
codeob = compile(source, filename, symbol, self.flags, True)

(NEWS entry file; its path was not captured in this extract)

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix a bug in the :mod:`codeop` module that was incorrectly identifying
2+
invalid code involving string quotes as valid code.

Parser/pegen.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1202,6 +1202,9 @@ compute_parser_flags(PyCompilerFlags *flags)
12021202
if ((flags->cf_flags & PyCF_ONLY_AST) && flags->cf_feature_version < 7) {
12031203
parser_flags |= PyPARSE_ASYNC_HACKS;
12041204
}
1205+
if (flags->cf_flags & PyCF_ALLOW_INCOMPLETE_INPUT) {
1206+
parser_flags |= PyPARSE_ALLOW_INCOMPLETE_INPUT;
1207+
}
12051208
return parser_flags;
12061209
}
12071210

@@ -1327,15 +1330,29 @@ _PyPegen_check_tokenizer_errors(Parser *p) {
13271330
return ret;
13281331
}
13291332

1333+
1334+
static inline int
1335+
_is_end_of_source(Parser *p) {
1336+
int err = p->tok->done;
1337+
return err == E_EOF || err == E_EOFS || err == E_EOLS;
1338+
}
1339+
13301340
void *
13311341
_PyPegen_run_parser(Parser *p)
13321342
{
13331343
void *res = _PyPegen_parse(p);
13341344
assert(p->level == 0);
13351345
if (res == NULL) {
1346+
if ((p->flags & PyPARSE_ALLOW_INCOMPLETE_INPUT) && _is_end_of_source(p)) {
1347+
PyErr_Clear();
1348+
return RAISE_SYNTAX_ERROR("incomplete input");
1349+
}
13361350
if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) {
13371351
return NULL;
13381352
}
1353+
// Make a second parser pass. In this pass we activate heavier and slower checks
1354+
// to produce better error messages and more complete diagnostics. Extra "invalid_*"
1355+
// rules will be active during parsing.
13391356
Token *last_token = p->tokens[p->fill - 1];
13401357
reset_parser_state(p);
13411358
_PyPegen_parse(p);

Parser/pegen.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#define PyPARSE_BARRY_AS_BDFL 0x0020
2323
#define PyPARSE_TYPE_COMMENTS 0x0040
2424
#define PyPARSE_ASYNC_HACKS 0x0080
25+
#define PyPARSE_ALLOW_INCOMPLETE_INPUT 0x0100
2526

2627
typedef struct _memo {
2728
int type;

Parser/tokenizer.c

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
static struct tok_state *tok_new(void);
4040
static int tok_nextc(struct tok_state *tok);
4141
static void tok_backup(struct tok_state *tok, int c);
42-
42+
static int syntaxerror(struct tok_state *tok, const char *format, ...);
4343

4444
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
4545
tokenizing. */
@@ -1030,8 +1030,9 @@ tok_nextc(struct tok_state *tok)
10301030
if (tok->cur != tok->inp) {
10311031
return Py_CHARMASK(*tok->cur++); /* Fast path */
10321032
}
1033-
if (tok->done != E_OK)
1034-
return EOF;
1033+
if (tok->done != E_OK) {
1034+
return EOF;
1035+
}
10351036
if (tok->fp == NULL) {
10361037
rc = tok_underflow_string(tok);
10371038
}
@@ -1963,16 +1964,21 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
19631964
tok->line_start = tok->multi_line_start;
19641965
int start = tok->lineno;
19651966
tok->lineno = tok->first_lineno;
1966-
19671967
if (quote_size == 3) {
1968-
return syntaxerror(tok,
1969-
"unterminated triple-quoted string literal"
1970-
" (detected at line %d)", start);
1968+
syntaxerror(tok, "unterminated triple-quoted string literal"
1969+
" (detected at line %d)", start);
1970+
if (c != '\n') {
1971+
tok->done = E_EOFS;
1972+
}
1973+
return ERRORTOKEN;
19711974
}
19721975
else {
1973-
return syntaxerror(tok,
1974-
"unterminated string literal (detected at"
1975-
" line %d)", start);
1976+
syntaxerror(tok, "unterminated string literal (detected at"
1977+
" line %d)", start);
1978+
if (c != '\n') {
1979+
tok->done = E_EOLS;
1980+
}
1981+
return ERRORTOKEN;
19761982
}
19771983
}
19781984
if (c == quote) {

0 commit comments

Comments
 (0)