Skip to content

Commit 87c85ab

Browse files
committed
Refactor get_local_part.
BUGFIX: get_local_part now correctly registers defects if encoded words are found in the local part.
1 parent c185843 commit 87c85ab

2 files changed

Lines changed: 108 additions & 85 deletions

File tree

Lib/email/_header_value_parser.py

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2081,40 +2081,55 @@ def get_obs_local_part(value, start):
20812081
obs_local_part.token_type = 'invalid-obs-local-part'
20822082
return obs_local_part, start
20832083

2084-
def get_local_part(value):
2084+
@_deprecate_old_api
2085+
def get_local_part(value, start):
20852086
""" local-part = dot-atom / quoted-string / obs-local-part
20862087
20872088
"""
20882089
local_part = LocalPart()
2089-
orig_value = value
2090+
vlen = len(value)
20902091
leader = None
2091-
if value and value[0] in CFWS_LEADER:
2092-
leader, value = get_cfws(value)
2093-
if not value:
2092+
if start < vlen and value[start] in CFWS_LEADER:
2093+
leader, start = get_cfws(value, start)
2094+
text_start = start
2095+
if start >= vlen:
20942096
raise errors.HeaderParseError(
20952097
"expected local-part but found '{}'".format(value))
20962098
try:
2097-
token, value = get_dot_atom(value)
2099+
token, start = get_dot_atom(value, start)
20982100
except errors.HeaderParseError:
20992101
try:
2100-
token, value = get_word(value)
2102+
token, start = get_word(value, start)
21012103
except errors.HeaderParseError:
2102-
if value[0] != '\\' and value[0] in PHRASE_ENDS:
2104+
if value[start] != '\\' and value[start] in PHRASE_ENDS:
2105+
# XXX XXX should this be a separate message mentioning
2106+
# both dot atom and word?
21032107
raise
21042108
token = TokenList()
2105-
if leader is not None:
2106-
token[:0] = [leader]
2107-
local_part.append(token)
2108-
if value and (value[0]=='\\' or value[0] not in PHRASE_ENDS):
2109-
obs_local_part, value = get_obs_local_part(orig_value)
2110-
if obs_local_part.token_type == 'invalid-obs-local-part':
2109+
if start < vlen and (value[start]=='\\' or value[start] not in PHRASE_ENDS):
2110+
# Even if we started with valid text there is more, so start over as obs
2111+
token, start = get_obs_local_part(value, text_start)
2112+
if token.token_type == 'invalid-obs-local-part':
21112113
local_part.defects.append(errors.InvalidHeaderDefect(
21122114
"local-part is not dot-atom, quoted-string, or obs-local-part"))
21132115
else:
2114-
local_part.defects.append(errors.ObsoleteHeaderDefect(
2115-
"local-part is not a dot-atom (contains CFWS)"))
2116-
local_part[0] = obs_local_part
2117-
return local_part, value
2116+
local_part.defects.append(
2117+
errors.ObsoleteHeaderDefect(
2118+
"local-part is not a valid dot-atom"
2119+
" (it contains internal CFWS)"
2120+
)
2121+
)
2122+
if leader is not None:
2123+
token.push(leader)
2124+
local_part.append(token)
2125+
if local_part.ew_indexes:
2126+
# XXX some day we'll put each index into its own defect.
2127+
local_part.defects.extend(
2128+
[
2129+
errors.InvalidHeaderDefect('encoded-word in local-part'),
2130+
] * len(local_part.ew_indexes)
2131+
)
2132+
return local_part, start
21182133

21192134
def get_dtext(value):
21202135
r""" dtext = <printable ascii except \ [ ]> / obs-dtext

Lib/test/test_email/test__header_value_parser.py

Lines changed: 75 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def charset_defect(chars):
153153

154154
non_dot_atom_local_part_obs_defect = (
155155
errors.ObsoleteHeaderDefect,
156-
r'local-part is not a dot-atom \(contains CFWS\)',
156+
r'local-part is not a valid dot-atom \(it contains internal CFWS\)',
157157
)
158158

159159
not_even_obs_local_part_defect = (
@@ -186,6 +186,11 @@ def charset_defect(chars):
186186
r"'\\' character outside of quoted-string/ccontent",
187187
)
188188

189+
ew_in_local_part_defect = (
190+
errors.InvalidHeaderDefect,
191+
'encoded-word in local-part',
192+
)
193+
189194
# ---> End Defect Expectations
190195

191196

@@ -4151,6 +4156,12 @@ def test_get_local_part(self, s, *args, local_part=None, **kw):
41514156
)
41524157
self.assertEqual(lp.local_part, local_part)
41534158

4159+
@params_map
4160+
def add_ew_defects(*args, ew_indexes=[], defects=[], **kw):
4161+
if ew_indexes:
4162+
defects = defects + [ew_in_local_part_defect] * len(ew_indexes)
4163+
yield '', C(*args, ew_indexes=ew_indexes, defects=defects, **kw)
4164+
41544165
@params_map(with_namelist=True)
41554166
def adapt_get_dot_atom_tests_for_get_local_part(nl, s, *args, **kw):
41564167
r = kw.get('remainder')
@@ -4165,10 +4176,6 @@ def adapt_get_dot_atom_tests_for_get_local_part(nl, s, *args, **kw):
41654176
# For those two ew tests the blank comes from inside the ew.
41664177
local_part = local_part.removeprefix(' ').removesuffix(' ')
41674178
kw['local_part'] = local_part
4168-
# XXX XXX indexes won't be right mid-refactor, remove when
4169-
# get_local_part refactored.
4170-
if 'ew_indexes' in kw:
4171-
kw['ew_indexes'] = ...
41724179
yield '', C(s, *args, **kw)
41734180

41744181
@params_map
@@ -4177,10 +4184,6 @@ def adapt_get_quoted_string_tests_for_get_local_part(*args, **kw):
41774184
kw['value'] = kw.pop('quoted_value')
41784185
if 'exception' not in kw:
41794186
kw['local_part'] = kw.pop('content')
4180-
# XXX XXX indexes won't be right mid-refactor, remove when
4181-
# get_local_part refactored.
4182-
if 'ew_indexes' in kw:
4183-
kw['ew_indexes'] = ...
41844187
yield '', C(*args, **kw)
41854188

41864189
@params_map
@@ -4202,75 +4205,79 @@ def adapt_get_obs_local_part_tests_for_get_local_part(
42024205
defects.append(not_even_obs_local_part_defect)
42034206
else:
42044207
defects.append(non_dot_atom_local_part_obs_defect)
4205-
# XXX XXX indexes won't be right mid-refactor, remove when
4206-
# get_local_part refactored.
4207-
if 'ew_indexes' in kw:
4208-
kw['ew_indexes'] = ...
42094208
yield '', C(*args, defects=defects, **kw)
42104209

4211-
params_test_get_local_part = old_api_only(
4210+
params_test_get_local_part = for_each_api(
42124211

42134212
# An RFC compliant local part can be a dot atom or a quoted string, so
42144213
# it should pass some of the tests for those.
42154214

4216-
adapt_get_dot_atom_tests_for_get_local_part(
4217-
include_unless(
4218-
lambda n, *a, **k:
4219-
n.has_any(
4220-
# Get local part handles multiple atoms.
4221-
'two_ew_two_atoms',
4222-
'atom_ends_at_noncfws',
4223-
# There are some things get_dot_atom raises for that
4224-
# get_local_part treats as obs-local-part.
4225-
'two_dots_raises',
4226-
'trailing_dot_raises',
4227-
'space_ends_dot_atom',
4228-
# XXX XXX These need a logic fix to whitespace handling
4229-
# in get_local_part itself.
4230-
'ew_and_comments_no_ws',
4231-
'ew_and_empty_comments_no_ws',
4232-
)
4233-
or
4234-
# get_local_part handles quoted strings (tested above),
4235-
# and leading dots or \ are handled as obs-local-part.
4215+
add_ew_defects(
4216+
adapt_get_dot_atom_tests_for_get_local_part(
4217+
include_unless(
4218+
lambda n, *a, **k:
42364219
n.has_any(
4237-
'up_to_special',
4238-
'leading_special_raises',
4239-
'no_atom_before_special',
4240-
'no_atext_before_special_or_wsp',
4241-
'atom_ends_at_special',
4242-
'ends_at_special_after_comment',
4243-
'ends_at_special',
4220+
# Get local part handles multiple atoms.
4221+
'two_ew_two_atoms',
4222+
'atom_ends_at_noncfws',
4223+
# There are some things get_dot_atom raises for
4224+
# that get_local_part treats as obs-local-part.
4225+
'two_dots_raises',
4226+
'trailing_dot_raises',
4227+
'space_ends_dot_atom',
4228+
# XXX XXX These need a logic fix to whitespace
4229+
# handling in get_local_part itself.
4230+
'ew_and_comments_no_ws',
4231+
'ew_and_empty_comments_no_ws',
42444232
)
4245-
and n.has_any(
4246-
'reverse_solidus',
4247-
'quotation_mark',
4248-
'full_stop',
4249-
),
4250-
label='from_test_get_dot_atom',
4251-
)(params_test_get_dot_atom),
4233+
or
4234+
# get_local_part handles quoted strings (tested
4235+
# above), and leading dots or \ are handled as
4236+
# obs-local-part.
4237+
n.has_any(
4238+
'up_to_special',
4239+
'leading_special_raises',
4240+
'no_atom_before_special',
4241+
'no_atext_before_special_or_wsp',
4242+
'atom_ends_at_special',
4243+
'ends_at_special_after_comment',
4244+
'ends_at_special',
4245+
)
4246+
and n.has_any(
4247+
'reverse_solidus',
4248+
'quotation_mark',
4249+
'full_stop',
4250+
),
4251+
label='from_test_get_dot_atom',
4252+
)(params_test_get_dot_atom),
4253+
),
42524254
),
42534255

4254-
adapt_get_quoted_string_tests_for_get_local_part(
4255-
include_unless(
4256-
lambda n, *a, **k: n.has_any(
4257-
# These tests have an atom first; get_quoted_string raises,
4258-
# but get_local_part parses the atom. Atoms are tested above.
4259-
'no_quoted_string',
4260-
'no_leading_dquote_before_non_ws',
4261-
# A local part only ends at specials other than " and .
4262-
'qs_ends_at_noncfws',
4263-
'ew_after_dquote',
4264-
'encoded_word_after_dquote_with_no_ws',
4265-
'end_dquote_mid_word',
4266-
),
4267-
label='from_test_get_quoted_string',
4268-
)(params_test_get_quoted_string),
4256+
add_ew_defects(
4257+
adapt_get_quoted_string_tests_for_get_local_part(
4258+
include_unless(
4259+
lambda n, *a, **k: n.has_any(
4260+
# These tests have an atom first; get_quoted_string
4261+
# raises, but get_local_part parses the atom. Atoms
4262+
# are tested above.
4263+
'no_quoted_string',
4264+
'no_leading_dquote_before_non_ws',
4265+
# A local part only ends at specials other than " and .
4266+
'qs_ends_at_noncfws',
4267+
'ew_after_dquote',
4268+
'encoded_word_after_dquote_with_no_ws',
4269+
'end_dquote_mid_word',
4270+
),
4271+
label='from_test_get_quoted_string',
4272+
)(params_test_get_quoted_string),
4273+
),
42694274
),
42704275

4271-
add_label('from_test_get_obs_local_part')(
4272-
adapt_get_obs_local_part_tests_for_get_local_part(
4273-
params_test_get_obs_local_part,
4276+
add_ew_defects(
4277+
add_label('from_test_get_obs_local_part')(
4278+
adapt_get_obs_local_part_tests_for_get_local_part(
4279+
params_test_get_obs_local_part,
4280+
),
42744281
),
42754282
),
42764283

@@ -4357,7 +4364,7 @@ def adapt_get_obs_local_part_tests_for_get_local_part(
43574364
# XXX XXX there should be exactly one missing whitespace here,
43584365
# but it will change until we refactor get_local_part.
43594366
#missing_whitespace_after_ew_defect,
4360-
# XXX XXX there should be a defect for there being an EW at all.
4367+
ew_in_local_part_defect,
43614368
],
43624369
local_part='exámple',
43634370
ew_indexes=[0],
@@ -4376,6 +4383,7 @@ def adapt_get_obs_local_part_tests_for_get_local_part(
43764383
missing_whitespace_before_ew_defect,
43774384
missing_whitespace_after_ew_defect,
43784385
# One 'encoded-word in local-part' defect is expected per encoded word.
4386+
*[ew_in_local_part_defect]*2,
43794387
],
43804388
ew_indexes=[0, 17],
43814389
),

0 commit comments

Comments
 (0)