Skip to content

Commit c185843

Browse files
committed
Refactor get_obs_local_part.
Along the way I've tidied up the 'dot' defects to all have the same format, and used 'local-part' instead of 'local part' to be consistent with the usage in other defects.
1 parent 2a5b64e commit c185843

2 files changed

Lines changed: 38 additions & 25 deletions

File tree

Lib/email/_header_value_parser.py

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2013,39 +2013,54 @@ def get_phrase(value, start):
20132013
" raise an error in the future."
20142014
)
20152015

2016-
def get_obs_local_part(value):
2016+
@_deprecate_old_api
2017+
def get_obs_local_part(value, start):
20172018
""" obs-local-part = word *("." word)
2019+
2020+
Return an ObsLocalPart containing a list of words and DOTs containing
2021+
all of the characters up to the next character not allowed in a phrase or
2022+
the end of the value, and a pointer to the SPECIAL or the len of value.
2023+
2024+
Decode any encoded words, registering a defect if any are found.
2025+
Missing whitespace defects may also be registered.
2026+
2027+
Register defects if there are any non-printable or invalid characters in
2028+
the non-whitespace tokens.
2029+
20182030
"""
20192031
obs_local_part = ObsLocalPart()
2032+
vlen = len(value)
20202033
last_non_ws_was_dot = False
2021-
while value and (value[0]=='\\' or value[0] not in PHRASE_ENDS):
2022-
if value[0] == '.':
2034+
while start < vlen and ((c := value[start]) == '\\' or c not in PHRASE_ENDS):
2035+
if c == '.':
20232036
if last_non_ws_was_dot:
20242037
obs_local_part.defects.append(errors.InvalidHeaderDefect(
2025-
"invalid repeated '.'"))
2038+
"invalid repeated '.' in local-part")
2039+
)
20262040
obs_local_part.append(DOT)
20272041
last_non_ws_was_dot = True
2028-
value = value[1:]
2042+
start += 1
20292043
continue
2030-
elif value[0]=='\\':
2044+
elif c == '\\':
20312045
# RFC 5322 doesn't allow \, but the old email code parsed it.
2032-
obs_local_part.append(ValueTerminal(value[0],
2033-
'misplaced-special'))
2034-
value = value[1:]
2046+
obs_local_part.append(ValueTerminal(c,'misplaced-special'))
2047+
start += 1
20352048
obs_local_part.defects.append(errors.InvalidHeaderDefect(
20362049
"'\\' character outside of quoted-string/ccontent"))
20372050
last_non_ws_was_dot = False
20382051
continue
20392052
if obs_local_part and obs_local_part[-1].token_type != 'dot':
2040-
obs_local_part.defects.append(errors.InvalidHeaderDefect(
2041-
"missing '.' between words"))
2053+
obs_local_part.defects.append(
2054+
errors.InvalidHeaderDefect("missing '.' between words"),
2055+
)
20422056
try:
2043-
token, value = get_word(value)
2057+
token, start = get_word(value, start)
20442058
last_non_ws_was_dot = False
20452059
except errors.HeaderParseError:
2046-
if value[0] not in CFWS_LEADER:
2060+
if value[start] not in CFWS_LEADER:
20472061
raise
2048-
token, value = get_cfws(value)
2062+
# There will be a 'dot' defect; no need for a no-word defect here.
2063+
token, start = get_cfws(value, start)
20492064
obs_local_part.append(token)
20502065
if not obs_local_part:
20512066
raise errors.HeaderParseError(
@@ -2055,16 +2070,16 @@ def get_obs_local_part(value):
20552070
len(obs_local_part) > 1 and
20562071
obs_local_part[1].token_type=='dot'):
20572072
obs_local_part.defects.append(errors.InvalidHeaderDefect(
2058-
"Invalid leading '.' in local part"))
2073+
"Invalid leading '.' in local-part"))
20592074
if (obs_local_part[-1].token_type == 'dot' or
20602075
obs_local_part[-1].token_type=='cfws' and
20612076
len(obs_local_part) > 1 and
20622077
obs_local_part[-2].token_type=='dot'):
20632078
obs_local_part.defects.append(errors.InvalidHeaderDefect(
2064-
"Invalid trailing '.' in local part"))
2079+
"Invalid trailing '.' in local-part"))
20652080
if obs_local_part.defects:
20662081
obs_local_part.token_type = 'invalid-obs-local-part'
2067-
return obs_local_part, value
2082+
return obs_local_part, start
20682083

20692084
def get_local_part(value):
20702085
""" local-part = dot-atom / quoted-string / obs-local-part

Lib/test/test_email/test__header_value_parser.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -168,17 +168,17 @@ def charset_defect(chars):
168168

169169
trailing_dot_in_local_part_defect = (
170170
errors.InvalidHeaderDefect,
171-
"invalid trailing '.' in local part",
171+
"invalid trailing '.' in local-part",
172172
)
173173

174174
leading_dot_in_local_part_defect = (
175175
errors.InvalidHeaderDefect,
176-
"invalid leading '.' in local part",
176+
"invalid leading '.' in local-part",
177177
)
178178

179179
repeated_dot_in_local_part_defect = (
180180
errors.InvalidHeaderDefect,
181-
"invalid repeated '.'",
181+
"invalid repeated '.' in local-part",
182182
)
183183

184184
misplaced_backslash_defect = (
@@ -3960,7 +3960,7 @@ def test_get_obs_local_part(self, s, *args, local_part=None, **kw):
39603960
# not what it does with non-obs syntax. Anything else is "don't care".
39613961
# The 'local_part' specs are checked by the get_local_part tests, since the
39623962
# token list returned by get_obs_local_part doesn't have that attribute.
3963-
params_test_get_obs_local_part = old_api_only(
3963+
params_test_get_obs_local_part = for_each_api(
39643964

39653965
simple_obsolete = C(
39663966
'Fred. A.Johnson@python.org',
@@ -4119,8 +4119,7 @@ def test_get_obs_local_part(self, s, *args, local_part=None, **kw):
41194119
missing_dot_in_local_part_defect,
41204120
ew_inside_quoted_string_defect,
41214121
],
4122-
# XXX XXX second index will change during refactor
4123-
ew_indexes=[0, 1],
4122+
ew_indexes=[0, 17],
41244123
),
41254124

41264125
less_invalid_ew_atoms = C(
@@ -4129,8 +4128,7 @@ def test_get_obs_local_part(self, s, *args, local_part=None, **kw):
41294128
value="foo . bar .bird",
41304129
local_part="foo . bar.bird",
41314130
comments=['test'],
4132-
# XXX XXX the indexes will change during refactor
4133-
ew_indexes=[0, 2, 20],
4131+
ew_indexes=[0, 20, 38],
41344132
),
41354133

41364134
)

0 commit comments

Comments
 (0)