From 5ed6ce19e8ce164dfaec90225127cc2eed2195c7 Mon Sep 17 00:00:00 2001 From: nick evans Date: Tue, 10 Mar 2026 10:29:13 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Validate=20RawData=20and=20wait?= =?UTF-8?q?=20to=20continue=20literals?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This parses a RawData string into an array of `text`, `literal`, and `literal8` parts. This fixes embedded literals so they correctly wait for server continuation request before sending. Non-synchronizing literals are also parsed correctly. This adds `Net::IMAP::RawText` which sends verbatim (like `RawData` did previously), and handles `text` validations: * `text` can't contain CR, LF, or NULL * `text` must be ASCII compatible or valid UTF-8 The existing `Literal` and `Literal8` classes handle literal validation: * `literal` can't contain NULL byte, but `literal8` can Additionally, `RawData` validates that: * embedded literal bytesize must be <= remaining string bytesize * final `text` cannot end with `{number}` (in case a `CRLF` comes after) This does _not_ make RawData arguments safe from every type of injection attack. However, without losing any significant flexibility, this _does_ prevent unescaped `CRLF` from creating a _command_ injection. --- lib/net/imap/command_data.rb | 80 ++++++++++- test/net/imap/test_command_data.rb | 205 +++++++++++++++++++++++++++++ test/net/imap/test_imap.rb | 29 ++++ 3 files changed, 312 insertions(+), 2 deletions(-) diff --git a/lib/net/imap/command_data.rb b/lib/net/imap/command_data.rb index 33185d4c..76dc1d30 100644 --- a/lib/net/imap/command_data.rb +++ b/lib/net/imap/command_data.rb @@ -154,9 +154,85 @@ def validate end end + # Represents IMAP +text+ data, which may contain any 7-bit ASCII character, + # except for +NULL+, +CR+, or +LF+. 
+text+ is extended to allow any + # multibyte +UTF-8+ character when either +UTF8=ACCEPT+ or +IMAP4rev2+ have + # been enabled, or when the server supports only +IMAP4rev2+ and not earlier + # IMAP revisions, or when the server advertises +UTF8=ONLY+. + # + # NOTE: The current implementation does not validate whether the connection + # currently supports UTF-8. Future versions may change. + # + # The string's bytes must be valid ASCII or valid UTF-8. The string's + # reported encoding is ignored, but the string is _not_ transcoded. + class RawText < CommandData # :nodoc: + def initialize(data:) + data = String(data.to_str) + data = if data.encoding in Encoding::ASCII | Encoding::UTF_8 + -data + elsif data.ascii_only? + -(data.dup.force_encoding("ASCII")) + else + -(data.dup.force_encoding("UTF-8")) + end + super + validate + end + + def validate + if data.include?("\0") + raise DataFormatError, "NULL byte must be binary literal encoded" + elsif !data.valid_encoding? + raise DataFormatError, "invalid UTF-8 must be literal encoded" + elsif /[\r\n]/.match?(data) + raise DataFormatError, "CR and LF bytes must be literal encoded" + end + end + + def ascii_only? = data.ascii_only? 
+ + def send_data(imap, tag) = imap.__send__(:put_string, data) + end + class RawData < CommandData # :nodoc: - def send_data(imap, tag) - imap.__send__(:put_string, data) + def initialize(data:) + data = split_parts(data) + super + validate + end + + def send_data(imap, tag) = data.each do _1.send_data(imap, tag) end + + def validate + return unless data.last in RawText(data: text) + if text.rindex(/~?\{\d+\+?\}\z/n) + raise DataFormatError, "RawData cannot end with literal continuation" + end + end + + private + + def split_parts(data) + data = data.b # dups and ensures BINARY encoding + parts = [] + while data.match(/(~)?\{(0|[1-9]\d*)(\+)?\}\r\n/n) + text, binary, bytesize, non_sync, data = $`, !!$1, $2, !!$3, $' + bytesize = NumValidator.coerce_number64 bytesize + parts << RawText[text] unless text.empty? + parts << extract_literal(data, binary:, bytesize:, non_sync:) + data.bytesplice(0, bytesize, "") + end + parts << RawText[data] unless data.empty? + parts + end + + def extract_literal(data, binary:, bytesize:, non_sync:) + if data.bytesize < bytesize + raise DataFormatError, "Too few bytes in string for literal, " \ + "expected: %s, remaining: %s" % [bytesize, data.bytesize] + end + literal = data.byteslice(0, bytesize) + (binary ? 
Literal8 : Literal).new(data: literal, non_sync:) end end diff --git a/test/net/imap/test_command_data.rb b/test/net/imap/test_command_data.rb index ac0047f9..66cf592b 100644 --- a/test/net/imap/test_command_data.rb +++ b/test/net/imap/test_command_data.rb @@ -10,6 +10,8 @@ class CommandDataTest < Net::IMAP::TestCase Flag = Net::IMAP::Flag Literal = Net::IMAP::Literal Literal8 = Net::IMAP::Literal8 + RawText = Net::IMAP::RawText + RawData = Net::IMAP::RawData Output = Data.define(:name, :args, :kwargs) TAG = Module.new.freeze @@ -162,4 +164,207 @@ class StringFormatterTest < Net::IMAP::TestCase end end + class RawTextTest < CommandDataTest + test "basic ASCII string" do + imap.send_data RawText.new('foo "bar" (baz)') + assert_equal [Output.put_string('foo "bar" (baz)')], imap.output + end + + test "allows IMAP atom-special symbols" do + imap.send_data RawText.new('foo "bar" (baz)') + imap.send_data RawText.new("(){}[]%*\"\\") + imap.send_data RawText.new("(((((((((((((((( unbalanced ]]]]]]]]]]]]]") + assert_equal [ + Output.put_string('foo "bar" (baz)'), + Output.put_string("(){}[]%*\"\\"), + Output.put_string("(((((((((((((((( unbalanced ]]]]]]]]]]]]]"), + ], imap.output + end + + test "ASCII compatible string with another encodings" do + imap.send_data RawText.new("foo bar".encode("cp1252")) + assert_equal [ + Output.put_string("foo bar"), + ], imap.output + end + + test "allows ASCII control chars" do + text = RawText.new("beep\b beep\b escape!\e delete this:\x1f") + imap.send_data text + assert_equal [ + Output.put_string("beep\b beep\b escape!\e delete this:\x1f"), + ], imap.output + end + + data( + "NULL" => ["with \0 NULL", /NULL\b.+\bbyte/i], + "CR" => ["with \r CR", /CR\b.+\bbyte/i], + "LF" => ["with \n LF", /LF\b.+\bbyte/i], + ) + test "invalid ASCII byte" do |(text, error_message)| + try_multiple_encodings(error_message, text) + end + + # See Table 3-7, Well-Formed UTF-8 Byte Sequences, in The Unicode Standard: + # 
https://www.unicode.org/versions/Unicode17.0.0/core-spec/chapter-3/#G27506 + data( + "incomplete 2 byte sequence" => "\xc3".b, + "invalid 2 byte sequence" => "\xc3\x7f".b, + "incomplete 3 byte sequence" => "\xe1\x80".b, + "invalid 3 byte sequence" => "\xe0\x80\x80".b, + "incomplete 4 byte sequence" => "\xf1\x80\x80".b, + "invalid 4 byte sequence" => "\xf0\x80\x80\x80".b, + "first byte too high" => "\xff\xaa\xaa\xaa".b, + "UTF-16 surrogate pair" => "\xFE\xFF\xD8\x3D\xDC\xA3\xFE\x0F".b, + "windows-1252" => "åêïõü".encode("windows-1252"), + ) + test "invalid UTF-8" do |text| + try_multiple_encodings(/invalid UTF-8/i, text) + end + + def with_multiple_encodings(data) + yield data.b # BINARY + yield data.dup.force_encoding("ASCII") + yield data.dup.force_encoding("UTF-8") + yield data.dup.force_encoding("cp1252") + end + + def try_multiple_encodings(error_message, data) + with_multiple_encodings(data) do |encoded| + assert_raise_with_message(DataFormatError, error_message) do + RawText[encoded] + end + end + end + end + + class RawDataTest < CommandDataTest + test "simple raw text" do + raw = RawData.new('foo "bar" baz') + assert_equal [RawText['foo "bar" baz']], raw.data + imap.send_data raw + assert_equal [Output.put_string('foo "bar" baz')], imap.output + end + + test "a single literal" do + raw = RawData.new("{7}\r\nfoo bar") + assert_equal [Literal["foo bar", false]], raw.data + imap.send_data raw, tag: "t1" + assert_equal [ + Output.send_literal("foo bar", "t1", non_sync: false), + ], imap.output + end + + test "literals embedded between text" do + raw = RawData.new("foo bar {3}\r\nbaz {4+}\r\nquux etc") + assert_equal [ + RawText["foo bar "], + Literal["baz", false], + RawText[" "], + Literal["quux", true], # non-synchronizing + RawText[" etc"], + ], raw.data + imap.send_data raw, tag: "t2" + assert_equal [ + Output.put_string("foo bar "), + Output.send_literal("baz", "t2", non_sync: false), + Output.put_string(" "), + Output.send_literal("quux", "t2", 
non_sync: true), + Output.put_string(" etc"), + ], imap.output + end + + test "empty literals" do + raw = RawData.new("{0}\r\n{0+}\r\n~{0}\r\n~{0+}\r\n") + assert_equal [ + Literal["", false], + Literal["", true], + Literal8["", false], + Literal8["", true], + ], raw.data + imap.send_data raw, tag: "t2.2" + assert_equal [ + Output.send_literal("", "t2.2", non_sync: false), + Output.send_literal("", "t2.2", non_sync: true), + Output.send_binary_literal("", "t2.2", non_sync: false), + Output.send_binary_literal("", "t2.2", non_sync: true), + ], imap.output + end + + test "raw text embedded between literals" do + raw = RawData.new("{3}\r\nfoo bar") + assert_equal [ + Literal["foo", false], + RawText[" bar"] + ], raw.data + imap.send_data raw, tag: "t3" + assert_equal [ + Output.send_literal("foo", "t3", non_sync: false), + Output.put_string(" bar"), + ], imap.output + end + + test "raw text followed by literal" do + raw = RawData.new("foo {3}\r\nbar") + assert_equal [ + RawText["foo "], + Literal["bar", false], + ], raw.data + imap.send_data raw, tag: "t4" + assert_equal [ + Output.put_string("foo "), + Output.send_literal("bar", "t4", non_sync: false), + ], imap.output + imap.clear + end + + test "binary literal with regular literal" do + raw = RawData.new("foo ~{7}\r\n\0bar\r\nbaz {4}\r\nquux") + assert_equal [ + RawText["foo "], + Literal8["\0bar\r\nb", false], + RawText["az "], + Literal["quux", false], + ], raw.data + imap.send_data raw, tag: "t5" + assert_equal [ + Output.put_string("foo "), + Output.send_binary_literal("\0bar\r\nb", "t5", non_sync: false), + Output.put_string("az "), + Output.send_literal("quux", "t5", non_sync: false), + ], imap.output + end + + data( + "CR" => "with \r byte", + "LF" => "with \n byte", + "NULL" => "with \0 byte", + "CRLF" => "with \r\n bytes", + ) + test "invalid bytes in raw text" do |data| + assert_raise_with_message(DataFormatError, /must be.* literal encoded/i) do + RawData.new(data:) + end + end + + test "invalid literal" 
do + assert_raise_with_message(DataFormatError, /too few bytes/i) do + RawData.new(data: "invalid literal {123}\r\ntoo small") + end + + assert_raise_with_message(DataFormatError, /NULL byte.*in.*literal/i) do + RawData.new(data: "invalid literal {10}\r\ncontains \0 null") + end + end + + test "invalid literal ending ('{123}')" do + assert_raise(DataFormatError) do RawData.new(data: "literal {123}") end + assert_raise(DataFormatError) do RawData.new(data: "literal+ {123+}") end + assert_raise(DataFormatError) do RawData.new(data: "~literal ~{123}") end + assert_raise(DataFormatError) do RawData.new(data: "~literal+ ~{123+}") end + raw = RawData.new(data: " {123} ") + assert_equal [RawText[" {123} "]], raw.data + end + end + end diff --git a/test/net/imap/test_imap.rb b/test/net/imap/test_imap.rb index d2aa9bb3..e82d08cc 100644 --- a/test/net/imap/test_imap.rb +++ b/test/net/imap/test_imap.rb @@ -650,6 +650,35 @@ def test_send_symbol_as_flag end end + def test_raw_data + with_fake_server do |server, imap| + server.on "TEST", &:done_ok + + imap.__send__(:send_command, "TEST", Net::IMAP::RawData.new("foo bar")) + assert_equal "foo bar", server.commands.pop.args + + imap.__send__(:send_command, "TEST", + Net::IMAP::RawData.new("{3}\r\nfoo"), + Net::IMAP::RawData.new("~{4}\r\n\0bar")) + assert_equal "{3}\r\nfoo ~{4}\r\n\0bar", server.commands.pop.args + + # RawData must pass basic validation before sending command + [ + "with \0 NULL", + "with \r CR", + "with \n LF", + "with \r\n CRLF", + "{1234}\r\nliteral is too small", + "{1}\r\n\0 literal contains NULL", + ].each do |data| + assert_raise(Net::IMAP::DataFormatError) do + imap.__send__(:send_command, "TEST", Net::IMAP::RawData[data:]) + end + assert_empty server.commands + end + end + end + test("send PartialRange args") do with_fake_server do |server, imap| server.on "TEST", &:done_ok