From d8699306076973641219703ba9d4be2398442a99 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 4 Jun 2023 21:04:15 +0100 Subject: [PATCH 1/4] Fix list items losing nesting when following another list --- lib/markdown2.py | 3 ++- test/tm-cases/seperated_list_items.html | 12 ++++++++++++ test/tm-cases/seperated_list_items.text | 6 ++++++ 3 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 test/tm-cases/seperated_list_items.html create mode 100644 test/tm-cases/seperated_list_items.text diff --git a/lib/markdown2.py b/lib/markdown2.py index f5e5fbc8..e71f9210 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1887,7 +1887,8 @@ def _list_item_sub(self, match): item = match.group(4) leading_line = match.group(1) if leading_line or "\n\n" in item or self._last_li_endswith_two_eols: - item = self._run_block_gamut(self._outdent(item)) + item = self._uniform_outdent(item, min_outdent=' ', max_outdent=self.tab)[1] + item = self._run_block_gamut(item) else: # Recursion for sub-lists: item = self._do_lists(self._uniform_outdent(item, min_outdent=' ')[1]) diff --git a/test/tm-cases/seperated_list_items.html b/test/tm-cases/seperated_list_items.html new file mode 100644 index 00000000..140ad893 --- /dev/null +++ b/test/tm-cases/seperated_list_items.html @@ -0,0 +1,12 @@ + diff --git a/test/tm-cases/seperated_list_items.text b/test/tm-cases/seperated_list_items.text new file mode 100644 index 00000000..1a5c991a --- /dev/null +++ b/test/tm-cases/seperated_list_items.text @@ -0,0 +1,6 @@ +- Item 1 + ABCDEF + +- Item 2 + - Item 3 + - Item 4 From 6725ef01f707aeff023a4b6976aa7a741f109bfb Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 4 Jun 2023 21:14:30 +0100 Subject: [PATCH 2/4] Update `_uniform_indent` to allow more granular control of whitespace only lines. Also converted it and `_uniform_outdent` to `staticmethod`s and added docstrings --- lib/markdown2.py | 44 +++++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index e71f9210..79501bae 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -2196,7 +2196,7 @@ def _wavedrom_block_sub(self, match): return self._uniform_indent( '\n%s%s%s\n' % (open_tag, self._escape_table[waves], close_tag), - lead_indent, include_empty_lines=True + lead_indent, indent_empty_lines=True ) def _do_wavedrom_blocks(self, text): @@ -2607,13 +2607,16 @@ def _outdent(self, text): # Remove one level of line-leading tabs or spaces return self._outdent_re.sub('', text) - def _uniform_outdent(self, text, min_outdent=None, max_outdent=None): - # Removes the smallest common leading indentation from each (non empty) - # line of `text` and returns said indent along with the outdented text. - # The `min_outdent` kwarg makes sure the smallest common whitespace - # must be at least this size - # The `max_outdent` sets the maximum amount a line can be - # outdented by + @staticmethod + def _uniform_outdent(text, min_outdent=None, max_outdent=None): + ''' + Removes the smallest common leading indentation from each (non empty) + line of `text` and returns said indent along with the outdented text. + + Args: + min_outdent: make sure the smallest common whitespace is at least this size + max_outdent: the maximum amount a line can be outdented by + ''' # find the leading whitespace for every line whitespace = [ @@ -2647,11 +2650,26 @@ def _uniform_outdent(self, text, min_outdent=None, max_outdent=None): return outdent, ''.join(outdented) - def _uniform_indent(self, text, indent, include_empty_lines=False): - return ''.join( - (indent + line if line.strip() or include_empty_lines else '') - for line in text.splitlines(True) - ) + @staticmethod + def _uniform_indent(text, indent, include_empty_lines=False, indent_empty_lines=False): + ''' + Uniformly indent a block of text by a fixed amount + + Args: + text: the text to indent + indent: a string containing the indent to apply + include_empty_lines: don't remove whitespace only lines + indent_empty_lines: indent whitespace only lines with the rest of the text + ''' + blocks = [] + for line in text.splitlines(True): + if line.strip() or indent_empty_lines: + blocks.append(indent + line) + elif include_empty_lines: + blocks.append(line) + else: + blocks.append('') + return ''.join(blocks) @staticmethod def _match_overlaps_substr(text, match, substr): From 700ac816b8d7fca9ae1a67b25ed85450dcaae43c Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 4 Jun 2023 21:27:08 +0100 Subject: [PATCH 3/4] Fix `markdown-in-html` extra not working within lists --- lib/markdown2.py | 34 +++++++++++++++--- test/tm-cases/markdown_in_html_in_lists.html | 37 ++++++++++++++++++++ test/tm-cases/markdown_in_html_in_lists.opts | 1 + test/tm-cases/markdown_in_html_in_lists.text | 17 +++++++++ 4 files changed, 84 insertions(+), 5 deletions(-) create mode 100644 test/tm-cases/markdown_in_html_in_lists.html create mode 100644 test/tm-cases/markdown_in_html_in_lists.opts create mode 100644 test/tm-cases/markdown_in_html_in_lists.text diff --git a/lib/markdown2.py b/lib/markdown2.py index 79501bae..8d4469e5 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -363,6 +363,9 @@ def convert(self, text): # Turn block-level HTML blocks into hash entries text = self._hash_html_blocks(text, raw=True) + if 'markdown-in-html' in self.extras: + text = self._do_markdown_in_html(text) + if "fenced-code-blocks" in self.extras and self.safe_mode: text = self._do_fenced_code_blocks(text) @@ -878,27 +881,39 @@ def _hash_html_blocks(self, text, raw=False): return text - def _strict_tag_block_sub(self, text, html_tags_re, callback): + def _strict_tag_block_sub(self, text, html_tags_re, callback, allow_indent=False): + ''' + Finds and substitutes HTML blocks within blocks of text + + Args: + text: the text to search + html_tags_re: a regex pattern of HTML block tags to match against. + For example, `Markdown._block_tags_a` + callback: callback function that receives the found HTML text block + allow_indent: allow matching HTML blocks that are not completely outdented + ''' tag_count = 0 current_tag = html_tags_re block = '' result = '' for chunk in text.splitlines(True): - is_markup = re.match(r'^(?:(?=))?(?)' % current_tag, chunk) + is_markup = re.match( + r'^(\s{0,%s})(?:(?=))?(?)' % ('' if allow_indent else '0', current_tag), chunk + ) block += chunk if is_markup: - if chunk.startswith('' % tag_name, text)) == len(re.findall('' % tag_name, text)) + def _do_markdown_in_html(self, text): + def callback(block): + indent, block = self._uniform_outdent(block) + block = self._hash_html_block_sub(block) + block = self._uniform_indent(block, indent, include_empty_lines=True, indent_empty_lines=False) + return block + + return self._strict_tag_block_sub(text, self._block_tags_a, callback, True) + def _strip_link_definitions(self, text): # Strips link definitions from text, stores the URLs and titles in # hash references. diff --git a/test/tm-cases/markdown_in_html_in_lists.html b/test/tm-cases/markdown_in_html_in_lists.html new file mode 100644 index 00000000..981113f9 --- /dev/null +++ b/test/tm-cases/markdown_in_html_in_lists.html @@ -0,0 +1,37 @@ +
    +
  • Item 1

    + +
    + +
    Block one
    + +

    Some text

    + +
  • +
  • Item 2

    + +
      +
    • Item 3

      + +
        +
      • Item 4

        + +
        + +
        Block two
        + +

        Some text

        + +
      • +
    • +
    • Item 5

      + +
      + +
      Block three
      + +

      Some text

      + +
    • +
  • +
diff --git a/test/tm-cases/markdown_in_html_in_lists.opts b/test/tm-cases/markdown_in_html_in_lists.opts new file mode 100644 index 00000000..25fea79f --- /dev/null +++ b/test/tm-cases/markdown_in_html_in_lists.opts @@ -0,0 +1 @@ +{"extras": ["markdown-in-html"]} diff --git a/test/tm-cases/markdown_in_html_in_lists.text b/test/tm-cases/markdown_in_html_in_lists.text new file mode 100644 index 00000000..e629c55d --- /dev/null +++ b/test/tm-cases/markdown_in_html_in_lists.text @@ -0,0 +1,17 @@ +- Item 1 +
+ ###### Block one + Some text +
+- Item 2 + - Item 3 + - Item 4 +
+ ###### Block two + Some text +
+ - Item 5 +
+ ###### Block three + Some text +
From da54c21aaa105ce8c66952eb005c4d4070d059fa Mon Sep 17 00:00:00 2001 From: Crozzers Date: Mon, 5 Jun 2023 18:37:15 +0100 Subject: [PATCH 4/4] Expand nested_list test case for recent bug fix --- test/tm-cases/nested_list.html | 15 +++++++++++++++ test/tm-cases/nested_list.text | 12 +++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/test/tm-cases/nested_list.html b/test/tm-cases/nested_list.html index 05851236..49624d24 100644 --- a/test/tm-cases/nested_list.html +++ b/test/tm-cases/nested_list.html @@ -34,3 +34,18 @@
  • Item 3 - yes! just a single item
  • + +

    Other more different nested list:

    + +
      +
    • Item 1 +With some space after

    • +
    • Item 2

      + +
        +
      • Item 3 +
          +
        • Item 4
        • +
      • +
    • +
    diff --git a/test/tm-cases/nested_list.text b/test/tm-cases/nested_list.text index 94a2ece1..14fb9291 100644 --- a/test/tm-cases/nested_list.text +++ b/test/tm-cases/nested_list.text @@ -20,4 +20,14 @@ Slightly more nested list: + What + The + Code -* Item 3 - yes! just a single item \ No newline at end of file +* Item 3 - yes! just a single item + + +Other more different nested list: + +- Item 1 + With some space after + +- Item 2 + - Item 3 + - Item 4