From a365aed25096c7fa6b8c3bd5bb2fd638f09bafd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20H=C3=BCbelbauer?= Date: Tue, 14 Nov 2017 19:25:51 +0100 Subject: [PATCH 1/3] Implement inline-toc extra for TOC HTML insertion after first heading Fixes #279 Instead of adding a new CLI switch and trying to figure out whether CLI vs module usage is distinguishable, I have decided to create a sister extra for toc and make it behave the same way, but insert the TOC HTML into the Markdown HTML (after the first heading) as well as preserving it as a property. This gives CLI users the flexibility to just replace `toc` with `inline-toc` and get a table of contents right after the first heading without any worries about backwards compatibility or polluting the CLI switches. I believe this to be the cleanest solution. --- lib/markdown2.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index 84d7d6c2..715536aa 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -241,7 +241,7 @@ def __init__(self, html4tags=False, tab_width=4, safe_mode=None, extras = dict([(e, None) for e in extras]) self.extras.update(extras) assert isinstance(self.extras, dict) - if "toc" in self.extras and "header-ids" not in self.extras: + if ("toc" in self.extras or "inline-toc" in self.extras) and "header-ids" not in self.extras: self.extras["header-ids"] = None # "toc" implies "header-ids" self._instance_extras = self.extras.copy() @@ -391,8 +391,20 @@ def convert(self, text): text += "\n" rv = UnicodeWithAttrs(text) - if "toc" in self.extras: + if ("toc" in self.extras or "inline-toc" in self.extras): + # Generate TOC HTML as a property to be able to hijack it in "inline-toc" for subsctitution + # TODO (Tomas Hubelbauer): See about using that as a static method without a need for a throwaway instance. 
rv._toc = self._toc + if "inline-toc" in self.extras: + if self._toc[0] is None: + rv = UnicodeWithAttrs(rv.toc_html + text) + else: + (level, id, name) = self._toc[0] + # Need to use a regex and rely on the HTML structure, tracking the regex's `end()` across all the HTML transformations would be extremely unreliable + # TODO (Tomas Hubelbauer): Consider looser regex which allows for more attributes in order to to find heading even when more extras add attributes to it (future-proof) + pattern = r"\{}<\/h{}\>".format(level, id, re.escape(name), level) + text = re.sub(pattern, "\g<0>" + rv.toc_html, text) + rv = UnicodeWithAttrs(text) if "metadata" in self.extras: rv.metadata = self.metadata return rv @@ -1540,7 +1552,7 @@ def _h_sub(self, match): if header_id: header_id_attr = ' id="%s"' % header_id html = self._run_span_gamut(header_group) - if "toc" in self.extras and header_id: + if ("toc" in self.extras or "inline-toc" in self.extras) and header_id: self._toc_add_entry(n, header_id, html) return "%s\n\n" % (n, header_id_attr, html, n) From 64cd722439dbad76a2a28c7275430bdd94f2782c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20H=C3=BCbelbauer?= Date: Thu, 16 Nov 2017 16:36:47 +0100 Subject: [PATCH 2/3] Change toc_html to non-instance method and simplify TOC HTML inclusion as a result --- lib/markdown2.py | 87 +++++++++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 45 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index 715536aa..8f73380a 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -390,21 +390,17 @@ def convert(self, text): text += "\n" + # Insert TOC HTML into the MD HTML after the first heading element if any headings are present. 
+ if ("inline-toc" in self.extras and self._toc[0] is not None): + (level, id, name) = self._toc[0] + # Use a regex and rely on the HTML structure as opposed to tracking the heading regex's `end()` across all the HTML transformations (unreliable) + # TODO (Tomas Hubelbauer): Consider looser regex which allows for more attributes in order to to find heading even when more extras add attributes to it (future-proof) + pattern = r"\{}<\/h{}\>".format(level, id, re.escape(name), level) + text = re.sub(pattern, "\g<0>\n" + toc_html(self._toc), text) + rv = UnicodeWithAttrs(text) if ("toc" in self.extras or "inline-toc" in self.extras): - # Generate TOC HTML as a property to be able to hijack it in "inline-toc" for subsctitution - # TODO (Tomas Hubelbauer): See about using that as a static method without a need for a throwaway instance. rv._toc = self._toc - if "inline-toc" in self.extras: - if self._toc[0] is None: - rv = UnicodeWithAttrs(rv.toc_html + text) - else: - (level, id, name) = self._toc[0] - # Need to use a regex and rely on the HTML structure, tracking the regex's `end()` across all the HTML transformations would be extremely unreliable - # TODO (Tomas Hubelbauer): Consider looser regex which allows for more attributes in order to to find heading even when more extras add attributes to it (future-proof) - pattern = r"\{}<\/h{}\>".format(level, id, re.escape(name), level) - text = re.sub(pattern, "\g<0>" + rv.toc_html, text) - rv = UnicodeWithAttrs(text) if "metadata" in self.extras: rv.metadata = self.metadata return rv @@ -2234,6 +2230,39 @@ class MarkdownWithExtras(Markdown): # ---- internal support functions +def toc_html(toc): + """Return the HTML for the current TOC. + + This expects the `_toc` attribute to have been set on this instance. 
+ """ + if toc is None: + return None + + def indent(): + return ' ' * (len(h_stack) - 1) + lines = [] + h_stack = [0] # stack of header-level numbers + for level, id, name in toc: + if level > h_stack[-1]: + lines.append("%s
    " % indent()) + h_stack.append(level) + elif level == h_stack[-1]: + lines[-1] += "" + else: + while level < h_stack[-1]: + h_stack.pop() + if not lines[-1].endswith(""): + lines[-1] += "" + lines.append("%s
" % indent()) + lines.append('%s
  • %s' % ( + indent(), id, name)) + while len(h_stack) > 1: + h_stack.pop() + if not lines[-1].endswith("
  • "): + lines[-1] += "" + lines.append("%s" % indent()) + return '\n'.join(lines) + '\n' + class UnicodeWithAttrs(unicode): """A subclass of unicode used for the return value of conversion to possibly attach some attributes. E.g. the "toc_html" attribute when @@ -2241,39 +2270,7 @@ class UnicodeWithAttrs(unicode): """ metadata = None _toc = None - def toc_html(self): - """Return the HTML for the current TOC. - - This expects the `_toc` attribute to have been set on this instance. - """ - if self._toc is None: - return None - - def indent(): - return ' ' * (len(h_stack) - 1) - lines = [] - h_stack = [0] # stack of header-level numbers - for level, id, name in self._toc: - if level > h_stack[-1]: - lines.append("%s
      " % indent()) - h_stack.append(level) - elif level == h_stack[-1]: - lines[-1] += "" - else: - while level < h_stack[-1]: - h_stack.pop() - if not lines[-1].endswith(""): - lines[-1] += "" - lines.append("%s
    " % indent()) - lines.append('%s
  • %s' % ( - indent(), id, name)) - while len(h_stack) > 1: - h_stack.pop() - if not lines[-1].endswith("
  • "): - lines[-1] += "" - lines.append("%s" % indent()) - return '\n'.join(lines) + '\n' - toc_html = property(toc_html) + toc_html = property(toc_html(_toc)) ## {{{ http://code.activestate.com/recipes/577257/ (r1) _slugify_strip_re = re.compile(r'[^\w\s-]') From c707ba2ceb99d46bc13636d6025cc52981592d63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20H=C3=BCbelbauer?= Date: Thu, 16 Nov 2017 17:01:49 +0100 Subject: [PATCH 3/3] Fix calculating toc_html on UnicodeWithAttrs unpythonically `toc_html = property(calculate_toc_html(self._toc))` doesn't seem to work because `self` is not valid in that context so I had to hack around it with the useless proxy method. --- lib/markdown2.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index 8f73380a..0e37be6e 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -396,7 +396,7 @@ def convert(self, text): # Use a regex and rely on the HTML structure as opposed to tracking the heading regex's `end()` across all the HTML transformations (unreliable) # TODO (Tomas Hubelbauer): Consider looser regex which allows for more attributes in order to to find heading even when more extras add attributes to it (future-proof) pattern = r"\{}<\/h{}\>".format(level, id, re.escape(name), level) - text = re.sub(pattern, "\g<0>\n" + toc_html(self._toc), text) + text = re.sub(pattern, "\g<0>\n" + calculate_toc_html(self._toc), text) rv = UnicodeWithAttrs(text) if ("toc" in self.extras or "inline-toc" in self.extras): @@ -2230,7 +2230,7 @@ class MarkdownWithExtras(Markdown): # ---- internal support functions -def toc_html(toc): +def calculate_toc_html(toc): """Return the HTML for the current TOC. This expects the `_toc` attribute to have been set on this instance. 
@@ -2270,7 +2270,9 @@ class UnicodeWithAttrs(unicode): """ metadata = None _toc = None - toc_html = property(toc_html(_toc)) + def toc_html(self): + return calculate_toc_html(self._toc) + toc_html = property(toc_html) ## {{{ http://code.activestate.com/recipes/577257/ (r1) _slugify_strip_re = re.compile(r'[^\w\s-]')