From 7bce2f4e96da43e71fb1fc89f1cc5645ed32fce2 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Thu, 8 Aug 2013 21:27:23 -0400
Subject: [PATCH] Parse *, #; add another test.

---
Review note: this patch was edited after export.

* The list check is now its own start-of-line branch placed before the
  horizontal-rule branch, which is left exactly as it was. Previously the
  combined branch matched every character at the start of a line and fell
  back to emitting "-", so any character other than "#", "*" or a four-dash
  rule was replaced by a literal "-".
* The _parse_list docstring now describes only the "#" and "*" markers the
  code handles. The ul_ol_dt_dd_mix test added below expects "dt"/"dd" tags
  for ";" and ":", which _parse_list does not emit.
* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy; verify context-line indentation against the target file (or apply
  with --ignore-whitespace). The blob ids on the "index" line are those of
  the original export.

 mwparserfromhell/parser/tokenizer.py   | 19 +++++++++++++++++++
 tests/tokenizer/tags_wikimarkup.mwtest |  7 +++++++
 2 files changed, 26 insertions(+)

diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py
index 7754a6b..ef45ee9 100644
--- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py
@@ -629,6 +629,23 @@ class Tokenizer(object):
         else:
             self._emit_all(tag)
 
+    def _parse_list(self):
+        """Parse a wiki-style list (``#`` or ``*``) at the string head.
+
+        One self-closing ``li`` tag is emitted for each consecutive marker.
+        ``;`` and ``:`` markers are not recognized by this method.
+        """
+        def emit():
+            """Emit a self-closing ``li`` tag for the marker at the head."""
+            self._emit(tokens.TagOpenOpen(wiki_markup=self._read()))
+            self._emit_text("li")
+            self._emit(tokens.TagCloseSelfclose())
+
+        emit()
+        while self._read(1) in ("#", "*"):
+            self._head += 1
+            emit()
+
     def _parse_hr(self):
         """Parse a wiki-style horizontal rule (``----``) at the string head."""
         length = 4
@@ -793,6 +810,8 @@ class Tokenizer(object):
                     self._emit_text("<")
             elif this == ">" and self._context & contexts.TAG_CLOSE:
                 return self._handle_tag_close_close()
+            elif this in ("#", "*") and self._read(-1) in ("\n", self.START):
+                self._parse_list()
             elif this == next == "-" and self._read(-1) in ("\n", self.START):
                 if self._read(2) == self._read(3) == "-":
                     self._parse_hr()
diff --git a/tests/tokenizer/tags_wikimarkup.mwtest b/tests/tokenizer/tags_wikimarkup.mwtest
index e1891f5..9ce71b6 100644
--- a/tests/tokenizer/tags_wikimarkup.mwtest
+++ b/tests/tokenizer/tags_wikimarkup.mwtest
@@ -370,6 +370,13 @@ output: [TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Tag
 
 ---
 
+name: ul_ol_dt_dd_mix
+label: an assortment of uls, ols, dds, and dts
+input: ";:#*foo\n:#*;foo\n#*;:foo\n*;:#foo"
+output: [TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), Text(text="foo\n"), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="foo\n"), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="foo\n"), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), Text(text="foo")]
+
+---
+
 name: hr_text_before
 label: text before an otherwise-valid hr
 input: "foo----"