diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py
index 5bb7059..515a7a2 100644
--- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py
@@ -24,8 +24,7 @@
 from __future__ import unicode_literals
 from math import log
 import re
 
-from . import contexts
-from . import tokens
+from . import contexts, tokens
 from ..compat import htmlentities
 from ..tag_defs import is_parsable
@@ -136,33 +135,33 @@ class Tokenizer(object):
         self._pop()
         raise BadRoute()
 
-    def _write(self, token):
+    def _emit(self, token):
         """Write a token to the end of the current token stack."""
         self._push_textbuffer()
         self._stack.append(token)
 
-    def _write_first(self, token):
+    def _emit_first(self, token):
         """Write a token to the beginning of the current token stack."""
         self._push_textbuffer()
         self._stack.insert(0, token)
 
-    def _write_text(self, text):
+    def _emit_text(self, text):
         """Write text to the current textbuffer."""
         self._textbuffer.append(text)
 
-    def _write_all(self, tokenlist):
+    def _emit_all(self, tokenlist):
         """Write a series of tokens to the current stack at once."""
         if tokenlist and isinstance(tokenlist[0], tokens.Text):
-            self._write_text(tokenlist.pop(0).text)
+            self._emit_text(tokenlist.pop(0).text)
         self._push_textbuffer()
         self._stack.extend(tokenlist)
 
-    def _write_text_then_stack(self, text):
+    def _emit_text_then_stack(self, text):
         """Pop the current stack, write *text*, and then write the stack."""
         stack = self._pop()
-        self._write_text(text)
+        self._emit_text(text)
         if stack:
-            self._write_all(stack)
+            self._emit_all(stack)
         self._head -= 1
 
     def _read(self, delta=0, wrap=False, strict=False):
@@ -198,12 +197,12 @@ class Tokenizer(object):
 
         while braces:
             if braces == 1:
-                return self._write_text_then_stack("{")
+                return self._emit_text_then_stack("{")
             if braces == 2:
                 try:
                     self._parse_template()
                 except BadRoute:
-                    return self._write_text_then_stack("{{")
+                    return self._emit_text_then_stack("{{")
                 break
             try:
                 self._parse_argument()
@@ -213,11 +212,11 @@
                 self._parse_template()
                 braces -= 2
             except BadRoute:
-                return self._write_text_then_stack("{" * braces)
+                return self._emit_text_then_stack("{" * braces)
             if braces:
                 self._head += 1
 
-        self._write_all(self._pop())
+        self._emit_all(self._pop())
         if self._context & contexts.FAIL_NEXT:
             self._context ^= contexts.FAIL_NEXT
 
@@ -229,9 +228,9 @@
         except BadRoute:
             self._head = reset
             raise
-        self._write_first(tokens.TemplateOpen())
-        self._write_all(template)
-        self._write(tokens.TemplateClose())
+        self._emit_first(tokens.TemplateOpen())
+        self._emit_all(template)
+        self._emit(tokens.TemplateClose())
 
     def _parse_argument(self):
         """Parse an argument at the head of the wikicode string."""
@@ -241,9 +240,9 @@
         except BadRoute:
             self._head = reset
             raise
-        self._write_first(tokens.ArgumentOpen())
-        self._write_all(argument)
-        self._write(tokens.ArgumentClose())
+        self._emit_first(tokens.ArgumentOpen())
+        self._emit_all(argument)
+        self._emit(tokens.ArgumentClose())
 
     def _handle_template_param(self):
         """Handle a template parameter at the head of the string."""
@@ -252,22 +251,22 @@
         elif self._context & contexts.TEMPLATE_PARAM_VALUE:
             self._context ^= contexts.TEMPLATE_PARAM_VALUE
         elif self._context & contexts.TEMPLATE_PARAM_KEY:
-            self._write_all(self._pop(keep_context=True))
+            self._emit_all(self._pop(keep_context=True))
         self._context |= contexts.TEMPLATE_PARAM_KEY
-        self._write(tokens.TemplateParamSeparator())
+        self._emit(tokens.TemplateParamSeparator())
         self._push(self._context)
 
     def _handle_template_param_value(self):
         """Handle a template parameter's value at the head of the string."""
-        self._write_all(self._pop(keep_context=True))
+        self._emit_all(self._pop(keep_context=True))
         self._context ^= contexts.TEMPLATE_PARAM_KEY
         self._context |= contexts.TEMPLATE_PARAM_VALUE
-        self._write(tokens.TemplateParamEquals())
+        self._emit(tokens.TemplateParamEquals())
 
     def _handle_template_end(self):
         """Handle the end of a template at the head of the string."""
         if self._context & contexts.TEMPLATE_PARAM_KEY:
-            self._write_all(self._pop(keep_context=True))
+            self._emit_all(self._pop(keep_context=True))
         self._head += 1
         return self._pop()
 
@@ -275,7 +274,7 @@
         """Handle the separator between an argument's name and default."""
         self._context ^= contexts.ARGUMENT_NAME
         self._context |= contexts.ARGUMENT_DEFAULT
-        self._write(tokens.ArgumentSeparator())
+        self._emit(tokens.ArgumentSeparator())
 
     def _handle_argument_end(self):
         """Handle the end of an argument at the head of the string."""
@@ -290,19 +289,19 @@
             wikilink = self._parse(contexts.WIKILINK_TITLE)
         except BadRoute:
             self._head = reset
-            self._write_text("[[")
+            self._emit_text("[[")
         else:
             if self._context & contexts.FAIL_NEXT:
                 self._context ^= contexts.FAIL_NEXT
-            self._write(tokens.WikilinkOpen())
-            self._write_all(wikilink)
-            self._write(tokens.WikilinkClose())
+            self._emit(tokens.WikilinkOpen())
+            self._emit_all(wikilink)
+            self._emit(tokens.WikilinkClose())
 
     def _handle_wikilink_separator(self):
         """Handle the separator between a wikilink's title and its text."""
         self._context ^= contexts.WIKILINK_TITLE
         self._context |= contexts.WIKILINK_TEXT
-        self._write(tokens.WikilinkSeparator())
+        self._emit(tokens.WikilinkSeparator())
 
     def _handle_wikilink_end(self):
         """Handle the end of a wikilink at the head of the string."""
@@ -324,13 +323,13 @@
             title, level = self._parse(context)
         except BadRoute:
             self._head = reset + best - 1
-            self._write_text("=" * best)
+            self._emit_text("=" * best)
         else:
-            self._write(tokens.HeadingStart(level=level))
+            self._emit(tokens.HeadingStart(level=level))
             if level < best:
-                self._write_text("=" * (best - level))
-            self._write_all(title)
-            self._write(tokens.HeadingEnd())
+                self._emit_text("=" * (best - level))
+            self._emit_all(title)
+            self._emit(tokens.HeadingEnd())
         finally:
             self._global ^= contexts.GL_HEADING
 
@@ -349,28 +348,28 @@
             after, after_level = self._parse(self._context)
         except BadRoute:
             if level < best:
-                self._write_text("=" * (best - level))
+                self._emit_text("=" * (best - level))
             self._head = reset + best - 1
             return self._pop(), level
         else:  # Found another closure
-            self._write_text("=" * best)
-            self._write_all(after)
+            self._emit_text("=" * best)
+            self._emit_all(after)
             return self._pop(), after_level
 
     def _really_parse_entity(self):
         """Actually parse an HTML entity and ensure that it is valid."""
-        self._write(tokens.HTMLEntityStart())
+        self._emit(tokens.HTMLEntityStart())
         self._head += 1
 
         this = self._read(strict=True)
         if this == "#":
             numeric = True
-            self._write(tokens.HTMLEntityNumeric())
+            self._emit(tokens.HTMLEntityNumeric())
             self._head += 1
             this = self._read(strict=True)
             if this[0].lower() == "x":
                 hexadecimal = True
-                self._write(tokens.HTMLEntityHex(char=this[0]))
+                self._emit(tokens.HTMLEntityHex(char=this[0]))
                 this = this[1:]
                 if not this:
                     self._fail_route()
@@ -396,8 +395,8 @@
             if this not in htmlentities.entitydefs:
                 self._fail_route()
 
-        self._write(tokens.Text(text=this))
-        self._write(tokens.HTMLEntityEnd())
+        self._emit(tokens.Text(text=this))
+        self._emit(tokens.HTMLEntityEnd())
 
     def _parse_entity(self):
         """Parse an HTML entity at the head of the wikicode string."""
@@ -407,9 +406,9 @@
             self._really_parse_entity()
         except BadRoute:
             self._head = reset
-            self._write_text(self._read())
+            self._emit_text(self._read())
         else:
-            self._write_all(self._pop())
+            self._emit_all(self._pop())
 
     def _parse_comment(self):
         """Parse an HTML comment at the head of the wikicode string."""
@@ -419,11 +418,11 @@
             comment = self._parse(contexts.COMMENT)
         except BadRoute:
             self._head = reset
-            self._write_text("