From 3fd13100da734ccf6c85756aad67b17d014c1c7d Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Fri, 17 Aug 2012 04:24:52 -0400
Subject: [PATCH] Another speedup by reducing calls to _read().

---
 mwparserfromhell/parser/tokenizer.py | 38 ++++++++++++++++++--------------------
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py
index 6e6c05b..eb42c28 100644
--- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py
@@ -88,15 +88,10 @@ class Tokenizer(object):
         index = self._head + delta
         if index < 0 and (not wrap or abs(index) > len(self._text)):
             return self.START
-        if index >= len(self._text):
+        try:
+            return self._text[index]
+        except IndexError:
             return self.END
-        return self._text[index]
-
-    def _at_head(self, chars):
-        length = len(chars)
-        if length == 1:
-            return self._read() == chars
-        return all([self._read(i) == chars[i] for i in xrange(len(chars))])
 
     def _parse_template(self):
         reset = self._head
@@ -146,7 +141,7 @@ class Tokenizer(object):
             self._push()
             self._write(tokens.HTMLEntityStart())
             numeric = hexadecimal = False
-            if self._at_head("#"):
+            if self._read() == "#":
                 numeric = True
                 self._write(tokens.HTMLEntityNumeric())
                 if self._read(1).lower() == "x":
@@ -160,7 +155,8 @@ class Tokenizer(object):
             if not numeric and not hexadecimal:
                 valid += string.ascii_letters
             while True:
-                if self._at_head(";"):
+                this = self._read()
+                if this == ";":
                     text = "".join(text)
                     if numeric:
                         test = int(text, 16) if hexadecimal else int(text)
@@ -172,9 +168,9 @@ class Tokenizer(object):
                     self._write(tokens.Text(text=text))
                     self._write(tokens.HTMLEntityEnd())
                     break
-                if self._read() is self.END or self._read() not in valid:
+                if this is self.END or this not in valid:
                     raise BadRoute(self._pop())
-                text.append(self._read())
+                text.append(this)
                 self._head += 1
         except BadRoute:
             self._head = reset
@@ -185,26 +181,28 @@ class Tokenizer(object):
     def _parse(self, context=0):
         self._push(context)
         while True:
-            if self._read() not in self.SENTINELS:
+            this = self._read()
+            if this not in self.SENTINELS:
                 self._write(self._read(), text=True)
                 self._head += 1
                 continue
-            if self._read() is self.END:
+            if this is self.END:
                 if self._context & contexts.TEMPLATE:
                     raise BadRoute(self._pop())
                 return self._pop()
-            if self._at_head("{{"):
+            next = self._read(1)
+            if this == next == "{":
                 self._parse_template()
-            elif self._at_head("|") and self._context & contexts.TEMPLATE:
+            elif this == "|" and self._context & contexts.TEMPLATE:
                 self._handle_template_param()
-            elif self._at_head("=") and self._context & contexts.TEMPLATE_PARAM_KEY:
+            elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY:
                 self._handle_template_param_value()
-            elif self._at_head("}}") and self._context & contexts.TEMPLATE:
+            elif this == next == "}" and self._context & contexts.TEMPLATE:
                 return self._handle_template_end()
-            elif self._at_head("&"):
+            elif this == "&":
                 self._parse_entity()
             else:
-                self._write(self._read(), text=True)
+                self._write(this, text=True)
                 self._head += 1
 
     def tokenize(self, text):
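
# -----------------------------------------------------------------------
# Illustration (not part of the patch): a minimal, self-contained sketch
# of the two patterns this commit applies -- replacing _read()'s explicit
# bounds check with an EAFP try/except IndexError, and caching the head
# character in a local variable ("this") instead of calling _read() for
# every comparison. The Reader class and scan_* helpers are hypothetical
# simplifications, not mwparserfromhell's API; the real _read() also keeps
# its guard for negative (wrapped) indexes ahead of the try block.


class Reader(object):
    END = object()  # sentinel returned when reading past the end of input

    def __init__(self, text):
        self._text = text
        self._head = 0

    def read_checked(self, delta=0):
        # Before: an explicit bounds check (and a len() call) on every read.
        index = self._head + delta
        if index >= len(self._text):
            return self.END
        return self._text[index]

    def read_eafp(self, delta=0):
        # After: just index and catch IndexError; the common in-bounds case
        # pays for no len() call and no comparison.
        try:
            return self._text[self._head + delta]
        except IndexError:
            return self.END


def scan_repeated(reader):
    # Before: each comparison re-invokes read_checked(), repeating its work.
    count = 0
    while reader.read_checked() is not reader.END:
        if reader.read_checked() == "{" and reader.read_checked(1) == "{":
            count += 1
        reader._head += 1
    return count


def scan_cached(reader):
    # After: read once per iteration and reuse the local, mirroring the
    # `this = self._read()` change in _parse() above.
    count = 0
    while True:
        this = reader.read_eafp()
        if this is reader.END:
            return count
        if this == "{" and reader.read_eafp(1) == "{":
            count += 1
        reader._head += 1


if __name__ == "__main__":
    text = "foo {{bar|baz=biz}} " * 1000
    for scan in (scan_repeated, scan_cached):
        print("%s -> %d" % (scan.__name__, scan(Reader(text))))
    # Both scans find the same 1000 template openings; the cached version
    # simply does it with fewer method calls per character.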