Browse Source

Condense code.

tags/v0.3.1
Ben Kurtovic 10 years ago
parent
commit
287bf71158
2 changed files with 38 additions and 46 deletions
  1. +19
    -24
      mwparserfromhell/parser/tokenizer.c
  2. +19
    -22
      mwparserfromhell/parser/tokenizer.py

+ 19
- 24
mwparserfromhell/parser/tokenizer.c View File

@@ -1037,17 +1037,12 @@ Tokenizer_is_free_link(Tokenizer* self, Py_UNICODE this, Py_UNICODE next)
Py_UNICODE after = Tokenizer_READ(self, 2);
int ctx = self->topstack->context;

return ((this == *"" || this == *"\n" || this == *"[" || this == *"]") ||
(this == *"|" && ctx & LC_TEMPLATE) ||
(this == *"=" && ctx & LC_TEMPLATE_PARAM_KEY) ||
(this == *"}" && next == *"}" && ctx & LC_TEMPLATE) ||
(this == *"}" && next == *"}" && after == *"}"
&& ctx & LC_ARGUMENT) ||
(this == *"=" && ctx & LC_HEADING) ||
(this == *"<" && next == *"/" && after != *"") ||
(this == *"<" && next != *"!" && !(ctx & LC_TAG_CLOSE)) ||
(this == *">" && ctx & LC_TAG_CLOSE) ||
(this == *"'" && next == *"'"));
return (this == *"" || this == *"\n" || this == *"[" || this == *"]" ||
this == *"<" || this == *">" || (this == *"'" && next == *"'") ||
(this == *"|" && ctx & LC_TEMPLATE) ||
(this == *"=" && ctx & (LC_TEMPLATE_PARAM_KEY | LC_HEADING)) ||
(this == *"}" && next == *"}" &&
(ctx & LC_TEMPLATE || (after == *"}" && ctx & LC_ARGUMENT))));
}

/*
@@ -1073,7 +1068,19 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets,
while (1) {
this = Tokenizer_READ(self, 0);
next = Tokenizer_READ(self, 1);
if (!brackets && Tokenizer_is_free_link(self, this, next)) {
if (this == *"&") {
PUSH_TAIL_BUFFER(*extra, NULL)
if (Tokenizer_parse_entity(self))
return NULL;
}
else if (this == *"<" && next == *"!"
&& Tokenizer_READ(self, 2) == *"-"
&& Tokenizer_READ(self, 3) == *"-") {
PUSH_TAIL_BUFFER(*extra, NULL)
if (Tokenizer_parse_comment(self))
return NULL;
}
else if (!brackets && Tokenizer_is_free_link(self, this, next)) {
self->head--;
return Tokenizer_pop(self);
}
@@ -1086,18 +1093,6 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets,
}
else if (this == *"]")
return Tokenizer_pop(self);
else if (this == *"&") {
PUSH_TAIL_BUFFER(*extra, NULL)
if (Tokenizer_parse_entity(self))
return NULL;
}
else if (this == *"<" && next == *"!"
&& Tokenizer_READ(self, 2) == *"-"
&& Tokenizer_READ(self, 3) == *"-") {
PUSH_TAIL_BUFFER(*extra, NULL)
if (Tokenizer_parse_comment(self))
return NULL;
}
else if (this == *" ") {
if (brackets) {
if (Tokenizer_emit(self, ExternalLinkSeparator))


+ 19
- 22
mwparserfromhell/parser/tokenizer.py View File

@@ -389,16 +389,13 @@ class Tokenizer(object):
"""Return whether the current head is the end of a free link."""
# Built from _parse()'s end sentinels:
after, ctx = self._read(2), self._context
return (this is self.END or this in ("\n", "[", "]") or
this == "|" and ctx & contexts.TEMPLATE or
this == "=" and ctx & contexts.TEMPLATE_PARAM_KEY or
this == next == "}" and ctx & contexts.TEMPLATE or
this == next == after == "}" and ctx & contexts.ARGUMENT or
this == "=" and ctx & contexts.HEADING or
this == "<" and next == "/" and after is not self.END or
this == "<" and next != "!" and not ctx & contexts.TAG_CLOSE or
this == ">" and ctx & contexts.TAG_CLOSE or
this == next == "'")
equal_sign_contexts = contexts.TEMPLATE_PARAM_KEY | contexts.HEADING
return (this in (self.END, "\n", "[", "]", "<", ">") or
this == next == "'" or
(this == "|" and ctx & contexts.TEMPLATE) or
(this == "=" and ctx & equal_sign_contexts) or
(this == next == "}" and ctx & contexts.TEMPLATE) or
(this == next == after == "}" and ctx & contexts.ARGUMENT))

def _really_parse_external_link(self, brackets):
"""Really parse an external link."""
@@ -414,18 +411,7 @@ class Tokenizer(object):
tail = ""
while True:
this, next = self._read(), self._read(1)
if not brackets and self._is_free_link_end(this, next):
return self._pop(), tail, -1
elif this is self.END or this == "\n":
self._fail_route()
elif this == next == "{" and self._can_recurse():
if tail:
self._emit_text(tail)
tail = ""
self._parse_template_or_argument()
elif this == "]":
return self._pop(), tail, 0
elif this == "&":
if this == "&":
if tail:
self._emit_text(tail)
tail = ""
@@ -436,6 +422,17 @@ class Tokenizer(object):
self._emit_text(tail)
tail = ""
self._parse_comment()
elif not brackets and self._is_free_link_end(this, next):
return self._pop(), tail, -1
elif this is self.END or this == "\n":
self._fail_route()
elif this == next == "{" and self._can_recurse():
if tail:
self._emit_text(tail)
tail = ""
self._parse_template_or_argument()
elif this == "]":
return self._pop(), tail, 0
elif " " in this:
before, after = this.split(" ", 1)
if brackets:


Loading…
Cancel
Save