Ver a proveniência

Finish external links.

tags/v0.3
Ben Kurtovic há 11 anos
ascendente
cometimento
bd10aab823
1 ficheiros alterados com 48 adições e 16 eliminações
  1. +48
    -16
      mwparserfromhell/parser/tokenizer.py

+ 48
- 16
mwparserfromhell/parser/tokenizer.py Ver ficheiro

@@ -357,7 +357,6 @@ class Tokenizer(object):
slashes = self._read() == self._read(1) == "/"
if not is_scheme(scheme, slashes):
raise BadRoute()
parentheses = False
self._push(contexts.EXT_LINK_URI)
self._emit_text(scheme)
self._emit_text(":")
@@ -365,43 +364,75 @@ class Tokenizer(object):
self._emit_text("//")
self._head += 2

def _handle_free_link_text(self, punct, tail, this):
    """Handle text in a free ext link, including trailing punctuation.

    Trailing punctuation at the end of ``this`` is not emitted right away.
    It is appended to ``tail`` and only emitted later if more
    non-punctuation link text follows it.

    Args:
        punct: Tuple of single-character strings currently treated as
            trailing punctuation.
        tail: Punctuation held back from earlier chunks and not yet emitted.
        this: The chunk of link text to process.

    Returns:
        A ``(punct, tail)`` pair holding the possibly reduced punctuation
        tuple and the updated held-back tail.
    """
    if "(" in this and ")" in punct:
        # ')' is no longer valid punctuation once the link contains a '('.
        # Filter it out by value so the result does not depend on where
        # ')' happens to sit inside the tuple.
        punct = tuple(char for char in punct if char != ")")
    if this.endswith(punct):
        # Every entry of punct is a single character, so stripping that
        # character set from the right removes exactly the trailing run.
        stripped = this.rstrip("".join(punct))
        if stripped and tail:
            # Real text follows the held-back punctuation, so that
            # punctuation was part of the link after all.
            self._emit_text(tail)
            tail = ""
        tail += this[len(stripped):]
        this = stripped
    elif tail:
        self._emit_text(tail)
        tail = ""
    self._emit_text(this)
    return punct, tail

# NOTE(review): this listing looks like a rendered diff whose +/- markers and
# indentation were lost, so removed and added lines appear interleaved (for
# example the two consecutive `elif " " in this` headers, the repeated
# `elif this == "]"` branch, and the mix of two- and three-element return
# tuples). Confirm against the repository before relying on any single line.
def _really_parse_external_link(self, brackets):
"""Really parse an external link."""
# The scheme parser is chosen by `brackets`. `invalid` lists the leading
# characters that make the route fail in the check that follows.
if brackets:
self._parse_bracketed_uri_scheme()
invalid = ("\n", " ", "]")
else:
self._parse_free_uri_scheme()
if self._read() in self.MARKERS or self._read()[0].isspace(): ## Should actually check for valid chars
invalid = ("\n", " ", "[", "]")
# `punct` is threaded through _handle_free_link_text() further down.
# NOTE(review): "\." is not a recognised escape sequence, so Python keeps the
# backslash and this tuple contains both a backslash and "."; newer Python
# versions warn about it. Confirm whether the backslash is intended.
punct = tuple(",;\.:!?)")
if self._read() is self.END or self._read()[0] in invalid:
self._fail_route()
# Punctuation held back by _handle_free_link_text() and not yet emitted.
tail = ""
while True:
# NOTE: `next` shadows the builtin of the same name inside this function.
this, next = self._read(), self._read(1)
# End of input or a newline: with brackets the route fails. Both a
# two-element and a three-element return appear below; the caller shown later
# in this listing unpacks (link, extra, delta) and adds delta to self._head,
# so the three-element form is presumably the current one - verify.
if this is self.END or this == "\n":
if brackets:
self._fail_route()
self._head -= 1
return self._pop(), None
return self._pop(), tail, -1
# "{{": flush any held-back tail text, then hand off to the template/argument
# parser.
elif this == next == "{" and self._can_recurse():
if not brackets and tail:
self._emit_text(tail)
tail = ""
self._parse_template_or_argument()
# "[" is emitted as text when brackets is set; otherwise the link is popped
# and returned with delta -1.
elif this == "[":
if brackets:
self._emit_text("[")
else:
return self._pop(), tail, -1
# "]" pops and returns the link; the delta is 0 with brackets, otherwise -1.
elif this == "]":
return self._pop(), tail, 0 if brackets else -1
# "&": flush any held-back tail text, then hand off to the entity parser.
elif this == "&":
if not brackets and tail:
self._emit_text(tail)
tail = ""
self._parse_entity()
# NOTE(review): second `elif this == "]"` branch with a two-element return;
# presumably the removed side of the diff - verify.
elif this == "]":
if not brackets:
self._head -= 1
return self._pop(), None
# NOTE(review): `parentheses` is not assigned anywhere earlier in this
# function as shown; presumably this branch is also from the removed side of
# the diff - verify.
elif this == "(" and not brackets and not parentheses:
parentheses = True
self._emit_text(this)
# Text containing a space is split once at the first space into `before` and
# `after`.
elif " " in this: ## Should be a more general whitespace check
elif " " in this:
before, after = this.split(" ", 1)
self._emit_text(before)
# With brackets: emit `before`, a separator token and `after`, toggle the
# EXT_LINK_URI context bit, set EXT_LINK_TITLE, advance the head by one and
# continue via self._parse(push=False).
if brackets:
self._emit_text(before)
self._emit(tokens.ExternalLinkSeparator())
self._emit_text(after)
self._context ^= contexts.EXT_LINK_URI
self._context |= contexts.EXT_LINK_TITLE
self._head += 1
return self._parse(push=False), None
return self._pop(), " " + after
return self._parse(push=False), None, 0
# Without brackets: `before` goes through the trailing-punctuation handler,
# and the held-back tail plus the text after the space is returned as the
# second tuple element.
punct, tail = self._handle_free_link_text(punct, tail, before)
return self._pop(), tail + " " + after, 0
# Any other text: use the punctuation handler without brackets, otherwise
# emit the text directly.
elif not brackets:
punct, tail = self._handle_free_link_text(punct, tail, this)
else:
self._emit_text(this)
# Advances the head by one; presumably the last statement of the loop body.
self._head += 1
@@ -424,7 +455,7 @@ class Tokenizer(object):
bad_context = self._context & contexts.INVALID_LINK
if bad_context or not self._can_recurse():
raise BadRoute()
link, extra = self._really_parse_external_link(brackets)
link, extra, delta = self._really_parse_external_link(brackets)
except BadRoute:
self._head = reset
if not brackets and self._context & contexts.DL_TERM:
@@ -438,6 +469,7 @@ class Tokenizer(object):
self._emit(tokens.ExternalLinkOpen(brackets=brackets))
self._emit_all(link)
self._emit(tokens.ExternalLinkClose())
self._head += delta
if extra:
self._emit_text(extra)



Carregando…
Cancelar
Guardar