Jakub Klinkovský 4 years ago
parent
commit
1cf0754e73
2 changed files with 16 additions and 1 deletions
  1. +2
    -1
      mwparserfromhell/parser/tokenizer.py
  2. +14
    -0
      tests/tokenizer/external_links.mwtest

+ 2
- 1
mwparserfromhell/parser/tokenizer.py View File

@@ -393,7 +393,8 @@ class Tokenizer:
# scheme since it was just parsed as text:
for chunk in reversed(self._textbuffer):
for char in reversed(chunk):
- if char.isspace() or char in self.MARKERS:
+ # stop at the first non-word character
+ if re.fullmatch(r"\W", char):
raise StopIteration()
if char not in valid:
raise BadRoute()


+ 14
- 0
tests/tokenizer/external_links.mwtest View File

@@ -478,3 +478,17 @@ name: brackets_scheme_title_but_no_url
label: brackets around a scheme, colon, and slashes, with a title
input: "[http:// Example]"
output: [Text(text="[http:// Example]")]

---

name: url_preceded_by_non_word_character
label: non-word character immediately before a valid URL
input: "svn+ssh://server.domain.com:/reponame"
output: [Text(text="svn+"), ExternalLinkOpen(brackets=False), Text(text="ssh://server.domain.com:/reponame"), ExternalLinkClose()]

---

name: url_preceded_by_underscore
label: underscore immediately before a valid URL
input: "svn_ssh://server.domain.com:/reponame"
output: [Text(text="svn_ssh://server.domain.com:/reponame")]

Loading…
Cancel
Save