Browse Source

Fix case-insensitive parsing of URI schemes

pull/232/head
Jakub Klinkovský 4 years ago
parent
commit
41dbc45f73
3 changed files with 17 additions and 3 deletions
  1. +1
    -1
      mwparserfromhell/parser/ctokenizer/tok_parse.c
  2. +2
    -2
      mwparserfromhell/parser/tokenizer.py
  3. +14
    -0
      tests/tokenizer/external_links.mwtest

+ 1
- 1
mwparserfromhell/parser/ctokenizer/tok_parse.c View File

@@ -30,7 +30,7 @@ SOFTWARE.
#define DIGITS "0123456789"
#define HEXDIGITS "0123456789abcdefABCDEF"
#define ALPHANUM "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
-#define URISCHEME "abcdefghijklmnopqrstuvwxyz0123456789+.-"
+#define URISCHEME "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-"

#define MAX_BRACES 255
#define MAX_ENTITY_SIZE 8


+ 2
- 2
mwparserfromhell/parser/tokenizer.py View File

@@ -366,7 +366,7 @@ class Tokenizer:
self._emit_text("//")
self._head += 2
else:
-valid = "abcdefghijklmnopqrstuvwxyz0123456789+.-"
+valid = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-"
all_valid = lambda: all(char in valid for char in self._read())
scheme = ""
while self._read() is not self.END and all_valid():
@@ -386,7 +386,7 @@ class Tokenizer:

def _parse_free_uri_scheme(self):
"""Parse the URI scheme of a free (no brackets) external link."""
-valid = "abcdefghijklmnopqrstuvwxyz0123456789+.-"
+valid = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-"
scheme = []
try:
# We have to backtrack through the textbuffer looking for our


+ 14
- 0
tests/tokenizer/external_links.mwtest View File

@@ -562,3 +562,17 @@ name: brackets_terminated_by_right_angle
label: an external link terminated by a right angle
input: "[http://foo>bar]"
output: [ExternalLinkOpen(brackets=True), Text(text="http://foo"), ExternalLinkSeparator(suppress_space=True), Text(text=">bar"), ExternalLinkClose()]

---

name: scheme_case
label: a free link with uppercase letters in the URL scheme
input: "HtTp://example.com/"
output: [ExternalLinkOpen(brackets=False), Text(text="HtTp://example.com/"), ExternalLinkClose()]

---

name: bracket_scheme_case
label: an external link with uppercase letters in the URL scheme
input: "[HtTp://example.com/]"
output: [ExternalLinkOpen(brackets=True), Text(text="HtTp://example.com/"), ExternalLinkClose()]

Loading…
Cancel
Save