diff --git a/mwparserfromhell/parser/ctokenizer/tok_parse.c b/mwparserfromhell/parser/ctokenizer/tok_parse.c index 879a5db..d36ce56 100644 --- a/mwparserfromhell/parser/ctokenizer/tok_parse.c +++ b/mwparserfromhell/parser/ctokenizer/tok_parse.c @@ -30,7 +30,7 @@ SOFTWARE. #define DIGITS "0123456789" #define HEXDIGITS "0123456789abcdefABCDEF" #define ALPHANUM "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" -#define URISCHEME "abcdefghijklmnopqrstuvwxyz0123456789+.-" +#define URISCHEME "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-" #define MAX_BRACES 255 #define MAX_ENTITY_SIZE 8 diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 6acfb8d..c48e180 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -366,7 +366,7 @@ class Tokenizer: self._emit_text("//") self._head += 2 else: - valid = "abcdefghijklmnopqrstuvwxyz0123456789+.-" + valid = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-" all_valid = lambda: all(char in valid for char in self._read()) scheme = "" while self._read() is not self.END and all_valid(): @@ -386,7 +386,7 @@ class Tokenizer: def _parse_free_uri_scheme(self): """Parse the URI scheme of a free (no brackets) external link.""" - valid = "abcdefghijklmnopqrstuvwxyz0123456789+.-" + valid = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-" scheme = [] try: # We have to backtrack through the textbuffer looking for our diff --git a/tests/tokenizer/external_links.mwtest b/tests/tokenizer/external_links.mwtest index 7fed7b6..ca64fd0 100644 --- a/tests/tokenizer/external_links.mwtest +++ b/tests/tokenizer/external_links.mwtest @@ -562,3 +562,17 @@ name: brackets_terminated_by_right_angle label: an external link terminated by a right angle input: "[http://foo>bar]" output: [ExternalLinkOpen(brackets=True), Text(text="http://foo"), ExternalLinkSeparator(suppress_space=True), Text(text=">bar"), ExternalLinkClose()] + +--- + +name: scheme_case +label: a free link with uppercase letters in the URL scheme +input: "HtTp://example.com/" +output: [ExternalLinkOpen(brackets=False), Text(text="HtTp://example.com/"), ExternalLinkClose()] + +--- + +name: bracket_scheme_case +label: an external link with uppercase letters in the URL scheme +input: "[HtTp://example.com/]" +output: [ExternalLinkOpen(brackets=True), Text(text="HtTp://example.com/"), ExternalLinkClose()]