From 22e869b1429dabd30976e4bdb8b819ed240c3f29 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 19 May 2013 01:45:09 -0400 Subject: [PATCH] Fix a failing HTML entity test in the C tokenizer. Remove some extraneous whitespace in string_mixin.py. --- mwparserfromhell/parser/tokenizer.c | 19 +++++++++++++++++-- mwparserfromhell/string_mixin.py | 1 - 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index df0882e..939f30c 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -911,8 +911,8 @@ Tokenizer_really_parse_entity(Tokenizer* self) { PyObject *token, *kwargs, *textobj; Py_UNICODE this; - int numeric, hexadecimal, i, j, test; - char *valid, *text, *def; + int numeric, hexadecimal, i, j, zeroes, test; + char *valid, *text, *buffer, *def; #define FAIL_ROUTE_AND_EXIT() { \ Tokenizer_fail_route(self); \ @@ -984,6 +984,7 @@ Tokenizer_really_parse_entity(Tokenizer* self) return -1; } i = 0; + zeroes = 0; while (1) { this = Tokenizer_READ(self, 0); if (this == *";") { @@ -992,6 +993,7 @@ Tokenizer_really_parse_entity(Tokenizer* self) break; } if (i == 0 && this == *"0") { + zeroes++; self->head++; continue; } @@ -1029,6 +1031,19 @@ Tokenizer_really_parse_entity(Tokenizer* self) i++; } } + if (zeroes) { + buffer = calloc(strlen(text) + zeroes + 1, sizeof(char)); + if (!buffer) { + free(text); + PyErr_NoMemory(); + return -1; + } + for (i = 0; i < zeroes; i++) + strcat(buffer, "0"); + strcat(buffer, text); + free(text); + text = buffer; + } textobj = PyUnicode_FromString(text); if (!textobj) { free(text); diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 6bee9c4..89c1bc0 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -40,7 +40,6 @@ def inheritdoc(method): method.__doc__ = getattr(str, method.__name__).__doc__ return method - class StringMixIn(object): """Implement the interface for ``unicode``/``str`` in a dynamic manner.