Browse Source

Fix a failing HTML entity test in the C tokenizer.

Remove some extraneous whitespace in string_mixin.py.
tags/v0.2
Ben Kurtovic 11 years ago
parent
commit
22e869b142
2 changed files with 17 additions and 3 deletions
  1. +17
    -2
      mwparserfromhell/parser/tokenizer.c
  2. +0
    -1
      mwparserfromhell/string_mixin.py

+ 17
- 2
mwparserfromhell/parser/tokenizer.c View File

@@ -911,8 +911,8 @@ Tokenizer_really_parse_entity(Tokenizer* self)
{
PyObject *token, *kwargs, *textobj;
Py_UNICODE this;
-int numeric, hexadecimal, i, j, test;
-char *valid, *text, *def;
+int numeric, hexadecimal, i, j, zeroes, test;
+char *valid, *text, *buffer, *def;

#define FAIL_ROUTE_AND_EXIT() { \
Tokenizer_fail_route(self); \
@@ -984,6 +984,7 @@ Tokenizer_really_parse_entity(Tokenizer* self)
return -1;
}
i = 0;
+zeroes = 0;
while (1) {
this = Tokenizer_READ(self, 0);
if (this == *";") {
@@ -992,6 +993,7 @@ Tokenizer_really_parse_entity(Tokenizer* self)
break;
}
if (i == 0 && this == *"0") {
+zeroes++;
self->head++;
continue;
}
@@ -1029,6 +1031,19 @@ Tokenizer_really_parse_entity(Tokenizer* self)
i++;
}
}
+if (zeroes) {
+buffer = calloc(strlen(text) + zeroes + 1, sizeof(char));
+if (!buffer) {
+free(text);
+PyErr_NoMemory();
+return -1;
+}
+for (i = 0; i < zeroes; i++)
+strcat(buffer, "0");
+strcat(buffer, text);
+free(text);
+text = buffer;
+}
textobj = PyUnicode_FromString(text);
if (!textobj) {
free(text);


+ 0
- 1
mwparserfromhell/string_mixin.py View File

@@ -40,7 +40,6 @@ def inheritdoc(method):
method.__doc__ = getattr(str, method.__name__).__doc__
return method


class StringMixIn(object):
"""Implement the interface for ``unicode``/``str`` in a dynamic manner.



Loading…
Cancel
Save