
Fix all bugs in C implementation of external links.

commit 2561cf5b5e (tags/v0.3)
Ben Kurtovic, 10 years ago

2 changed files with 39 additions and 20 deletions:
  1. mwparserfromhell/parser/tokenizer.c  +37 -19
  2. mwparserfromhell/parser/tokenizer.h  +2  -1

mwparserfromhell/parser/tokenizer.c  (+37 -19)

@@ -100,7 +100,7 @@ static Textbuffer* Textbuffer_new(void)
         PyErr_NoMemory();
         return NULL;
     }
-    buffer->next = NULL;
+    buffer->prev = buffer->next = NULL;
     return buffer;
 }


@@ -128,6 +128,7 @@ static int Textbuffer_write(Textbuffer** this, Py_UNICODE code)
         if (!new)
             return -1;
         new->next = self;
+        self->prev = new;
         *this = self = new;
     }
     self->data[self->size++] = code;
@@ -435,19 +436,33 @@ static int Tokenizer_emit_text(Tokenizer* self, const char* text)
     Write the contents of another textbuffer to the current textbuffer,
     deallocating it in the process.
 */
-static int Tokenizer_emit_textbuffer(Tokenizer* self, Textbuffer* buffer)
+static int
+Tokenizer_emit_textbuffer(Tokenizer* self, Textbuffer* buffer, int reverse)
 {
     Textbuffer *original = buffer;
     int i;
 
-    while (buffer) {
-        for (i = 0; i < buffer->size; i++) {
-            if (Tokenizer_emit_char(self, buffer->data[i])) {
-                Textbuffer_dealloc(original);
-                return -1;
+    if (reverse) {
+        do {
+            for (i = buffer->size - 1; i >= 0; i--) {
+                if (Tokenizer_emit_char(self, buffer->data[i])) {
+                    Textbuffer_dealloc(original);
+                    return -1;
+                }
             }
-        }
-        buffer = buffer->next;
+        } while ((buffer = buffer->next));
+    }
+    else {
+        while (buffer->next)
+            buffer = buffer->next;
+        do {
+            for (i = 0; i < buffer->size; i++) {
+                if (Tokenizer_emit_char(self, buffer->data[i])) {
+                    Textbuffer_dealloc(original);
+                    return -1;
+                }
+            }
+        } while ((buffer = buffer->prev));
     }
     Textbuffer_dealloc(original);
     return 0;
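
For orientation, here is a minimal, self-contained sketch (not code from this repository; the chunk type and helper names are invented) of the traversal that the new reverse parameter and prev pointer make possible. Because Textbuffer_write prepends a fresh node whenever the head fills up, the head of the chain holds the newest characters: in-order output starts at the tail and follows prev back towards the head, while reversed output starts at the head and walks each node's data backwards.

#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-in for the tokenizer's Textbuffer: a doubly linked
 * chain of small fixed-size chunks. New chunks are prepended, so the
 * head of the chain always holds the most recently written text. */
struct chunk {
    int size;
    char data[4];
    struct chunk *prev;
    struct chunk *next;
};

/* Prepend a fresh chunk when the head is full, as Textbuffer_write does,
 * and wire up the prev pointer this commit introduces. Returns the new head. */
static struct chunk *chunk_write(struct chunk *head, char c)
{
    if (!head || head->size == (int) sizeof head->data) {
        struct chunk *fresh = calloc(1, sizeof *fresh);
        if (!fresh)
            abort();
        fresh->next = head;
        if (head)
            head->prev = fresh;
        head = fresh;
    }
    head->data[head->size++] = c;
    return head;
}

/* Emit the chain's contents. With reverse == 0, walk to the oldest chunk
 * (the tail) and follow prev pointers back towards the head, printing each
 * chunk forwards: the text comes out in original order. With reverse != 0,
 * start at the head and print each chunk backwards: the whole text comes
 * out reversed, as Tokenizer_emit_textbuffer(self, buffer, 1) needs. */
static void chunk_emit(const struct chunk *head, int reverse)
{
    const struct chunk *node = head;
    int i;

    if (reverse) {
        do {
            for (i = node->size - 1; i >= 0; i--)
                putchar(node->data[i]);
        } while ((node = node->next));
    }
    else {
        while (node->next)
            node = node->next;
        do {
            for (i = 0; i < node->size; i++)
                putchar(node->data[i]);
        } while ((node = node->prev));
    }
    putchar('\n');
}

int main(void)
{
    struct chunk *head = NULL;
    const char *text = "http://example";

    while (*text)
        head = chunk_write(head, *text++);
    chunk_emit(head, 0);  /* prints "http://example" */
    chunk_emit(head, 1);  /* prints "elpmaxe//:ptth" */
    /* chunks are leaked here for brevity */
    return 0;
}
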
@@ -933,7 +948,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self)
     // it was just parsed as text:
     temp_buffer = self->topstack->textbuffer;
     while (temp_buffer) {
-        for (i = temp_buffer->size - 1; i >= 0; i++) {
+        for (i = temp_buffer->size - 1; i >= 0; i--) {
             chunk = temp_buffer->data[i];
             if (Py_UNICODE_ISSPACE(chunk) || is_marker(chunk))
                 goto end_of_loop;
@@ -971,7 +986,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self)
         Textbuffer_dealloc(scheme_buffer);
         return -1;
     }
-    if (Tokenizer_emit_textbuffer(self, scheme_buffer))
+    if (Tokenizer_emit_textbuffer(self, scheme_buffer, 1))
         return -1;
     if (Tokenizer_emit_char(self, *":"))
         return -1;
@@ -990,16 +1005,18 @@ static int
 Tokenizer_handle_free_link_text(Tokenizer* self, int* parens,
                                 Textbuffer** tail, Py_UNICODE this)
 {
-    #define PUSH_TAIL_BUFFER(tail, error) \
-        if ((tail)->size || (tail)->next) { \
-            Tokenizer_emit_textbuffer(self, tail); \
-            tail = Textbuffer_new(); \
-            if (!(tail)) \
-                return error; \
+    #define PUSH_TAIL_BUFFER(tail, error) \
+        if ((tail)->size || (tail)->next) { \
+            Tokenizer_emit_textbuffer(self, tail, 0); \
+            tail = Textbuffer_new(); \
+            if (!(tail)) \
+                return error; \
         }
 
-    if (this == *"(" && !(*parens))
+    if (this == *"(" && !(*parens)) {
         *parens = 1;
+        PUSH_TAIL_BUFFER(*tail, -1)
+    }
     else if (this == *"," || this == *";" || this == *"\\" || this == *"." ||
              this == *":" || this == *"!" || this == *"?" ||
              (!(*parens) && this == *")"))
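
A rough sketch, separate from the library, of the behaviour the PUSH_TAIL_BUFFER change above supports: punctuation at the end of a free link is held back in a tail buffer and only committed to the URL once more link text follows it. The helper below is hypothetical and deliberately simplified; in particular it always treats ')' as trailing punctuation, whereas the real handler only does so when no '(' has been seen inside the link.

#include <stdio.h>
#include <string.h>

/* Hypothetical helper: return the length of the URL portion of a free
 * external link, leaving trailing punctuation outside it. "committed"
 * only advances past punctuation once a non-punctuation character
 * follows, mirroring the tail-buffer flush in PUSH_TAIL_BUFFER. */
static size_t free_link_length(const char *text)
{
    size_t i, committed = 0;

    for (i = 0; text[i] && text[i] != ' '; i++) {
        if (!strchr(",;\\.:!?)", text[i]))
            committed = i + 1;  /* flush any held punctuation into the URL */
    }
    return committed;
}

int main(void)
{
    const char *s = "http://example.com/foo,bar, and more";

    /* prints "http://example.com/foo,bar": the final comma stays out */
    printf("%.*s\n", (int) free_link_length(s), s);
    return 0;
}
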
@@ -1141,6 +1158,7 @@ static int Tokenizer_parse_external_link(Tokenizer* self, int brackets)
         link = Tokenizer_really_parse_external_link(self, brackets, &extra);
     }
     if (BAD_ROUTE) {
+        RESET_ROUTE();
         self->head = reset;
         Textbuffer_dealloc(extra);
         if (!brackets && self->topstack->context & LC_DLTERM)
@@ -1180,7 +1198,7 @@ static int Tokenizer_parse_external_link(Tokenizer* self, int brackets)
         return -1;
     }
     if (extra->size || extra->next)
-        return Tokenizer_emit_textbuffer(self, extra);
+        return Tokenizer_emit_textbuffer(self, extra, 0);
     return 0;
 }




mwparserfromhell/parser/tokenizer.h  (+2 -1)

@@ -165,7 +165,7 @@ static PyObject* TagCloseClose;
 #define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_HEADING | LC_TAG | LC_STYLE)
 #define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)
 #define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE)
-#define AGG_INVALID_LINK (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK | LC_EXT_LINK_URI)
+#define AGG_INVALID_LINK (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK | LC_EXT_LINK)
 
 /* Tag contexts: */


@@ -184,6 +184,7 @@ static PyObject* TagCloseClose;
 struct Textbuffer {
     Py_ssize_t size;
     Py_UNICODE* data;
+    struct Textbuffer* prev;
     struct Textbuffer* next;
 };



