ソースを参照

Fix tokenizer.c on Windows; add another template test (#25)

Mostly by @gdooms, with tweaks.
tags/v0.2
Ben Kurtovic 11年前
コミット
9ede1121ba
3個のファイルの変更39行の追加18行の削除
  1. +30
    -17
      mwparserfromhell/parser/tokenizer.c
  2. +1
    -0
      mwparserfromhell/parser/tokenizer.h
  3. +8
    -1
      tests/tokenizer/templates.mwtest

+ 30
- 17
mwparserfromhell/parser/tokenizer.c ファイルの表示

@@ -23,6 +23,11 @@ SOFTWARE.


#include "tokenizer.h"


/*
 * Compatibility shim: the MSVC C runtime did not provide log2() until
 * Visual Studio 2013 (_MSC_VER 1800), so build it from natural logs:
 * log2(n) = ln(n) / ln(2).
 *
 * Guard the definition so that on conforming C99 compilers the (more
 * accurate, already-declared) <math.h> log2 is used instead — defining
 * an unguarded global log2 collides with the standard declaration.
 */
#if defined(_MSC_VER) && _MSC_VER < 1800
double log2(double n)
{
    return log(n) / log(2);
}
#endif

static PyObject* static PyObject*
Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds) Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds)
{ {
@@ -52,8 +57,9 @@ Textbuffer_new(void)
static void static void
Tokenizer_dealloc(Tokenizer* self) Tokenizer_dealloc(Tokenizer* self)
{ {
Py_XDECREF(self->text);
struct Stack *this = self->topstack, *next; struct Stack *this = self->topstack, *next;
Py_XDECREF(self->text);

while (this) { while (this) {
Py_DECREF(this->stack); Py_DECREF(this->stack);
Textbuffer_dealloc(this->textbuffer); Textbuffer_dealloc(this->textbuffer);
@@ -139,20 +145,21 @@ Textbuffer_render(struct Textbuffer* self)
static int static int
Tokenizer_push_textbuffer(Tokenizer* self) Tokenizer_push_textbuffer(Tokenizer* self)
{ {
PyObject *text, *kwargs, *token;
struct Textbuffer* buffer = self->topstack->textbuffer; struct Textbuffer* buffer = self->topstack->textbuffer;
if (buffer->size == 0 && !buffer->next) if (buffer->size == 0 && !buffer->next)
return 0; return 0;
PyObject* text = Textbuffer_render(buffer);
text = Textbuffer_render(buffer);
if (!text) if (!text)
return -1; return -1;
PyObject* kwargs = PyDict_New();
kwargs = PyDict_New();
if (!kwargs) { if (!kwargs) {
Py_DECREF(text); Py_DECREF(text);
return -1; return -1;
} }
PyDict_SetItemString(kwargs, "text", text); PyDict_SetItemString(kwargs, "text", text);
Py_DECREF(text); Py_DECREF(text);
PyObject* token = PyObject_Call(Text, NOARGS, kwargs);
token = PyObject_Call(Text, NOARGS, kwargs);
Py_DECREF(kwargs); Py_DECREF(kwargs);
if (!token) if (!token)
return -1; return -1;
@@ -185,9 +192,10 @@ Tokenizer_delete_top_of_stack(Tokenizer* self)
static PyObject* static PyObject*
Tokenizer_pop(Tokenizer* self) Tokenizer_pop(Tokenizer* self)
{ {
PyObject* stack;
if (Tokenizer_push_textbuffer(self)) if (Tokenizer_push_textbuffer(self))
return NULL; return NULL;
PyObject* stack = self->topstack->stack;
stack = self->topstack->stack;
Py_INCREF(stack); Py_INCREF(stack);
Tokenizer_delete_top_of_stack(self); Tokenizer_delete_top_of_stack(self);
return stack; return stack;
@@ -200,11 +208,13 @@ Tokenizer_pop(Tokenizer* self)
static PyObject* static PyObject*
Tokenizer_pop_keeping_context(Tokenizer* self) Tokenizer_pop_keeping_context(Tokenizer* self)
{ {
PyObject* stack;
int context;
if (Tokenizer_push_textbuffer(self)) if (Tokenizer_push_textbuffer(self))
return NULL; return NULL;
PyObject* stack = self->topstack->stack;
stack = self->topstack->stack;
Py_INCREF(stack); Py_INCREF(stack);
int context = self->topstack->context;
context = self->topstack->context;
Tokenizer_delete_top_of_stack(self); Tokenizer_delete_top_of_stack(self);
self->topstack->context = context; self->topstack->context = context;
return stack; return stack;
@@ -376,9 +386,10 @@ Tokenizer_read(Tokenizer* self, Py_ssize_t delta)
static PyObject* static PyObject*
Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta)
{ {
Py_ssize_t index;
if (delta > self->head) if (delta > self->head)
return EMPTY; return EMPTY;
Py_ssize_t index = self->head - delta;
index = self->head - delta;
return PyList_GET_ITEM(self->text, index); return PyList_GET_ITEM(self->text, index);
} }


@@ -392,7 +403,7 @@ Tokenizer_parse_template_or_argument(Tokenizer* self)
PyObject *tokenlist; PyObject *tokenlist;


self->head += 2; self->head += 2;
while (Tokenizer_READ(self, 0) == *"{") {
while (Tokenizer_READ(self, 0) == *"{" && braces < MAX_BRACES) {
self->head++; self->head++;
braces++; braces++;
} }
@@ -423,8 +434,8 @@ Tokenizer_parse_template_or_argument(Tokenizer* self)
if (Tokenizer_parse_template(self)) if (Tokenizer_parse_template(self))
return -1; return -1;
if (BAD_ROUTE) { if (BAD_ROUTE) {
char text[MAX_BRACES];
RESET_ROUTE(); RESET_ROUTE();
char text[braces + 1];
for (i = 0; i < braces; i++) text[i] = *"{"; for (i = 0; i < braces; i++) text[i] = *"{";
text[braces] = *""; text[braces] = *"";
if (Tokenizer_write_text_then_stack(self, text)) { if (Tokenizer_write_text_then_stack(self, text)) {
@@ -635,9 +646,10 @@ Tokenizer_handle_template_end(Tokenizer* self)
static int static int
Tokenizer_handle_argument_separator(Tokenizer* self) Tokenizer_handle_argument_separator(Tokenizer* self)
{ {
PyObject* token;
self->topstack->context ^= LC_ARGUMENT_NAME; self->topstack->context ^= LC_ARGUMENT_NAME;
self->topstack->context |= LC_ARGUMENT_DEFAULT; self->topstack->context |= LC_ARGUMENT_DEFAULT;
PyObject* token = PyObject_CallObject(ArgumentSeparator, NULL);
token = PyObject_CallObject(ArgumentSeparator, NULL);
if (!token) if (!token)
return -1; return -1;
if (Tokenizer_write(self, token)) { if (Tokenizer_write(self, token)) {
@@ -654,8 +666,8 @@ Tokenizer_handle_argument_separator(Tokenizer* self)
static PyObject* static PyObject*
Tokenizer_handle_argument_end(Tokenizer* self) Tokenizer_handle_argument_end(Tokenizer* self)
{ {
self->head += 2;
PyObject* stack = Tokenizer_pop(self); PyObject* stack = Tokenizer_pop(self);
self->head += 2;
return stack; return stack;
} }


@@ -716,9 +728,10 @@ Tokenizer_parse_wikilink(Tokenizer* self)
static int static int
Tokenizer_handle_wikilink_separator(Tokenizer* self) Tokenizer_handle_wikilink_separator(Tokenizer* self)
{ {
PyObject* token;
self->topstack->context ^= LC_WIKILINK_TITLE; self->topstack->context ^= LC_WIKILINK_TITLE;
self->topstack->context |= LC_WIKILINK_TEXT; self->topstack->context |= LC_WIKILINK_TEXT;
PyObject* token = PyObject_CallObject(WikilinkSeparator, NULL);
token = PyObject_CallObject(WikilinkSeparator, NULL);
if (!token) if (!token)
return -1; return -1;
if (Tokenizer_write(self, token)) { if (Tokenizer_write(self, token)) {
@@ -735,8 +748,8 @@ Tokenizer_handle_wikilink_separator(Tokenizer* self)
static PyObject* static PyObject*
Tokenizer_handle_wikilink_end(Tokenizer* self) Tokenizer_handle_wikilink_end(Tokenizer* self)
{ {
self->head += 1;
PyObject* stack = Tokenizer_pop(self); PyObject* stack = Tokenizer_pop(self);
self->head += 1;
return stack; return stack;
} }


@@ -1093,9 +1106,9 @@ Tokenizer_parse_comment(Tokenizer* self)
self->head += 4; self->head += 4;
comment = Tokenizer_parse(self, LC_COMMENT); comment = Tokenizer_parse(self, LC_COMMENT);
if (BAD_ROUTE) { if (BAD_ROUTE) {
const char* text = "<!--";
RESET_ROUTE(); RESET_ROUTE();
self->head = reset; self->head = reset;
const char* text = "<!--";
i = 0; i = 0;
while (1) { while (1) {
if (!text[i]) if (!text[i])
@@ -1359,10 +1372,10 @@ Tokenizer_tokenize(Tokenizer* self, PyObject* args)
PyObject *text, *temp; PyObject *text, *temp;


if (!PyArg_ParseTuple(args, "U", &text)) { if (!PyArg_ParseTuple(args, "U", &text)) {
/* Failed to parse a Unicode object; try a string instead. */
PyErr_Clear();
const char* encoded; const char* encoded;
Py_ssize_t size; Py_ssize_t size;
/* Failed to parse a Unicode object; try a string instead. */
PyErr_Clear();
if (!PyArg_ParseTuple(args, "s#", &encoded, &size)) if (!PyArg_ParseTuple(args, "s#", &encoded, &size))
return NULL; return NULL;
temp = PyUnicode_FromStringAndSize(encoded, size); temp = PyUnicode_FromStringAndSize(encoded, size);


+ 1
- 0
mwparserfromhell/parser/tokenizer.h ファイルの表示

@@ -48,6 +48,7 @@ static const char* MARKERS[] = {
#define TEXTBUFFER_BLOCKSIZE 1024 #define TEXTBUFFER_BLOCKSIZE 1024
#define MAX_DEPTH 40 #define MAX_DEPTH 40
#define MAX_CYCLES 100000 #define MAX_CYCLES 100000
#define MAX_BRACES 255
#define MAX_ENTITY_SIZE 8 #define MAX_ENTITY_SIZE 8


static int route_state = 0; static int route_state = 0;


+ 8
- 1
tests/tokenizer/templates.mwtest ファイルの表示

@@ -607,8 +607,15 @@ output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|


--- ---


name: recursion_five_hundred_opens
label: test potentially dangerous recursion: five hundred template openings, without spaces
input: "{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{"
output: [Text(text="{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{")]

---

name: recursion_one_hundred_opens
label: test potentially dangerous recursion: one hundred template openings
label: test potentially dangerous recursion: one hundred template openings, with spaces
input: "{{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{" input: "{{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{"
output: [Text(text="{{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{")] output: [Text(text="{{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{")]




読み込み中…
キャンセル
保存