
Fix some bugs.

tags/v0.2
Ben Kurtovic, 12 years ago
commit 17a09e395a
2 changed files with 74 additions and 60 deletions:
  1. mwparserfromhell/parser/tokenizer.c (+36, -23)
  2. mwparserfromhell/parser/tokenizer.h (+38, -37)

mwparserfromhell/parser/tokenizer.c (+36, -23)

@@ -26,25 +26,7 @@ SOFTWARE.
 static PyObject*
 Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds)
 {
-    Tokenizer *self;
-
-    self = (Tokenizer*) type->tp_alloc(type, 0);
-    if (self != NULL) {
-
-        self->text = Py_None;
-        Py_INCREF(Py_None);
-
-        self->stacks = PyList_New(0);
-        if (!self->stacks) {
-            Py_DECREF(self);
-            return NULL;
-        }
-
-        self->head = 0;
-        self->length = 0;
-        self->global = 0;
-    }
-
+    Tokenizer* self = (Tokenizer*) type->tp_alloc(type, 0);
     return (PyObject*) self;
 }


@@ -63,6 +45,22 @@ Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds)
     static char* kwlist[] = {NULL};
     if (!PyArg_ParseTupleAndKeywords(args, kwds, "", kwlist))
         return -1;
+
+    self->text = Py_None;
+    self->topstack = Py_None;
+    Py_INCREF(Py_None);
+    Py_INCREF(Py_None);
+
+    self->stacks = PyList_New(0);
+    if (!self->stacks) {
+        Py_DECREF(self);
+        return -1;
+    }
+
+    self->head = 0;
+    self->length = 0;
+    self->global = 0;
+
     return 0;
 }
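
These first two hunks move all per-instance setup out of Tokenizer_new() and into Tokenizer_init(), so tp_new only allocates and tp_init does the initialization (now including the new topstack field). A minimal sketch of that split for a generic extension type, with Example and its data field as illustrative stand-ins:

    /* tp_new: allocation only; runs once per object. */
    static PyObject*
    Example_new(PyTypeObject* type, PyObject* args, PyObject* kwds)
    {
        return type->tp_alloc(type, 0);
    }

    /* tp_init: field setup; may run again if __init__() is re-invoked. */
    static int
    Example_init(Example* self, PyObject* args, PyObject* kwds)
    {
        self->data = PyList_New(0);
        if (!self->data)
            return -1;   /* propagate the failed allocation */
        return 0;
    }

One consequence of the split is that tp_dealloc must tolerate fields that were never set, since tp_init can fail or be skipped entirely.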


@@ -89,6 +87,7 @@ static int
 Tokenizer_push(Tokenizer* self, Py_ssize_t context)
 {
     PyObject* top = PyList_New(3);
+    if (!top) return -1;
     PyList_SET_ITEM(top, 0, PyList_New(0));
     PyList_SET_ITEM(top, 1, PyInt_FromSsize_t(context));
     PyList_SET_ITEM(top, 2, PyList_New(0));
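
The one-line guard stops a failed PyList_New(3) from being handed to PyList_SET_ITEM. The same checked-allocation pattern, sketched in isolation (PyList_SET_ITEM steals the reference it is given, so only objects not yet stored need an explicit Py_DECREF on the error path):

    PyObject* top = PyList_New(3);
    if (!top)
        return -1;                 /* exception already set by PyList_New */
    PyObject* item = PyList_New(0);
    if (!item) {
        Py_DECREF(top);            /* release the list we still own */
        return -1;
    }
    PyList_SET_ITEM(top, 0, item); /* steals the reference to item */
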
@@ -1094,7 +1093,7 @@ Tokenizer_handle_heading_end(Tokenizer* self)
         self->head++;
     }

-    Py_ssize_t current = LC_HEADING_LEVEL_1 << (best > 5 ? 5 : best - 1); // FIXME
+    Py_ssize_t current = log2(Tokenizer_CONTEXT_VAL(self) / LC_HEADING_LEVEL_1) + 1;
     Py_ssize_t level = current > best ? (best > 6 ? 6 : best) : (current > 6 ? 6 : current);

     if (setjmp(exception_env) == BAD_ROUTE) {
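
Heading contexts are one-hot bit flags, with level n stored as LC_HEADING_LEVEL_1 << (n - 1), so the fixed line recovers the current level from the context value with a base-2 logarithm; this is also why tokenizer.h gains #include <math.h> below. A worked example, under the assumption that Tokenizer_CONTEXT_VAL(self) yields the raw context bitfield:

    /* Level 3 sets the bit LC_HEADING_LEVEL_1 << 2, so:
       context / LC_HEADING_LEVEL_1 == 4 and log2(4) + 1 == 3. */
    Py_ssize_t ctx = Tokenizer_CONTEXT_VAL(self);
    Py_ssize_t current = (Py_ssize_t) log2(ctx / LC_HEADING_LEVEL_1) + 1;
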
@@ -1387,7 +1386,7 @@ Tokenizer_parse(Tokenizer* self, Py_ssize_t context)
 Build a list of tokens from a string of wikicode and return it.
 */
 static PyObject*
-Tokenizer_tokenize(Tokenizer* self, PyObject *args)
+Tokenizer_tokenize(Tokenizer* self, PyObject* args)
 {
     PyObject* text;

@@ -1439,10 +1438,24 @@ init_tokenizer(void)
     NOARGS = PyTuple_New(0);
     NOKWARGS = PyDict_New();

+    char* name = "mwparserfromhell.parser";
     PyObject* globals = PyEval_GetGlobals();
     PyObject* locals = PyEval_GetLocals();
-    PyObject* fromlist = PyList_New(0);
+    PyObject* fromlist = PyList_New(1);
+    if (!fromlist) return;
+    PyObject* submodname = PyBytes_FromString("tokens");
+    if (!submodname) {
+        Py_DECREF(fromlist);
+        return;
+    }
+    PyList_SET_ITEM(fromlist, 0, submodname);

-    tokens = PyImport_ImportModuleLevel("tokens", globals, locals, fromlist, 1);
+    PyObject* tokmodule = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0);
     Py_DECREF(fromlist);
+    if (!tokmodule) {
+        return;
+    }
+
+    tokens = PyObject_GetAttrString(tokmodule, "tokens");
+    Py_DECREF(tokmodule);
 }
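
The rewritten import is, in effect, the C-API spelling of `from mwparserfromhell.parser import tokens`: the old call asked for a top-level module literally named "tokens" with an implicit relative import (level 1), while the new code imports the parser package absolutely (level 0), passes "tokens" in the fromlist so the submodule gets loaded, then fetches it as an attribute. A reduced sketch of the same pattern, using Py_BuildValue to construct the fromlist:

    /* Roughly equivalent to: from mwparserfromhell.parser import tokens */
    PyObject* fromlist = Py_BuildValue("[s]", "tokens");
    if (!fromlist)
        return;
    PyObject* pkg = PyImport_ImportModuleLevel("mwparserfromhell.parser",
                                               PyEval_GetGlobals(),
                                               PyEval_GetLocals(),
                                               fromlist, 0);
    Py_DECREF(fromlist);
    if (!pkg)
        return;
    tokens = PyObject_GetAttrString(pkg, "tokens");  /* new reference */
    Py_DECREF(pkg);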

mwparserfromhell/parser/tokenizer.h (+38, -37)

@@ -26,6 +26,7 @@ SOFTWARE.
 #endif

 #include <Python.h>
+#include <math.h>
 #include <setjmp.h>
 #include <structmember.h>

@@ -108,43 +109,43 @@ typedef struct {


 /* Tokenizer function prototypes: */

-static PyObject* Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds);
-static void Tokenizer_dealloc(Tokenizer* self);
-static int Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds);
-static int Tokenizer_set_context(Tokenizer* self, Py_ssize_t value);
-static int Tokenizer_set_textbuffer(Tokenizer* self, PyObject* value);
-static int Tokenizer_push(Tokenizer* self, Py_ssize_t context);
-static int Tokenizer_push_textbuffer(Tokenizer* self);
-static int Tokenizer_delete_top_of_stack(Tokenizer* self);
-static PyObject* Tokenizer_pop(Tokenizer* self);
-static PyObject* Tokenizer_pop_keeping_context(Tokenizer* self);
-static void Tokenizer_fail_route(Tokenizer* self);
-static int Tokenizer_write(Tokenizer* self, PyObject* token);
-static int Tokenizer_write_first(Tokenizer* self, PyObject* token);
-static int Tokenizer_write_text(Tokenizer* self, PyObject* text);
-static int Tokenizer_write_all(Tokenizer* self, PyObject* tokenlist);
-static int Tokenizer_write_text_then_stack(Tokenizer* self, PyObject* text);
-static PyObject* Tokenizer_read(Tokenizer* self, Py_ssize_t delta);
-static PyObject* Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta);
-static int Tokenizer_parse_template_or_argument(Tokenizer* self);
-static int Tokenizer_parse_template(Tokenizer* self);
-static int Tokenizer_parse_argument(Tokenizer* self);
-static int Tokenizer_verify_safe(Tokenizer* self, const char* unsafes[]);
-static int Tokenizer_handle_template_param(Tokenizer* self);
-static int Tokenizer_handle_template_param_value(Tokenizer* self);
-static PyObject* Tokenizer_handle_template_end(Tokenizer* self);
-static int Tokenizer_handle_argument_separator(Tokenizer* self);
-static PyObject* Tokenizer_handle_argument_end(Tokenizer* self);
-static int Tokenizer_parse_wikilink(Tokenizer* self);
-static int Tokenizer_handle_wikilink_separator(Tokenizer* self);
-static PyObject* Tokenizer_handle_wikilink_end(Tokenizer* self);
-static int Tokenizer_parse_heading(Tokenizer* self);
-static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self);
-static int Tokenizer_really_parse_entity(Tokenizer* self);
-static int Tokenizer_parse_entity(Tokenizer* self);
-static int Tokenizer_parse_comment(Tokenizer* self);
-static PyObject* Tokenizer_parse(Tokenizer* self, Py_ssize_t context);
-static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject *args);
+static PyObject* Tokenizer_new(PyTypeObject*, PyObject*, PyObject*);
+static void Tokenizer_dealloc(Tokenizer*);
+static int Tokenizer_init(Tokenizer*, PyObject*, PyObject*);
+static int Tokenizer_set_context(Tokenizer*, Py_ssize_t);
+static int Tokenizer_set_textbuffer(Tokenizer*, PyObject*);
+static int Tokenizer_push(Tokenizer*, Py_ssize_t);
+static int Tokenizer_push_textbuffer(Tokenizer*);
+static int Tokenizer_delete_top_of_stack(Tokenizer*);
+static PyObject* Tokenizer_pop(Tokenizer*);
+static PyObject* Tokenizer_pop_keeping_context(Tokenizer*);
+static void Tokenizer_fail_route(Tokenizer*);
+static int Tokenizer_write(Tokenizer*, PyObject*);
+static int Tokenizer_write_first(Tokenizer*, PyObject*);
+static int Tokenizer_write_text(Tokenizer*, PyObject*);
+static int Tokenizer_write_all(Tokenizer*, PyObject*);
+static int Tokenizer_write_text_then_stack(Tokenizer*, PyObject*);
+static PyObject* Tokenizer_read(Tokenizer*, Py_ssize_t);
+static PyObject* Tokenizer_read_backwards(Tokenizer*, Py_ssize_t);
+static int Tokenizer_parse_template_or_argument(Tokenizer*);
+static int Tokenizer_parse_template(Tokenizer*);
+static int Tokenizer_parse_argument(Tokenizer*);
+static int Tokenizer_verify_safe(Tokenizer*, const char* []);
+static int Tokenizer_handle_template_param(Tokenizer*);
+static int Tokenizer_handle_template_param_value(Tokenizer*);
+static PyObject* Tokenizer_handle_template_end(Tokenizer*);
+static int Tokenizer_handle_argument_separator(Tokenizer*);
+static PyObject* Tokenizer_handle_argument_end(Tokenizer*);
+static int Tokenizer_parse_wikilink(Tokenizer*);
+static int Tokenizer_handle_wikilink_separator(Tokenizer*);
+static PyObject* Tokenizer_handle_wikilink_end(Tokenizer*);
+static int Tokenizer_parse_heading(Tokenizer*);
+static HeadingData* Tokenizer_handle_heading_end(Tokenizer*);
+static int Tokenizer_really_parse_entity(Tokenizer*);
+static int Tokenizer_parse_entity(Tokenizer*);
+static int Tokenizer_parse_comment(Tokenizer*);
+static PyObject* Tokenizer_parse(Tokenizer*, Py_ssize_t);
+static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*);

 /* More structs for creating the Tokenizer type: */
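
Apart from the new #include <math.h>, the header-side rewrite is purely cosmetic: C permits omitting parameter names in a declaration, so each prototype keeps exactly the same signature in a shorter form, e.g.:

    static int Tokenizer_push(Tokenizer* self, Py_ssize_t context);  /* before */
    static int Tokenizer_push(Tokenizer*, Py_ssize_t);               /* after  */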

