
Break at 80 cols for most lines.

tags/v0.4
Ben Kurtovic, 9 years ago
commit 7489253e32
2 changed files with 52 additions and 30 deletions
  1. mwparserfromhell/parser/tokenizer.c (+40 -24)
  2. mwparserfromhell/parser/tokenizer.py (+12 -6)

mwparserfromhell/parser/tokenizer.c (+40 -24)

@@ -2456,13 +2456,15 @@ static PyObject* Tokenizer_handle_end(Tokenizer* self, uint64_t context)
/*
Emit a table tag.
*/
-static int Tokenizer_emit_table_tag(Tokenizer* self, const char* open_open_markup,
-                                    const char* tag, PyObject* style, PyObject* padding,
-                                    const char* close_open_markup, PyObject* contents,
-                                    const char* open_close_markup)
+static int
+Tokenizer_emit_table_tag(Tokenizer* self, const char* open_open_markup,
+                         const char* tag, PyObject* style, PyObject* padding,
+                         const char* close_open_markup, PyObject* contents,
+                         const char* open_close_markup)
{
-    PyObject *open_open_kwargs, *open_open_markup_unicode, *close_open_kwargs, *close_open_markup_unicode,
-             *open_close_kwargs, *open_close_markup_unicode;
+    PyObject *open_open_kwargs, *open_open_markup_unicode, *close_open_kwargs,
+             *close_open_markup_unicode, *open_close_kwargs,
+             *open_close_markup_unicode;

open_open_kwargs = PyDict_New();
if (!open_open_kwargs)
@@ -2472,7 +2474,8 @@ static int Tokenizer_emit_table_tag(Tokenizer* self, const char* open_open_marku
Py_DECREF(open_open_kwargs);
goto fail_decref_all;
}
-    PyDict_SetItemString(open_open_kwargs, "wiki_markup", open_open_markup_unicode);
+    PyDict_SetItemString(open_open_kwargs, "wiki_markup",
+                         open_open_markup_unicode);
Py_DECREF(open_open_markup_unicode);
if (Tokenizer_emit_kwargs(self, TagOpenOpen, open_open_kwargs))
goto fail_decref_all;
@@ -2494,7 +2497,8 @@ static int Tokenizer_emit_table_tag(Tokenizer* self, const char* open_open_marku
Py_DECREF(close_open_kwargs);
goto fail_decref_padding_contents;
}
-        PyDict_SetItemString(close_open_kwargs, "wiki_markup", close_open_markup_unicode);
+        PyDict_SetItemString(close_open_kwargs, "wiki_markup",
+                             close_open_markup_unicode);
Py_DECREF(close_open_markup_unicode);
}
PyDict_SetItemString(close_open_kwargs, "padding", padding);
@@ -2516,7 +2520,8 @@ static int Tokenizer_emit_table_tag(Tokenizer* self, const char* open_open_marku
Py_DECREF(open_close_kwargs);
return -1;
}
-    PyDict_SetItemString(open_close_kwargs, "wiki_markup", open_close_markup_unicode);
+    PyDict_SetItemString(open_close_kwargs, "wiki_markup",
+                         open_close_markup_unicode);
Py_DECREF(open_close_markup_unicode);
if (Tokenizer_emit_kwargs(self, TagOpenClose, open_close_kwargs))
return -1;
@@ -2538,8 +2543,9 @@ static int Tokenizer_emit_table_tag(Tokenizer* self, const char* open_open_marku
/*
Parse until ``end_token`` as style attributes for a table.
*/
-static PyObject* Tokenizer_parse_as_table_style(Tokenizer* self, char end_token,
-                                                int break_on_table_end)
+static PyObject*
+Tokenizer_parse_as_table_style(Tokenizer* self, char end_token,
+                               int break_on_table_end)
{
TagData *data = TagData_new();
PyObject *padding, *trash;
@@ -2655,7 +2661,8 @@ static int Tokenizer_handle_table_start(Tokenizer* self)
self->head += 2;
}

-    if (Tokenizer_emit_table_tag(self, "{|", "table", style, padding, NULL, table, "|}"))
+    if (Tokenizer_emit_table_tag(self, "{|", "table", style, padding, NULL,
+                                 table, "|}"))
return -1;
// offset displacement done by _parse()
self->head--;
@@ -2665,7 +2672,7 @@ static int Tokenizer_handle_table_start(Tokenizer* self)
/*
Return the stack in order to handle the table end.
*/
-static PyObject * Tokenizer_handle_table_end(Tokenizer* self)
+static PyObject* Tokenizer_handle_table_end(Tokenizer* self)
{
self->head += 2;
return Tokenizer_pop(self);
@@ -2720,7 +2727,8 @@ static int Tokenizer_handle_table_row(Tokenizer* self)
return -1;
}

-    if (Tokenizer_emit_table_tag(self, "|-", "tr", style, padding, NULL, row, ""))
+    if (Tokenizer_emit_table_tag(self, "|-", "tr", style, padding, NULL, row,
+                                 ""))
return -1;
// offset displacement done by _parse()
self->head--;
@@ -2739,8 +2747,9 @@ static PyObject* Tokenizer_handle_table_row_end(Tokenizer* self)
Parse as normal syntax unless we hit a style marker, then parse style
as HTML attributes and the remainder as normal syntax.
*/
-static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup,
-                                       const char *tag, uint64_t line_context)
+static int
+Tokenizer_handle_table_cell(Tokenizer* self, const char *markup,
+                            const char *tag, uint64_t line_context)
{
uint64_t old_context = self->topstack->context;
uint64_t cell_context;
@@ -2757,7 +2766,8 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup,
return 0;
}

-    cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | LC_TABLE_CELL_STYLE | line_context, 1);
+    cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN |
+                           LC_TABLE_CELL_STYLE | line_context, 1);
if (BAD_ROUTE) {
trash = Tokenizer_pop(self);
Py_XDECREF(trash);
@@ -2772,7 +2782,8 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup,
if (cell_context & LC_TABLE_CELL_STYLE) {
Py_DECREF(cell);
self->head = reset + strlen(markup);
-        if(Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | line_context))
+        if(Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN |
+                          line_context))
return -1;
padding = Tokenizer_parse_as_table_style(self, '|', 0);
if (BAD_ROUTE) {
@@ -2790,7 +2801,8 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup,
}
// Don't parse the style separator
self->head++;
-        cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | line_context, 1);
+        cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN |
+                               line_context, 1);
if (BAD_ROUTE) {
Py_DECREF(padding);
Py_DECREF(style);
@@ -2818,10 +2830,12 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup,
if (style) {
close_open_markup = "|";
}
-    if (Tokenizer_emit_table_tag(self, markup, tag, style, padding, close_open_markup, cell, ""))
+    if (Tokenizer_emit_table_tag(self, markup, tag, style, padding,
+                                 close_open_markup, cell, ""))
return -1;
// keep header/cell line contexts
self->topstack->context |= cell_context & (LC_TABLE_TH_LINE | LC_TABLE_TD_LINE);
self->topstack->context |= cell_context & (LC_TABLE_TH_LINE |
LC_TABLE_TD_LINE);
// offset displacement done by parse()
self->head--;
return 0;
@@ -2831,7 +2845,8 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup,
Returns the context, stack, and whether to reset the cell for style
in a tuple.
*/
-static PyObject* Tokenizer_handle_table_cell_end(Tokenizer* self, int reset_for_style)
+static PyObject*
+Tokenizer_handle_table_cell_end(Tokenizer* self, int reset_for_style)
{
if (reset_for_style)
self->topstack->context |= LC_TABLE_CELL_STYLE;
@@ -2844,7 +2859,8 @@ static PyObject* Tokenizer_handle_table_cell_end(Tokenizer* self, int reset_for_
Make sure we are not trying to write an invalid character. Return 0 if
everything is safe, or -1 if the route must be failed.
*/
-static int Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UNICODE data)
+static int
+Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UNICODE data)
{
if (context & LC_FAIL_NEXT)
return -1;
@@ -2895,7 +2911,7 @@ static int Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UNICODE d
}
else if (context & LC_FAIL_ON_LBRACE) {
if (data == '{' || (Tokenizer_READ_BACKWARDS(self, 1) == '{' &&
-            Tokenizer_READ_BACKWARDS(self, 2) == '{')) {
+                        Tokenizer_READ_BACKWARDS(self, 2) == '{')) {
if (context & LC_TEMPLATE)
self->topstack->context |= LC_FAIL_ON_EQUALS;
else
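
The rewrapped C hunks all follow the same CPython ownership discipline: every object created locally is released once the kwargs dict holds it. A minimal standalone sketch of that pattern (make_wiki_markup_kwargs is an invented name for illustration, not part of mwparserfromhell's API):

#include <Python.h>

/* Sketch of the ownership pattern visible in the hunks above.
 * PyDict_SetItemString() does not steal the value reference -- it INCREFs
 * internally -- so the caller must release its temporary with Py_DECREF()
 * on the success and failure paths alike. */
static PyObject*
make_wiki_markup_kwargs(const char* markup)
{
    PyObject *kwargs, *markup_unicode;

    kwargs = PyDict_New();                          /* new reference, or NULL */
    if (!kwargs)
        return NULL;
    markup_unicode = PyUnicode_FromString(markup);  /* new reference */
    if (!markup_unicode) {
        Py_DECREF(kwargs);
        return NULL;
    }
    if (PyDict_SetItemString(kwargs, "wiki_markup", markup_unicode) < 0) {
        Py_DECREF(markup_unicode);
        Py_DECREF(kwargs);
        return NULL;
    }
    Py_DECREF(markup_unicode);   /* the dict now owns its own reference */
    return kwargs;
}

This is why each PyDict_SetItemString() in the diff is paired with a Py_DECREF() of the temporary unicode object immediately afterwards.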


mwparserfromhell/parser/tokenizer.py (+12 -6)

@@ -1010,7 +1010,8 @@ class Tokenizer(object):
if style:
self._emit_all(style)
if close_open_markup:
-            self._emit(tokens.TagCloseOpen(wiki_markup=close_open_markup, padding=padding))
+            self._emit(tokens.TagCloseOpen(wiki_markup=close_open_markup,
+                                           padding=padding))
else:
self._emit(tokens.TagCloseOpen(padding=padding))
if contents:
@@ -1130,7 +1131,8 @@ class Tokenizer(object):
return

try:
-            cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context | contexts.TABLE_CELL_STYLE)
+            cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN |
+                               line_context | contexts.TABLE_CELL_STYLE)
except BadRoute:
self._head = reset
self._pop()
@@ -1140,7 +1142,8 @@ class Tokenizer(object):
reset_for_style = cell_context & contexts.TABLE_CELL_STYLE
if reset_for_style:
self._head = reset + len(markup)
-            self._push(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context)
+            self._push(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN |
+                       line_context)
try:
padding = self._parse_as_table_style("|")
except BadRoute:
@@ -1151,7 +1154,8 @@ class Tokenizer(object):
# Don't parse the style separator
self._head += 1
try:
-                cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context)
+                cell = self._parse(contexts.TABLE_OPEN |
+                                   contexts.TABLE_CELL_OPEN | line_context)
except BadRoute:
self._head = reset
ret = self._pop()
@@ -1160,9 +1164,11 @@ class Tokenizer(object):
self._context = old_context

close_open_markup = "|" if reset_for_style else None
-        self._emit_table_tag(markup, tag, style, padding, close_open_markup, cell, "")
+        self._emit_table_tag(markup, tag, style, padding, close_open_markup,
+                             cell, "")
# keep header/cell line contexts
-        self._context |= cell_context & (contexts.TABLE_TH_LINE | contexts.TABLE_TD_LINE)
+        self._context |= cell_context & (contexts.TABLE_TH_LINE |
+                                         contexts.TABLE_TD_LINE)
# offset displacement done by parse()
self._head -= 1
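
Both files treat parser contexts as bit flags packed into a single integer: | merges flags when recursing into _parse()/Tokenizer_parse(), and & masks out the few bits worth carrying over afterwards. A minimal sketch with made-up flag values (the real LC_*/contexts.* constants are defined by the library, not here):

#include <stdint.h>
#include <stdio.h>

/* Illustrative bit values only; the library defines its own LC_* flags. */
#define LC_TABLE_OPEN    (1ULL << 0)
#define LC_TABLE_TD_LINE (1ULL << 1)
#define LC_TABLE_TH_LINE (1ULL << 2)

int main(void)
{
    uint64_t cell_context = LC_TABLE_OPEN | LC_TABLE_TH_LINE;
    uint64_t context = 0;

    /* Keep only the header/cell line bits, mirroring the diff's
     * "context |= cell_context & (LC_TABLE_TH_LINE | LC_TABLE_TD_LINE)". */
    context |= cell_context & (LC_TABLE_TH_LINE | LC_TABLE_TD_LINE);
    printf("context = %llu\n", (unsigned long long)context);  /* prints 4 */
    return 0;
}

Masking with & before the |= is what lets the tokenizer discard transient flags (like the cell-style marker) while preserving the header/cell line contexts across the table parse.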


