@@ -6,6 +6,7 @@ v0.4.1 (unreleased): | |||||
- Added support for Python 3.5. | - Added support for Python 3.5. | ||||
- '<' and '>' are now disallowed in wikilink titles and template names. This | - '<' and '>' are now disallowed in wikilink titles and template names. This | ||||
includes when denoting tags, but not comments. | includes when denoting tags, but not comments. | ||||
- Heavy refactoring and fixes to the C tokenizer. | |||||
- Fixed some bugs in the release scripts. | - Fixed some bugs in the release scripts. | ||||
v0.4 (released May 23, 2015): | v0.4 (released May 23, 2015): | ||||
@@ -13,6 +13,7 @@ Unreleased | |||||
- Added support for Python 3.5. | - Added support for Python 3.5. | ||||
- ``<`` and ``>`` are now disallowed in wikilink titles and template names. | - ``<`` and ``>`` are now disallowed in wikilink titles and template names. | ||||
This includes when denoting tags, but not comments. | This includes when denoting tags, but not comments. | ||||
- Heavy refactoring and fixes to the C tokenizer. | |||||
- Fixed some bugs in the release scripts. | - Fixed some bugs in the release scripts. | ||||
v0.4 | v0.4 | ||||
@@ -0,0 +1,40 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#ifndef PY_SSIZE_T_CLEAN | |||||
#define PY_SSIZE_T_CLEAN | |||||
#endif | |||||
#include <Python.h> | |||||
#include <structmember.h> | |||||
#include <bytesobject.h> | |||||
#if PY_MAJOR_VERSION >= 3 | |||||
#define IS_PY3K | |||||
#endif | |||||
#ifndef uint64_t | |||||
#define uint64_t unsigned PY_LONG_LONG | |||||
#endif | |||||
#define malloc PyObject_Malloc | |||||
#define free PyObject_Free |
@@ -0,0 +1,100 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#include "textbuffer.h" | |||||
#define TEXTBUFFER_BLOCKSIZE 1024 | |||||
/* | |||||
Create a new textbuffer object. | |||||
*/ | |||||
Textbuffer* Textbuffer_new(void) | |||||
{ | |||||
Textbuffer* buffer = malloc(sizeof(Textbuffer)); | |||||
if (!buffer) { | |||||
PyErr_NoMemory(); | |||||
return NULL; | |||||
} | |||||
buffer->size = 0; | |||||
buffer->data = malloc(sizeof(Py_UNICODE) * TEXTBUFFER_BLOCKSIZE); | |||||
if (!buffer->data) { | |||||
free(buffer); | |||||
PyErr_NoMemory(); | |||||
return NULL; | |||||
} | |||||
buffer->prev = buffer->next = NULL; | |||||
return buffer; | |||||
} | |||||
/* | |||||
Deallocate the given textbuffer. | |||||
*/ | |||||
void Textbuffer_dealloc(Textbuffer* self) | |||||
{ | |||||
Textbuffer* next; | |||||
while (self) { | |||||
free(self->data); | |||||
next = self->next; | |||||
free(self); | |||||
self = next; | |||||
} | |||||
} | |||||
/* | |||||
Write a Unicode codepoint to the given textbuffer. | |||||
*/ | |||||
int Textbuffer_write(Textbuffer** this, Py_UNICODE code) | |||||
{ | |||||
Textbuffer* self = *this; | |||||
if (self->size == TEXTBUFFER_BLOCKSIZE) { | |||||
Textbuffer* new = Textbuffer_new(); | |||||
if (!new) | |||||
return -1; | |||||
new->next = self; | |||||
self->prev = new; | |||||
*this = self = new; | |||||
} | |||||
self->data[self->size++] = code; | |||||
return 0; | |||||
} | |||||
/* | |||||
Return the contents of the textbuffer as a Python Unicode object. | |||||
*/ | |||||
PyObject* Textbuffer_render(Textbuffer* self) | |||||
{ | |||||
PyObject *result = PyUnicode_FromUnicode(self->data, self->size); | |||||
PyObject *left, *concat; | |||||
while (self->next) { | |||||
self = self->next; | |||||
left = PyUnicode_FromUnicode(self->data, self->size); | |||||
concat = PyUnicode_Concat(left, result); | |||||
Py_DECREF(left); | |||||
Py_DECREF(result); | |||||
result = concat; | |||||
} | |||||
return result; | |||||
} |
@@ -0,0 +1,40 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#include "common.h" | |||||
/* Structs */ | |||||
struct Textbuffer { | |||||
Py_ssize_t size; | |||||
Py_UNICODE* data; | |||||
struct Textbuffer* prev; | |||||
struct Textbuffer* next; | |||||
}; | |||||
typedef struct Textbuffer Textbuffer; | |||||
/* Functions */ | |||||
Textbuffer* Textbuffer_new(void); | |||||
void Textbuffer_dealloc(Textbuffer*); | |||||
int Textbuffer_write(Textbuffer**, Py_UNICODE); | |||||
PyObject* Textbuffer_render(Textbuffer*); |
@@ -1,5 +1,4 @@ | |||||
/* | /* | ||||
Tokenizer for MWParserFromHell | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | ||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | Permission is hereby granted, free of charge, to any person obtaining a copy of | ||||
@@ -89,75 +88,6 @@ static PyObject* strip_tag_name(PyObject* token, int take_attr) | |||||
return lowered; | return lowered; | ||||
} | } | ||||
static Textbuffer* Textbuffer_new(void) | |||||
{ | |||||
Textbuffer* buffer = malloc(sizeof(Textbuffer)); | |||||
if (!buffer) { | |||||
PyErr_NoMemory(); | |||||
return NULL; | |||||
} | |||||
buffer->size = 0; | |||||
buffer->data = malloc(sizeof(Py_UNICODE) * TEXTBUFFER_BLOCKSIZE); | |||||
if (!buffer->data) { | |||||
free(buffer); | |||||
PyErr_NoMemory(); | |||||
return NULL; | |||||
} | |||||
buffer->prev = buffer->next = NULL; | |||||
return buffer; | |||||
} | |||||
/*
    Free the given block and every block reachable from it through the `next`
    links, including each block's data storage. Passing NULL does nothing.
*/
static void Textbuffer_dealloc(Textbuffer* self)
{
    Textbuffer* block = self;

    while (block != NULL) {
        /* Read the link before the block that holds it is released. */
        Textbuffer* following = block->next;

        free(block->data);
        free(block);
        block = following;
    }
}
/* | |||||
Write a Unicode codepoint to the given textbuffer. | |||||
*/ | |||||
static int Textbuffer_write(Textbuffer** this, Py_UNICODE code) | |||||
{ | |||||
Textbuffer* self = *this; | |||||
if (self->size == TEXTBUFFER_BLOCKSIZE) { | |||||
Textbuffer* new = Textbuffer_new(); | |||||
if (!new) | |||||
return -1; | |||||
new->next = self; | |||||
self->prev = new; | |||||
*this = self = new; | |||||
} | |||||
self->data[self->size++] = code; | |||||
return 0; | |||||
} | |||||
/* | |||||
Return the contents of the textbuffer as a Python Unicode object. | |||||
*/ | |||||
static PyObject* Textbuffer_render(Textbuffer* self) | |||||
{ | |||||
PyObject *result = PyUnicode_FromUnicode(self->data, self->size); | |||||
PyObject *left, *concat; | |||||
while (self->next) { | |||||
self = self->next; | |||||
left = PyUnicode_FromUnicode(self->data, self->size); | |||||
concat = PyUnicode_Concat(left, result); | |||||
Py_DECREF(left); | |||||
Py_DECREF(result); | |||||
result = concat; | |||||
} | |||||
return result; | |||||
} | |||||
static TagData* TagData_new(void) | static TagData* TagData_new(void) | ||||
{ | { | ||||
TagData *self = malloc(sizeof(TagData)); | TagData *self = malloc(sizeof(TagData)); |
@@ -1,5 +1,4 @@ | |||||
/* | /* | ||||
Tokenizer Header File for MWParserFromHell | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | ||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | Permission is hereby granted, free of charge, to any person obtaining a copy of | ||||
@@ -21,25 +20,10 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | SOFTWARE. | ||||
*/ | */ | ||||
#ifndef PY_SSIZE_T_CLEAN | |||||
#define PY_SSIZE_T_CLEAN | |||||
#endif | |||||
#include <Python.h> | |||||
#include <math.h> | #include <math.h> | ||||
#include <structmember.h> | |||||
#include <bytesobject.h> | |||||
#if PY_MAJOR_VERSION >= 3 | |||||
#define IS_PY3K | |||||
#endif | |||||
#ifndef uint64_t | |||||
#define uint64_t unsigned PY_LONG_LONG | |||||
#endif | |||||
#define malloc PyObject_Malloc | |||||
#define free PyObject_Free | |||||
#include "common.h" | |||||
#include "textbuffer.h" | |||||
#define DIGITS "0123456789" | #define DIGITS "0123456789" | ||||
#define HEXDIGITS "0123456789abcdefABCDEF" | #define HEXDIGITS "0123456789abcdefABCDEF" | ||||
@@ -50,7 +34,6 @@ static const char MARKERS[] = { | |||||
'-', '!', '\n', '\0'}; | '-', '!', '\n', '\0'}; | ||||
#define NUM_MARKERS 19 | #define NUM_MARKERS 19 | ||||
#define TEXTBUFFER_BLOCKSIZE 1024 | |||||
#define MAX_DEPTH 40 | #define MAX_DEPTH 40 | ||||
#define MAX_CYCLES 100000 | #define MAX_CYCLES 100000 | ||||
#define MAX_BRACES 255 | #define MAX_BRACES 255 | ||||
@@ -196,13 +179,6 @@ static PyObject* TagCloseClose; | |||||
/* Miscellaneous structs: */ | /* Miscellaneous structs: */ | ||||
struct Textbuffer { | |||||
Py_ssize_t size; | |||||
Py_UNICODE* data; | |||||
struct Textbuffer* prev; | |||||
struct Textbuffer* next; | |||||
}; | |||||
struct Stack { | struct Stack { | ||||
PyObject* stack; | PyObject* stack; | ||||
uint64_t context; | uint64_t context; | ||||
@@ -224,7 +200,6 @@ typedef struct { | |||||
Py_ssize_t reset; | Py_ssize_t reset; | ||||
} TagData; | } TagData; | ||||
typedef struct Textbuffer Textbuffer; | |||||
typedef struct Stack Stack; | typedef struct Stack Stack; | ||||
@@ -268,9 +243,6 @@ typedef struct { | |||||
/* Function prototypes: */ | /* Function prototypes: */ | ||||
static Textbuffer* Textbuffer_new(void);
static void        Textbuffer_dealloc(Textbuffer* self);
static TagData* TagData_new(void); | static TagData* TagData_new(void); | ||||
static void TagData_dealloc(TagData*); | static void TagData_dealloc(TagData*); | ||||
@@ -23,6 +23,7 @@ | |||||
from __future__ import print_function | from __future__ import print_function | ||||
from distutils.errors import DistutilsError, CCompilerError | from distutils.errors import DistutilsError, CCompilerError | ||||
from glob import glob | |||||
from os import environ | from os import environ | ||||
import sys | import sys | ||||
@@ -39,10 +40,6 @@ from mwparserfromhell.compat import py26, py3k | |||||
with open("README.rst", **({'encoding':'utf-8'} if py3k else {})) as fp: | with open("README.rst", **({'encoding':'utf-8'} if py3k else {})) as fp: | ||||
long_docs = fp.read() | long_docs = fp.read() | ||||
# C extension module that provides the tokenizer.
tokenizer = Extension(
    "mwparserfromhell.parser._tokenizer",
    sources=["mwparserfromhell/parser/tokenizer.c"],
    depends=["mwparserfromhell/parser/tokenizer.h"],
)
use_extension = True | use_extension = True | ||||
fallback = True | fallback = True | ||||
@@ -75,6 +72,12 @@ def build_ext_patched(self): | |||||
if fallback: | if fallback: | ||||
build_ext.run, build_ext_original = build_ext_patched, build_ext.run | build_ext.run, build_ext_original = build_ext_patched, build_ext.run | ||||
# Project-specific part begins here: | |||||
# glob() returns matches in arbitrary, filesystem-dependent order; sorting keeps
# the list of sources handed to the compiler identical from one build to the next.
tokenizer = Extension("mwparserfromhell.parser._tokenizer",
                      sources=sorted(glob("mwparserfromhell/parser/ctokenizer/*.c")),
                      depends=sorted(glob("mwparserfromhell/parser/ctokenizer/*.h")))
setup( | setup( | ||||
name = "mwparserfromhell", | name = "mwparserfromhell", | ||||
packages = find_packages(exclude=("tests",)), | packages = find_packages(exclude=("tests",)), | ||||