@@ -6,6 +6,7 @@ v0.4.1 (unreleased): | |||
- Added support for Python 3.5. | |||
- '<' and '>' are now disallowed in wikilink titles and template names. This | |||
includes when denoting tags, but not comments. | |||
- Heavy refactoring and fixes to the C tokenizer. | |||
- Fixed some bugs in the release scripts. | |||
v0.4 (released May 23, 2015): | |||
@@ -13,6 +13,7 @@ Unreleased | |||
- Added support for Python 3.5. | |||
- ``<`` and ``>`` are now disallowed in wikilink titles and template names. | |||
This includes when denoting tags, but not comments. | |||
- Heavy refactoring and fixes to the C tokenizer. | |||
- Fixed some bugs in the release scripts. | |||
v0.4 | |||
@@ -0,0 +1,40 @@ | |||
/* | |||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
this software and associated documentation files (the "Software"), to deal in | |||
the Software without restriction, including without limitation the rights to | |||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||
of the Software, and to permit persons to whom the Software is furnished to do | |||
so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in all | |||
copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. | |||
*/ | |||
/*
    Shared definitions for the C tokenizer sources: Python headers, the
    Python 3 feature flag, a uint64_t fallback, and allocator aliases.

    The include guard makes this header safe to pull in more than once (it
    is included both directly and through other headers of this extension).
*/
#ifndef MWPARSERFROMHELL_COMMON_H
#define MWPARSERFROMHELL_COMMON_H

#ifndef PY_SSIZE_T_CLEAN
#define PY_SSIZE_T_CLEAN
#endif

#include <Python.h>
#include <structmember.h>
#include <bytesobject.h>

/* Compatibility macros */

#if PY_MAJOR_VERSION >= 3
#define IS_PY3K
#endif

#ifndef uint64_t
#define uint64_t unsigned PY_LONG_LONG
#endif

/* Route plain malloc()/free() calls to Python's object allocator. */
#define malloc PyObject_Malloc
#define free   PyObject_Free

#endif  /* MWPARSERFROMHELL_COMMON_H */
@@ -0,0 +1,100 @@ | |||
/* | |||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
this software and associated documentation files (the "Software"), to deal in | |||
the Software without restriction, including without limitation the rights to | |||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||
of the Software, and to permit persons to whom the Software is furnished to do | |||
so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in all | |||
copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. | |||
*/ | |||
#include "textbuffer.h" | |||
#define TEXTBUFFER_BLOCKSIZE 1024 | |||
/* | |||
Create a new textbuffer object. | |||
*/ | |||
Textbuffer* Textbuffer_new(void) | |||
{ | |||
Textbuffer* buffer = malloc(sizeof(Textbuffer)); | |||
if (!buffer) { | |||
PyErr_NoMemory(); | |||
return NULL; | |||
} | |||
buffer->size = 0; | |||
buffer->data = malloc(sizeof(Py_UNICODE) * TEXTBUFFER_BLOCKSIZE); | |||
if (!buffer->data) { | |||
free(buffer); | |||
PyErr_NoMemory(); | |||
return NULL; | |||
} | |||
buffer->prev = buffer->next = NULL; | |||
return buffer; | |||
} | |||
/* | |||
Deallocate the given textbuffer. | |||
*/ | |||
void Textbuffer_dealloc(Textbuffer* self) | |||
{ | |||
Textbuffer* next; | |||
while (self) { | |||
free(self->data); | |||
next = self->next; | |||
free(self); | |||
self = next; | |||
} | |||
} | |||
/* | |||
Write a Unicode codepoint to the given textbuffer. | |||
*/ | |||
int Textbuffer_write(Textbuffer** this, Py_UNICODE code) | |||
{ | |||
Textbuffer* self = *this; | |||
if (self->size == TEXTBUFFER_BLOCKSIZE) { | |||
Textbuffer* new = Textbuffer_new(); | |||
if (!new) | |||
return -1; | |||
new->next = self; | |||
self->prev = new; | |||
*this = self = new; | |||
} | |||
self->data[self->size++] = code; | |||
return 0; | |||
} | |||
/* | |||
Return the contents of the textbuffer as a Python Unicode object. | |||
*/ | |||
PyObject* Textbuffer_render(Textbuffer* self) | |||
{ | |||
PyObject *result = PyUnicode_FromUnicode(self->data, self->size); | |||
PyObject *left, *concat; | |||
while (self->next) { | |||
self = self->next; | |||
left = PyUnicode_FromUnicode(self->data, self->size); | |||
concat = PyUnicode_Concat(left, result); | |||
Py_DECREF(left); | |||
Py_DECREF(result); | |||
result = concat; | |||
} | |||
return result; | |||
} |
@@ -0,0 +1,40 @@ | |||
/* | |||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
this software and associated documentation files (the "Software"), to deal in | |||
the Software without restriction, including without limitation the rights to | |||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||
of the Software, and to permit persons to whom the Software is furnished to do | |||
so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in all | |||
copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. | |||
*/ | |||
/*
    Public interface of the textbuffer.

    The include guard keeps struct Textbuffer and its typedef from being
    redefined when this header is reached through more than one include path.
*/
#ifndef MWPARSERFROMHELL_TEXTBUFFER_H
#define MWPARSERFROMHELL_TEXTBUFFER_H

#include "common.h"

/* Structs */

/* One block in a doubly linked chain of text storage. */
struct Textbuffer {
    Py_ssize_t size;            /* number of code units stored in data */
    Py_UNICODE* data;           /* storage owned by this block */
    struct Textbuffer* prev;    /* neighbouring blocks in the chain, */
    struct Textbuffer* next;    /* or NULL when there is none */
};

typedef struct Textbuffer Textbuffer;

/* Functions */

Textbuffer* Textbuffer_new(void);
void Textbuffer_dealloc(Textbuffer*);
int Textbuffer_write(Textbuffer**, Py_UNICODE);
PyObject* Textbuffer_render(Textbuffer*);

#endif  /* MWPARSERFROMHELL_TEXTBUFFER_H */
@@ -1,5 +1,4 @@ | |||
/* | |||
Tokenizer for MWParserFromHell | |||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
@@ -89,75 +88,6 @@ static PyObject* strip_tag_name(PyObject* token, int take_attr) | |||
return lowered; | |||
} | |||
static Textbuffer* Textbuffer_new(void) | |||
{ | |||
Textbuffer* buffer = malloc(sizeof(Textbuffer)); | |||
if (!buffer) { | |||
PyErr_NoMemory(); | |||
return NULL; | |||
} | |||
buffer->size = 0; | |||
buffer->data = malloc(sizeof(Py_UNICODE) * TEXTBUFFER_BLOCKSIZE); | |||
if (!buffer->data) { | |||
free(buffer); | |||
PyErr_NoMemory(); | |||
return NULL; | |||
} | |||
buffer->prev = buffer->next = NULL; | |||
return buffer; | |||
} | |||
/*
    Release a textbuffer block and all blocks that follow it via next.
    A NULL argument is accepted and ignored.
*/
static void Textbuffer_dealloc(Textbuffer* self)
{
    while (self) {
        /* Grab the successor first; self is gone after the frees below. */
        Textbuffer* rest = self->next;

        free(self->data);
        free(self);
        self = rest;
    }
}
/* | |||
Write a Unicode codepoint to the given textbuffer. | |||
*/ | |||
static int Textbuffer_write(Textbuffer** this, Py_UNICODE code) | |||
{ | |||
Textbuffer* self = *this; | |||
if (self->size == TEXTBUFFER_BLOCKSIZE) { | |||
Textbuffer* new = Textbuffer_new(); | |||
if (!new) | |||
return -1; | |||
new->next = self; | |||
self->prev = new; | |||
*this = self = new; | |||
} | |||
self->data[self->size++] = code; | |||
return 0; | |||
} | |||
/* | |||
Return the contents of the textbuffer as a Python Unicode object. | |||
*/ | |||
static PyObject* Textbuffer_render(Textbuffer* self) | |||
{ | |||
PyObject *result = PyUnicode_FromUnicode(self->data, self->size); | |||
PyObject *left, *concat; | |||
while (self->next) { | |||
self = self->next; | |||
left = PyUnicode_FromUnicode(self->data, self->size); | |||
concat = PyUnicode_Concat(left, result); | |||
Py_DECREF(left); | |||
Py_DECREF(result); | |||
result = concat; | |||
} | |||
return result; | |||
} | |||
static TagData* TagData_new(void) | |||
{ | |||
TagData *self = malloc(sizeof(TagData)); |
@@ -1,5 +1,4 @@ | |||
/* | |||
Tokenizer Header File for MWParserFromHell | |||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
@@ -21,25 +20,10 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. | |||
*/ | |||
#ifndef PY_SSIZE_T_CLEAN | |||
#define PY_SSIZE_T_CLEAN | |||
#endif | |||
#include <Python.h> | |||
#include <math.h> | |||
#include <structmember.h> | |||
#include <bytesobject.h> | |||
#if PY_MAJOR_VERSION >= 3 | |||
#define IS_PY3K | |||
#endif | |||
#ifndef uint64_t | |||
#define uint64_t unsigned PY_LONG_LONG | |||
#endif | |||
#define malloc PyObject_Malloc | |||
#define free PyObject_Free | |||
#include "common.h" | |||
#include "textbuffer.h" | |||
#define DIGITS "0123456789" | |||
#define HEXDIGITS "0123456789abcdefABCDEF" | |||
@@ -50,7 +34,6 @@ static const char MARKERS[] = { | |||
'-', '!', '\n', '\0'}; | |||
#define NUM_MARKERS 19 | |||
#define TEXTBUFFER_BLOCKSIZE 1024 | |||
#define MAX_DEPTH 40 | |||
#define MAX_CYCLES 100000 | |||
#define MAX_BRACES 255 | |||
@@ -196,13 +179,6 @@ static PyObject* TagCloseClose; | |||
/* Miscellaneous structs: */ | |||
struct Textbuffer { | |||
Py_ssize_t size; | |||
Py_UNICODE* data; | |||
struct Textbuffer* prev; | |||
struct Textbuffer* next; | |||
}; | |||
struct Stack { | |||
PyObject* stack; | |||
uint64_t context; | |||
@@ -224,7 +200,6 @@ typedef struct { | |||
Py_ssize_t reset; | |||
} TagData; | |||
typedef struct Textbuffer Textbuffer; | |||
typedef struct Stack Stack; | |||
@@ -268,9 +243,6 @@ typedef struct { | |||
/* Function prototypes: */ | |||
static Textbuffer* Textbuffer_new(void); | |||
static void Textbuffer_dealloc(Textbuffer*); | |||
static TagData* TagData_new(void); | |||
static void TagData_dealloc(TagData*); | |||
@@ -23,6 +23,7 @@ | |||
from __future__ import print_function | |||
from distutils.errors import DistutilsError, CCompilerError | |||
from glob import glob | |||
from os import environ | |||
import sys | |||
@@ -39,10 +40,6 @@ from mwparserfromhell.compat import py26, py3k | |||
with open("README.rst", **({'encoding':'utf-8'} if py3k else {})) as fp: | |||
long_docs = fp.read() | |||
tokenizer = Extension("mwparserfromhell.parser._tokenizer", | |||
sources=["mwparserfromhell/parser/tokenizer.c"], | |||
depends=["mwparserfromhell/parser/tokenizer.h"]) | |||
use_extension = True | |||
fallback = True | |||
@@ -75,6 +72,12 @@ def build_ext_patched(self): | |||
if fallback: | |||
build_ext.run, build_ext_original = build_ext_patched, build_ext.run | |||
# Project-specific part begins here: | |||
# Build the C tokenizer from every source file in the ctokenizer directory.
# glob() returns paths in an arbitrary, filesystem-dependent order, so the
# lists are sorted to keep the compile/link order the same on every machine.
tokenizer = Extension("mwparserfromhell.parser._tokenizer",
                      sources=sorted(glob("mwparserfromhell/parser/ctokenizer/*.c")),
                      depends=sorted(glob("mwparserfromhell/parser/ctokenizer/*.h")))
setup( | |||
name = "mwparserfromhell", | |||
packages = find_packages(exclude=("tests",)), | |||