@@ -8,9 +8,17 @@ v0.4.1 (unreleased): | |||||
includes when denoting tags, but not comments. | includes when denoting tags, but not comments. | ||||
- Fixed the behavior of preserve_spacing in Template.add() and keep_field in | - Fixed the behavior of preserve_spacing in Template.add() and keep_field in | ||||
Template.remove() on parameters with hidden keys. | Template.remove() on parameters with hidden keys. | ||||
- Removed _ListProxy.detach(). SmartLists now use weak references and their | |||||
children are garbage-collected properly. | |||||
- Fixed parser bugs involving: | - Fixed parser bugs involving: | ||||
- templates with completely blank names; | - templates with completely blank names; | ||||
- templates with newlines and comments. | - templates with newlines and comments. | ||||
- Heavy refactoring and fixes to the C tokenizer, including: | |||||
- corrected a design flaw in text handling, allowing for substantial speed | |||||
improvements when parsing long strings of plain text; | |||||
- implemented new Python 3.3 PEP 393 Unicode APIs. | |||||
- Fixed various bugs in SmartList, including one that was causing memory issues | |||||
on 64-bit builds of Python 2 on Windows. | |||||
- Fixed some bugs in the release scripts. | - Fixed some bugs in the release scripts. | ||||
v0.4 (released May 23, 2015): | v0.4 (released May 23, 2015): | ||||
@@ -0,0 +1,64 @@ | |||||
# This config file is used by appveyor.com to build Windows release binaries | |||||
version: 0.4.1.dev0-b{build} | |||||
branches: | |||||
only: | |||||
- master | |||||
skip_tags: true | |||||
environment: | |||||
global: | |||||
# See: http://stackoverflow.com/a/13751649/163740 | |||||
WRAPPER: "cmd /E:ON /V:ON /C .\\scripts\\win_wrapper.cmd" | |||||
PIP: "%WRAPPER% %PYTHON%\\Scripts\\pip.exe" | |||||
SETUPPY: "%WRAPPER% %PYTHON%\\python setup.py --with-extension" | |||||
PYPI_USERNAME: "earwigbot" | |||||
PYPI_PASSWORD: | |||||
secure: gOIcvPxSC2ujuhwOzwj3v8xjq3CCYd8keFWVnguLM+gcL0e02qshDHy7gwZZwj0+ | |||||
matrix: | |||||
- PYTHON: "C:\\Python27" | |||||
PYTHON_VERSION: "2.7" | |||||
PYTHON_ARCH: "32" | |||||
- PYTHON: "C:\\Python27-x64" | |||||
PYTHON_VERSION: "2.7" | |||||
PYTHON_ARCH: "64" | |||||
- PYTHON: "C:\\Python33" | |||||
PYTHON_VERSION: "3.3" | |||||
PYTHON_ARCH: "32" | |||||
- PYTHON: "C:\\Python33-x64" | |||||
PYTHON_VERSION: "3.3" | |||||
PYTHON_ARCH: "64" | |||||
- PYTHON: "C:\\Python34" | |||||
PYTHON_VERSION: "3.4" | |||||
PYTHON_ARCH: "32" | |||||
- PYTHON: "C:\\Python34-x64" | |||||
PYTHON_VERSION: "3.4" | |||||
PYTHON_ARCH: "64" | |||||
install: | |||||
- "%PIP% install wheel twine" | |||||
build_script: | |||||
- "%SETUPPY% build" | |||||
test_script: | |||||
- "%SETUPPY% -q test" | |||||
after_test: | |||||
- "%SETUPPY% bdist_wheel" | |||||
on_success: | |||||
- "twine upload dist\\* -u %PYPI_USERNAME% -p %PYPI_PASSWORD%" | |||||
artifacts: | |||||
- path: dist\* | |||||
deploy: off |
@@ -13,13 +13,24 @@ Unreleased | |||||
- Added support for Python 3.5. | - Added support for Python 3.5. | ||||
- ``<`` and ``>`` are now disallowed in wikilink titles and template names. | - ``<`` and ``>`` are now disallowed in wikilink titles and template names. | ||||
This includes when denoting tags, but not comments. | This includes when denoting tags, but not comments. | ||||
- Fixed the behavior of *preserve_spacing* in :func:`~.Template.add` and | |||||
*keep_field* in :func:`~.Template.remove` on parameters with hidden keys. | |||||
- Fixed the behavior of *preserve_spacing* in :meth:`.Template.add` and | |||||
*keep_field* in :meth:`.Template.remove` on parameters with hidden keys. | |||||
- Removed :meth:`._ListProxy.detach`. :class:`.SmartList`\ s now use weak | |||||
references and their children are garbage-collected properly. | |||||
- Fixed parser bugs involving: | - Fixed parser bugs involving: | ||||
- templates with completely blank names; | - templates with completely blank names; | ||||
- templates with newlines and comments. | - templates with newlines and comments. | ||||
- Heavy refactoring and fixes to the C tokenizer, including: | |||||
- corrected a design flaw in text handling, allowing for substantial speed | |||||
improvements when parsing long strings of plain text; | |||||
- implemented new Python 3.3 | |||||
`PEP 393 <https://www.python.org/dev/peps/pep-0393/>`_ Unicode APIs. | |||||
- Fixed various bugs in :class:`.SmartList`, including one that was causing | |||||
memory issues on 64-bit builds of Python 2 on Windows. | |||||
- Fixed some bugs in the release scripts. | - Fixed some bugs in the release scripts. | ||||
v0.4 | v0.4 | ||||
@@ -18,14 +18,12 @@ if py3k: | |||||
bytes = bytes | bytes = bytes | ||||
str = str | str = str | ||||
range = range | range = range | ||||
maxsize = sys.maxsize | |||||
import html.entities as htmlentities | import html.entities as htmlentities | ||||
else: | else: | ||||
bytes = str | bytes = str | ||||
str = unicode | str = unicode | ||||
range = xrange | range = xrange | ||||
maxsize = sys.maxint | |||||
import htmlentitydefs as htmlentities | import htmlentitydefs as htmlentities | ||||
del sys | del sys |
@@ -81,10 +81,8 @@ def is_single_only(tag): | |||||
"""Return whether or not the given *tag* must exist without a close tag.""" | """Return whether or not the given *tag* must exist without a close tag.""" | ||||
return tag.lower() in SINGLE_ONLY | return tag.lower() in SINGLE_ONLY | ||||
def is_scheme(scheme, slashes=True, reverse=False): | |||||
def is_scheme(scheme, slashes=True): | |||||
"""Return whether *scheme* is valid for external links.""" | """Return whether *scheme* is valid for external links.""" | ||||
if reverse: # Convenience for C | |||||
scheme = scheme[::-1] | |||||
scheme = scheme.lower() | scheme = scheme.lower() | ||||
if slashes: | if slashes: | ||||
return scheme in URI_SCHEMES | return scheme in URI_SCHEMES | ||||
@@ -0,0 +1,125 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#pragma once | |||||
#ifndef PY_SSIZE_T_CLEAN | |||||
#define PY_SSIZE_T_CLEAN // See: https://docs.python.org/2/c-api/arg.html | |||||
#endif | |||||
#include <Python.h> | |||||
#include <structmember.h> | |||||
#include <bytesobject.h> | |||||
/* Compatibility macros */ | |||||
#if PY_MAJOR_VERSION >= 3 | |||||
#define IS_PY3K | |||||
#endif | |||||
#ifndef uint64_t | |||||
#define uint64_t unsigned PY_LONG_LONG | |||||
#endif | |||||
#define malloc PyObject_Malloc // XXX: yuck | |||||
#define realloc PyObject_Realloc | |||||
#define free PyObject_Free | |||||
/* Unicode support macros */ | |||||
#if defined(IS_PY3K) && PY_MINOR_VERSION >= 3 | |||||
#define PEP_393 | |||||
#endif | |||||
#ifdef PEP_393 | |||||
#define Unicode Py_UCS4 | |||||
#define PyUnicode_FROM_SINGLE(chr) \ | |||||
PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &(chr), 1) | |||||
#else | |||||
#define Unicode Py_UNICODE | |||||
#define PyUnicode_FROM_SINGLE(chr) \ | |||||
PyUnicode_FromUnicode(&(chr), 1) | |||||
#define PyUnicode_GET_LENGTH PyUnicode_GET_SIZE | |||||
#endif | |||||
/* Error handling macros */ | |||||
#define BAD_ROUTE self->route_state | |||||
#define BAD_ROUTE_CONTEXT self->route_context | |||||
#define FAIL_ROUTE(context) { \ | |||||
self->route_state = 1; \ | |||||
self->route_context = context; \ | |||||
} | |||||
#define RESET_ROUTE() self->route_state = 0 | |||||
/* Shared globals */ | |||||
extern char** entitydefs; | |||||
extern PyObject* NOARGS; | |||||
extern PyObject* definitions; | |||||
/* Structs */ | |||||
typedef struct { | |||||
Py_ssize_t capacity; | |||||
Py_ssize_t length; | |||||
#ifdef PEP_393 | |||||
PyObject* object; | |||||
int kind; | |||||
void* data; | |||||
#else | |||||
Py_UNICODE* data; | |||||
#endif | |||||
} Textbuffer; | |||||
struct Stack { | |||||
PyObject* stack; | |||||
uint64_t context; | |||||
Textbuffer* textbuffer; | |||||
struct Stack* next; | |||||
}; | |||||
typedef struct Stack Stack; | |||||
typedef struct { | |||||
PyObject* object; /* base PyUnicodeObject object */ | |||||
Py_ssize_t length; /* length of object, in code points */ | |||||
#ifdef PEP_393 | |||||
int kind; /* object's kind value */ | |||||
void* data; /* object's raw unicode buffer */ | |||||
#else | |||||
Py_UNICODE* buf; /* object's internal buffer */ | |||||
#endif | |||||
} TokenizerInput; | |||||
typedef struct { | |||||
PyObject_HEAD | |||||
TokenizerInput text; /* text to tokenize */ | |||||
Stack* topstack; /* topmost stack */ | |||||
Py_ssize_t head; /* current position in text */ | |||||
int global; /* global context */ | |||||
int depth; /* stack recursion depth */ | |||||
int cycles; /* total number of stack recursions */ | |||||
int route_state; /* whether a BadRoute has been triggered */ | |||||
uint64_t route_context; /* context when the last BadRoute was triggered */ | |||||
int skip_style_tags; /* temp fix for the sometimes broken tag parser */ | |||||
} Tokenizer; |
@@ -0,0 +1,105 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#pragma once | |||||
/* Local contexts */ | |||||
#define LC_TEMPLATE 0x0000000000000007 | |||||
#define LC_TEMPLATE_NAME 0x0000000000000001 | |||||
#define LC_TEMPLATE_PARAM_KEY 0x0000000000000002 | |||||
#define LC_TEMPLATE_PARAM_VALUE 0x0000000000000004 | |||||
#define LC_ARGUMENT 0x0000000000000018 | |||||
#define LC_ARGUMENT_NAME 0x0000000000000008 | |||||
#define LC_ARGUMENT_DEFAULT 0x0000000000000010 | |||||
#define LC_WIKILINK 0x0000000000000060 | |||||
#define LC_WIKILINK_TITLE 0x0000000000000020 | |||||
#define LC_WIKILINK_TEXT 0x0000000000000040 | |||||
#define LC_EXT_LINK 0x0000000000000180 | |||||
#define LC_EXT_LINK_URI 0x0000000000000080 | |||||
#define LC_EXT_LINK_TITLE 0x0000000000000100 | |||||
#define LC_HEADING 0x0000000000007E00 | |||||
#define LC_HEADING_LEVEL_1 0x0000000000000200 | |||||
#define LC_HEADING_LEVEL_2 0x0000000000000400 | |||||
#define LC_HEADING_LEVEL_3 0x0000000000000800 | |||||
#define LC_HEADING_LEVEL_4 0x0000000000001000 | |||||
#define LC_HEADING_LEVEL_5 0x0000000000002000 | |||||
#define LC_HEADING_LEVEL_6 0x0000000000004000 | |||||
#define LC_TAG 0x0000000000078000 | |||||
#define LC_TAG_OPEN 0x0000000000008000 | |||||
#define LC_TAG_ATTR 0x0000000000010000 | |||||
#define LC_TAG_BODY 0x0000000000020000 | |||||
#define LC_TAG_CLOSE 0x0000000000040000 | |||||
#define LC_STYLE 0x0000000000780000 | |||||
#define LC_STYLE_ITALICS 0x0000000000080000 | |||||
#define LC_STYLE_BOLD 0x0000000000100000 | |||||
#define LC_STYLE_PASS_AGAIN 0x0000000000200000 | |||||
#define LC_STYLE_SECOND_PASS 0x0000000000400000 | |||||
#define LC_DLTERM 0x0000000000800000 | |||||
#define LC_SAFETY_CHECK 0x000000007F000000 | |||||
#define LC_HAS_TEXT 0x0000000001000000 | |||||
#define LC_FAIL_ON_TEXT 0x0000000002000000 | |||||
#define LC_FAIL_NEXT 0x0000000004000000 | |||||
#define LC_FAIL_ON_LBRACE 0x0000000008000000 | |||||
#define LC_FAIL_ON_RBRACE 0x0000000010000000 | |||||
#define LC_FAIL_ON_EQUALS 0x0000000020000000 | |||||
#define LC_HAS_TEMPLATE 0x0000000040000000 | |||||
#define LC_TABLE 0x0000001F80000000 | |||||
#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000001A00000000 | |||||
#define LC_TABLE_OPEN 0x0000000080000000 | |||||
#define LC_TABLE_CELL_OPEN 0x0000000100000000 | |||||
#define LC_TABLE_CELL_STYLE 0x0000000200000000 | |||||
#define LC_TABLE_ROW_OPEN 0x0000000400000000 | |||||
#define LC_TABLE_TD_LINE 0x0000000800000000 | |||||
#define LC_TABLE_TH_LINE 0x0000001000000000 | |||||
/* Global contexts */ | |||||
#define GL_HEADING 0x1 | |||||
/* Aggregate contexts */ | |||||
#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE | LC_TABLE_OPEN) | |||||
#define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) | |||||
#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE | LC_TABLE_ROW_OPEN) | |||||
#define AGG_NO_WIKILINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI) | |||||
#define AGG_NO_EXT_LINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK) | |||||
/* Tag contexts */ | |||||
#define TAG_NAME 0x01 | |||||
#define TAG_ATTR_READY 0x02 | |||||
#define TAG_ATTR_NAME 0x04 | |||||
#define TAG_ATTR_VALUE 0x08 | |||||
#define TAG_QUOTED 0x10 | |||||
#define TAG_NOTE_SPACE 0x20 | |||||
#define TAG_NOTE_EQUALS 0x40 | |||||
#define TAG_NOTE_QUOTE 0x80 |
@@ -0,0 +1,78 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#include "tag_data.h" | |||||
#include "contexts.h" | |||||
/* | |||||
Initialize a new TagData object. | |||||
*/ | |||||
TagData* TagData_new(TokenizerInput* text) | |||||
{ | |||||
#define ALLOC_BUFFER(name) \ | |||||
name = Textbuffer_new(text); \ | |||||
if (!name) { \ | |||||
TagData_dealloc(self); \ | |||||
return NULL; \ | |||||
} | |||||
TagData *self = malloc(sizeof(TagData)); | |||||
if (!self) { | |||||
PyErr_NoMemory(); | |||||
return NULL; | |||||
} | |||||
self->context = TAG_NAME; | |||||
ALLOC_BUFFER(self->pad_first) | |||||
ALLOC_BUFFER(self->pad_before_eq) | |||||
ALLOC_BUFFER(self->pad_after_eq) | |||||
self->quoter = 0; | |||||
self->reset = 0; | |||||
return self; | |||||
#undef ALLOC_BUFFER | |||||
} | |||||
/* | |||||
Deallocate the given TagData object. | |||||
*/ | |||||
void TagData_dealloc(TagData* self) | |||||
{ | |||||
if (self->pad_first) | |||||
Textbuffer_dealloc(self->pad_first); | |||||
if (self->pad_before_eq) | |||||
Textbuffer_dealloc(self->pad_before_eq); | |||||
if (self->pad_after_eq) | |||||
Textbuffer_dealloc(self->pad_after_eq); | |||||
free(self); | |||||
} | |||||
/* | |||||
Clear the internal buffers of the given TagData object. | |||||
*/ | |||||
int TagData_reset_buffers(TagData* self) | |||||
{ | |||||
if (Textbuffer_reset(self->pad_first) || | |||||
Textbuffer_reset(self->pad_before_eq) || | |||||
Textbuffer_reset(self->pad_after_eq)) | |||||
return -1; | |||||
return 0; | |||||
} |
@@ -0,0 +1,43 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#pragma once | |||||
#include "common.h" | |||||
#include "textbuffer.h" | |||||
/* Structs */ | |||||
typedef struct { | |||||
uint64_t context; | |||||
Textbuffer* pad_first; | |||||
Textbuffer* pad_before_eq; | |||||
Textbuffer* pad_after_eq; | |||||
Unicode quoter; | |||||
Py_ssize_t reset; | |||||
} TagData; | |||||
/* Functions */ | |||||
TagData* TagData_new(TokenizerInput*); | |||||
void TagData_dealloc(TagData*); | |||||
int TagData_reset_buffers(TagData*); |
@@ -0,0 +1,232 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#include "textbuffer.h" | |||||
#define INITIAL_CAPACITY 32 | |||||
#define RESIZE_FACTOR 2 | |||||
#define CONCAT_EXTRA 32 | |||||
/* | |||||
Internal allocation function for textbuffers. | |||||
*/ | |||||
static int internal_alloc(Textbuffer* self, Unicode maxchar) | |||||
{ | |||||
self->capacity = INITIAL_CAPACITY; | |||||
self->length = 0; | |||||
#ifdef PEP_393 | |||||
self->object = PyUnicode_New(self->capacity, maxchar); | |||||
if (!self->object) | |||||
return -1; | |||||
self->kind = PyUnicode_KIND(self->object); | |||||
self->data = PyUnicode_DATA(self->object); | |||||
#else | |||||
(void) maxchar; // Unused | |||||
self->data = malloc(sizeof(Unicode) * self->capacity); | |||||
if (!self->data) | |||||
return -1; | |||||
#endif | |||||
return 0; | |||||
} | |||||
/* | |||||
Internal deallocation function for textbuffers. | |||||
*/ | |||||
static void internal_dealloc(Textbuffer* self) | |||||
{ | |||||
#ifdef PEP_393 | |||||
Py_DECREF(self->object); | |||||
#else | |||||
free(self->data); | |||||
#endif | |||||
} | |||||
/* | |||||
Internal resize function. | |||||
*/ | |||||
static int internal_resize(Textbuffer* self, Py_ssize_t new_cap) | |||||
{ | |||||
#ifdef PEP_393 | |||||
PyObject *newobj; | |||||
void *newdata; | |||||
newobj = PyUnicode_New(new_cap, PyUnicode_MAX_CHAR_VALUE(self->object)); | |||||
if (!newobj) | |||||
return -1; | |||||
newdata = PyUnicode_DATA(newobj); | |||||
memcpy(newdata, self->data, self->length * self->kind); | |||||
Py_DECREF(self->object); | |||||
self->object = newobj; | |||||
self->data = newdata; | |||||
#else | |||||
if (!(self->data = realloc(self->data, sizeof(Unicode) * new_cap))) | |||||
return -1; | |||||
#endif | |||||
self->capacity = new_cap; | |||||
return 0; | |||||
} | |||||
/* | |||||
Create a new textbuffer object. | |||||
*/ | |||||
Textbuffer* Textbuffer_new(TokenizerInput* text) | |||||
{ | |||||
Textbuffer* self = malloc(sizeof(Textbuffer)); | |||||
Unicode maxchar = 0; | |||||
#ifdef PEP_393 | |||||
maxchar = PyUnicode_MAX_CHAR_VALUE(text->object); | |||||
#endif | |||||
if (!self) | |||||
goto fail_nomem; | |||||
if (internal_alloc(self, maxchar) < 0) | |||||
goto fail_dealloc; | |||||
return self; | |||||
fail_dealloc: | |||||
free(self); | |||||
fail_nomem: | |||||
PyErr_NoMemory(); | |||||
return NULL; | |||||
} | |||||
/* | |||||
Deallocate the given textbuffer. | |||||
*/ | |||||
void Textbuffer_dealloc(Textbuffer* self) | |||||
{ | |||||
internal_dealloc(self); | |||||
free(self); | |||||
} | |||||
/* | |||||
Reset a textbuffer to its initial, empty state. | |||||
*/ | |||||
int Textbuffer_reset(Textbuffer* self) | |||||
{ | |||||
Unicode maxchar = 0; | |||||
#ifdef PEP_393 | |||||
maxchar = PyUnicode_MAX_CHAR_VALUE(self->object); | |||||
#endif | |||||
internal_dealloc(self); | |||||
if (internal_alloc(self, maxchar)) | |||||
return -1; | |||||
return 0; | |||||
} | |||||
/* | |||||
Write a Unicode codepoint to the given textbuffer. | |||||
*/ | |||||
int Textbuffer_write(Textbuffer* self, Unicode code) | |||||
{ | |||||
if (self->length >= self->capacity) { | |||||
if (internal_resize(self, self->capacity * RESIZE_FACTOR) < 0) | |||||
return -1; | |||||
} | |||||
#ifdef PEP_393 | |||||
PyUnicode_WRITE(self->kind, self->data, self->length++, code); | |||||
#else | |||||
self->data[self->length++] = code; | |||||
#endif | |||||
return 0; | |||||
} | |||||
/* | |||||
Read a Unicode codepoint from the given index of the given textbuffer. | |||||
This function does not check for bounds. | |||||
*/ | |||||
Unicode Textbuffer_read(Textbuffer* self, Py_ssize_t index) | |||||
{ | |||||
#ifdef PEP_393 | |||||
return PyUnicode_READ(self->kind, self->data, index); | |||||
#else | |||||
return self->data[index]; | |||||
#endif | |||||
} | |||||
/* | |||||
Return the contents of the textbuffer as a Python Unicode object. | |||||
*/ | |||||
PyObject* Textbuffer_render(Textbuffer* self) | |||||
{ | |||||
#ifdef PEP_393 | |||||
return PyUnicode_FromKindAndData(self->kind, self->data, self->length); | |||||
#else | |||||
return PyUnicode_FromUnicode(self->data, self->length); | |||||
#endif | |||||
} | |||||
/* | |||||
Concatenate the 'other' textbuffer onto the end of the given textbuffer. | |||||
*/ | |||||
int Textbuffer_concat(Textbuffer* self, Textbuffer* other) | |||||
{ | |||||
Py_ssize_t newlen = self->length + other->length; | |||||
if (newlen > self->capacity) { | |||||
if (internal_resize(self, newlen + CONCAT_EXTRA) < 0) | |||||
return -1; | |||||
} | |||||
#ifdef PEP_393 | |||||
assert(self->kind == other->kind); | |||||
memcpy(((Py_UCS1*) self->data) + self->kind * self->length, other->data, | |||||
other->length * other->kind); | |||||
#else | |||||
memcpy(self->data + self->length, other->data, | |||||
other->length * sizeof(Unicode)); | |||||
#endif | |||||
self->length = newlen; | |||||
return 0; | |||||
} | |||||
/* | |||||
Reverse the contents of the given textbuffer. | |||||
*/ | |||||
void Textbuffer_reverse(Textbuffer* self) | |||||
{ | |||||
Py_ssize_t i, end = self->length - 1; | |||||
Unicode tmp; | |||||
for (i = 0; i < self->length / 2; i++) { | |||||
#ifdef PEP_393 | |||||
tmp = PyUnicode_READ(self->kind, self->data, i); | |||||
PyUnicode_WRITE(self->kind, self->data, i, | |||||
PyUnicode_READ(self->kind, self->data, end - i)); | |||||
PyUnicode_WRITE(self->kind, self->data, end - i, tmp); | |||||
#else | |||||
tmp = self->data[i]; | |||||
self->data[i] = self->data[end - i]; | |||||
self->data[end - i] = tmp; | |||||
#endif | |||||
} | |||||
} |
@@ -0,0 +1,36 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#pragma once | |||||
#include "common.h" | |||||
/* Functions */ | |||||
Textbuffer* Textbuffer_new(TokenizerInput*); | |||||
void Textbuffer_dealloc(Textbuffer*); | |||||
int Textbuffer_reset(Textbuffer*); | |||||
int Textbuffer_write(Textbuffer*, Unicode); | |||||
Unicode Textbuffer_read(Textbuffer*, Py_ssize_t); | |||||
PyObject* Textbuffer_render(Textbuffer*); | |||||
int Textbuffer_concat(Textbuffer*, Textbuffer*); | |||||
void Textbuffer_reverse(Textbuffer*); |
@@ -0,0 +1,35 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#pragma once | |||||
#include "common.h" | |||||
static const char MARKERS[] = { | |||||
'{', '}', '[', ']', '<', '>', '|', '=', '&', '\'', '#', '*', ';', ':', '/', | |||||
'-', '!', '\n', '\0'}; | |||||
#define NUM_MARKERS 19 | |||||
/* Functions */ | |||||
PyObject* Tokenizer_parse(Tokenizer*, uint64_t, int); |
@@ -0,0 +1,345 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#include "tok_support.h" | |||||
#include "textbuffer.h" | |||||
#include "tokens.h" | |||||
/* | |||||
Add a new token stack, context, and textbuffer to the list. | |||||
*/ | |||||
int Tokenizer_push(Tokenizer* self, uint64_t context) | |||||
{ | |||||
Stack* top = malloc(sizeof(Stack)); | |||||
if (!top) { | |||||
PyErr_NoMemory(); | |||||
return -1; | |||||
} | |||||
top->stack = PyList_New(0); | |||||
top->context = context; | |||||
top->textbuffer = Textbuffer_new(&self->text); | |||||
if (!top->textbuffer) | |||||
return -1; | |||||
top->next = self->topstack; | |||||
self->topstack = top; | |||||
self->depth++; | |||||
self->cycles++; | |||||
return 0; | |||||
} | |||||
/* | |||||
Push the textbuffer onto the stack as a Text node and clear it. | |||||
*/ | |||||
int Tokenizer_push_textbuffer(Tokenizer* self) | |||||
{ | |||||
PyObject *text, *kwargs, *token; | |||||
Textbuffer* buffer = self->topstack->textbuffer; | |||||
if (buffer->length == 0) | |||||
return 0; | |||||
text = Textbuffer_render(buffer); | |||||
if (!text) | |||||
return -1; | |||||
kwargs = PyDict_New(); | |||||
if (!kwargs) { | |||||
Py_DECREF(text); | |||||
return -1; | |||||
} | |||||
PyDict_SetItemString(kwargs, "text", text); | |||||
Py_DECREF(text); | |||||
token = PyObject_Call(Text, NOARGS, kwargs); | |||||
Py_DECREF(kwargs); | |||||
if (!token) | |||||
return -1; | |||||
if (PyList_Append(self->topstack->stack, token)) { | |||||
Py_DECREF(token); | |||||
return -1; | |||||
} | |||||
Py_DECREF(token); | |||||
if (Textbuffer_reset(buffer)) | |||||
return -1; | |||||
return 0; | |||||
} | |||||
/* | |||||
Pop and deallocate the top token stack/context/textbuffer. | |||||
*/ | |||||
void Tokenizer_delete_top_of_stack(Tokenizer* self) | |||||
{ | |||||
Stack* top = self->topstack; | |||||
Py_DECREF(top->stack); | |||||
Textbuffer_dealloc(top->textbuffer); | |||||
self->topstack = top->next; | |||||
free(top); | |||||
self->depth--; | |||||
} | |||||
/* | |||||
Pop the current stack/context/textbuffer, returing the stack. | |||||
*/ | |||||
PyObject* Tokenizer_pop(Tokenizer* self) | |||||
{ | |||||
PyObject* stack; | |||||
if (Tokenizer_push_textbuffer(self)) | |||||
return NULL; | |||||
stack = self->topstack->stack; | |||||
Py_INCREF(stack); | |||||
Tokenizer_delete_top_of_stack(self); | |||||
return stack; | |||||
} | |||||
/* | |||||
Pop the current stack/context/textbuffer, returing the stack. We will also | |||||
replace the underlying stack's context with the current stack's. | |||||
*/ | |||||
PyObject* Tokenizer_pop_keeping_context(Tokenizer* self) | |||||
{ | |||||
PyObject* stack; | |||||
uint64_t context; | |||||
if (Tokenizer_push_textbuffer(self)) | |||||
return NULL; | |||||
stack = self->topstack->stack; | |||||
Py_INCREF(stack); | |||||
context = self->topstack->context; | |||||
Tokenizer_delete_top_of_stack(self); | |||||
self->topstack->context = context; | |||||
return stack; | |||||
} | |||||
/* | |||||
Fail the current tokenization route. Discards the current | |||||
stack/context/textbuffer and sets the BAD_ROUTE flag. | |||||
*/ | |||||
void* Tokenizer_fail_route(Tokenizer* self) | |||||
{ | |||||
uint64_t context = self->topstack->context; | |||||
PyObject* stack = Tokenizer_pop(self); | |||||
Py_XDECREF(stack); | |||||
FAIL_ROUTE(context); | |||||
return NULL; | |||||
} | |||||
/* | |||||
Write a token to the current token stack. | |||||
*/ | |||||
int Tokenizer_emit_token(Tokenizer* self, PyObject* token, int first) | |||||
{ | |||||
PyObject* instance; | |||||
if (Tokenizer_push_textbuffer(self)) | |||||
return -1; | |||||
instance = PyObject_CallObject(token, NULL); | |||||
if (!instance) | |||||
return -1; | |||||
if (first ? PyList_Insert(self->topstack->stack, 0, instance) : | |||||
PyList_Append(self->topstack->stack, instance)) { | |||||
Py_DECREF(instance); | |||||
return -1; | |||||
} | |||||
Py_DECREF(instance); | |||||
return 0; | |||||
} | |||||
/* | |||||
Write a token to the current token stack, with kwargs. Steals a reference | |||||
to kwargs. | |||||
*/ | |||||
int Tokenizer_emit_token_kwargs(Tokenizer* self, PyObject* token, | |||||
PyObject* kwargs, int first) | |||||
{ | |||||
PyObject* instance; | |||||
if (Tokenizer_push_textbuffer(self)) { | |||||
Py_DECREF(kwargs); | |||||
return -1; | |||||
} | |||||
instance = PyObject_Call(token, NOARGS, kwargs); | |||||
if (!instance) { | |||||
Py_DECREF(kwargs); | |||||
return -1; | |||||
} | |||||
if (first ? PyList_Insert(self->topstack->stack, 0, instance): | |||||
PyList_Append(self->topstack->stack, instance)) { | |||||
Py_DECREF(instance); | |||||
Py_DECREF(kwargs); | |||||
return -1; | |||||
} | |||||
Py_DECREF(instance); | |||||
Py_DECREF(kwargs); | |||||
return 0; | |||||
} | |||||
/*
    Write a Unicode codepoint to the current textbuffer.

    Propagates the result of Textbuffer_write(): 0 on success, -1 on failure.
*/
int Tokenizer_emit_char(Tokenizer* self, Unicode code)
{
    return Textbuffer_write(self->topstack->textbuffer, code);
}
/* | |||||
Write a string of text to the current textbuffer. | |||||
*/ | |||||
int Tokenizer_emit_text(Tokenizer* self, const char* text) | |||||
{ | |||||
int i = 0; | |||||
while (text[i]) { | |||||
if (Tokenizer_emit_char(self, text[i])) | |||||
return -1; | |||||
i++; | |||||
} | |||||
return 0; | |||||
} | |||||
/*
    Write the contents of another textbuffer to the current textbuffer,
    deallocating it in the process.

    The given buffer is deallocated even if the concatenation fails;
    returns 0 on success, -1 on failure.
*/
int Tokenizer_emit_textbuffer(Tokenizer* self, Textbuffer* buffer)
{
    int retval = Textbuffer_concat(self->topstack->textbuffer, buffer);
    Textbuffer_dealloc(buffer);
    return retval;
}
/* | |||||
Write a series of tokens to the current stack at once. | |||||
*/ | |||||
int Tokenizer_emit_all(Tokenizer* self, PyObject* tokenlist) | |||||
{ | |||||
int pushed = 0; | |||||
PyObject *stack, *token, *left, *right, *text; | |||||
Textbuffer* buffer; | |||||
Py_ssize_t size; | |||||
if (PyList_GET_SIZE(tokenlist) > 0) { | |||||
token = PyList_GET_ITEM(tokenlist, 0); | |||||
switch (PyObject_IsInstance(token, Text)) { | |||||
case 0: | |||||
break; | |||||
case 1: { | |||||
pushed = 1; | |||||
buffer = self->topstack->textbuffer; | |||||
if (buffer->length == 0) | |||||
break; | |||||
left = Textbuffer_render(buffer); | |||||
if (!left) | |||||
return -1; | |||||
right = PyObject_GetAttrString(token, "text"); | |||||
if (!right) | |||||
return -1; | |||||
text = PyUnicode_Concat(left, right); | |||||
Py_DECREF(left); | |||||
Py_DECREF(right); | |||||
if (!text) | |||||
return -1; | |||||
if (PyObject_SetAttrString(token, "text", text)) { | |||||
Py_DECREF(text); | |||||
return -1; | |||||
} | |||||
Py_DECREF(text); | |||||
if (Textbuffer_reset(buffer)) | |||||
return -1; | |||||
break; | |||||
} | |||||
case -1: | |||||
return -1; | |||||
} | |||||
} | |||||
if (!pushed) { | |||||
if (Tokenizer_push_textbuffer(self)) | |||||
return -1; | |||||
} | |||||
stack = self->topstack->stack; | |||||
size = PyList_GET_SIZE(stack); | |||||
if (PyList_SetSlice(stack, size, size, tokenlist)) | |||||
return -1; | |||||
return 0; | |||||
} | |||||
/* | |||||
Pop the current stack, write text, and then write the stack. 'text' is a | |||||
NULL-terminated array of chars. | |||||
*/ | |||||
int Tokenizer_emit_text_then_stack(Tokenizer* self, const char* text) | |||||
{ | |||||
PyObject* stack = Tokenizer_pop(self); | |||||
if (Tokenizer_emit_text(self, text)) { | |||||
Py_DECREF(stack); | |||||
return -1; | |||||
} | |||||
if (stack) { | |||||
if (PyList_GET_SIZE(stack) > 0) { | |||||
if (Tokenizer_emit_all(self, stack)) { | |||||
Py_DECREF(stack); | |||||
return -1; | |||||
} | |||||
} | |||||
Py_DECREF(stack); | |||||
} | |||||
self->head--; | |||||
return 0; | |||||
} | |||||
/*
    Internal function to read the codepoint at the given index from the input.

    With PEP 393 builds (Python 3.3+), reads through the flexible string
    representation via PyUnicode_READ; otherwise reads the legacy
    Py_UNICODE buffer directly. The index is assumed to be in bounds;
    callers perform the range check.
*/
static Unicode read_codepoint(TokenizerInput* text, Py_ssize_t index)
{
#ifdef PEP_393
    return PyUnicode_READ(text->kind, text->data, index);
#else
    return text->buf[index];
#endif
}
/* | |||||
Read the value at a relative point in the wikicode, forwards. | |||||
*/ | |||||
Unicode Tokenizer_read(Tokenizer* self, Py_ssize_t delta) | |||||
{ | |||||
Py_ssize_t index = self->head + delta; | |||||
if (index >= self->text.length) | |||||
return '\0'; | |||||
return read_codepoint(&self->text, index); | |||||
} | |||||
/* | |||||
Read the value at a relative point in the wikicode, backwards. | |||||
*/ | |||||
Unicode Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) | |||||
{ | |||||
Py_ssize_t index; | |||||
if (delta > self->head) | |||||
return '\0'; | |||||
index = self->head - delta; | |||||
return read_codepoint(&self->text, index); | |||||
} |
@@ -0,0 +1,62 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#pragma once | |||||
#include "common.h" | |||||
/* Functions */ | |||||
int Tokenizer_push(Tokenizer*, uint64_t); | |||||
int Tokenizer_push_textbuffer(Tokenizer*); | |||||
void Tokenizer_delete_top_of_stack(Tokenizer*); | |||||
PyObject* Tokenizer_pop(Tokenizer*); | |||||
PyObject* Tokenizer_pop_keeping_context(Tokenizer*); | |||||
void* Tokenizer_fail_route(Tokenizer*); | |||||
int Tokenizer_emit_token(Tokenizer*, PyObject*, int); | |||||
int Tokenizer_emit_token_kwargs(Tokenizer*, PyObject*, PyObject*, int); | |||||
int Tokenizer_emit_char(Tokenizer*, Unicode); | |||||
int Tokenizer_emit_text(Tokenizer*, const char*); | |||||
int Tokenizer_emit_textbuffer(Tokenizer*, Textbuffer*); | |||||
int Tokenizer_emit_all(Tokenizer*, PyObject*); | |||||
int Tokenizer_emit_text_then_stack(Tokenizer*, const char*); | |||||
Unicode Tokenizer_read(Tokenizer*, Py_ssize_t); | |||||
Unicode Tokenizer_read_backwards(Tokenizer*, Py_ssize_t); | |||||
/* Macros */
#define MAX_DEPTH 40
#define MAX_CYCLES 100000
/* Argument and result are fully parenthesized so the macro is safe in any
   expression context (the original expanded 'self' unparenthesized). */
#define Tokenizer_CAN_RECURSE(self) \
    (((self)->depth < MAX_DEPTH) && ((self)->cycles < MAX_CYCLES))
#define Tokenizer_emit(self, token) \
    Tokenizer_emit_token(self, token, 0)
#define Tokenizer_emit_first(self, token) \
    Tokenizer_emit_token(self, token, 1)
#define Tokenizer_emit_kwargs(self, token, kwargs) \
    Tokenizer_emit_token_kwargs(self, token, kwargs, 0)
#define Tokenizer_emit_first_kwargs(self, token, kwargs) \
    Tokenizer_emit_token_kwargs(self, token, kwargs, 1)
@@ -0,0 +1,310 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#include "tokenizer.h" | |||||
#include "tok_parse.h" | |||||
#include "tokens.h" | |||||
/* Globals */ | |||||
int route_state; | |||||
uint64_t route_context; | |||||
char** entitydefs; | |||||
PyObject* NOARGS; | |||||
PyObject* definitions; | |||||
static PyObject* ParserError; | |||||
/* Forward declarations */ | |||||
static int load_exceptions(void); | |||||
/* | |||||
Create a new tokenizer object. | |||||
*/ | |||||
static PyObject* | |||||
Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds) | |||||
{ | |||||
Tokenizer* self = (Tokenizer*) type->tp_alloc(type, 0); | |||||
return (PyObject*) self; | |||||
} | |||||
/*
    Deallocate the given tokenizer's text field.

    Only the owned reference to the input object is released; the cached
    kind/data (or buf) pointers borrow from that object and need no
    separate cleanup.
*/
static void dealloc_tokenizer_text(TokenizerInput* text)
{
    Py_XDECREF(text->object);
}
/* | |||||
Deallocate the given tokenizer object. | |||||
*/ | |||||
static void Tokenizer_dealloc(Tokenizer* self) | |||||
{ | |||||
Stack *this = self->topstack, *next; | |||||
dealloc_tokenizer_text(&self->text); | |||||
while (this) { | |||||
Py_DECREF(this->stack); | |||||
Textbuffer_dealloc(this->textbuffer); | |||||
next = this->next; | |||||
free(this); | |||||
this = next; | |||||
} | |||||
Py_TYPE(self)->tp_free((PyObject*) self); | |||||
} | |||||
/*
    Initialize a new tokenizer instance's text field.

    Py_None is stored as a placeholder for "no input loaded yet" so that
    dealloc_tokenizer_text() can always release text->object safely.
*/
static void init_tokenizer_text(TokenizerInput* text)
{
    text->object = Py_None;
    Py_INCREF(Py_None);
    text->length = 0;
#ifdef PEP_393
    /* PEP 393 (Python 3.3+) flexible string representation fields. */
    text->kind = PyUnicode_WCHAR_KIND;
    text->data = NULL;
#else
    /* Legacy Py_UNICODE buffer representation. */
    text->buf = NULL;
#endif
}
/* | |||||
Initialize a new tokenizer instance by setting instance attributes. | |||||
*/ | |||||
static int Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds) | |||||
{ | |||||
static char* kwlist[] = {NULL}; | |||||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "", kwlist)) | |||||
return -1; | |||||
init_tokenizer_text(&self->text); | |||||
self->topstack = NULL; | |||||
self->head = self->global = self->depth = self->cycles = 0; | |||||
self->route_context = self->route_state = 0; | |||||
self->skip_style_tags = 0; | |||||
return 0; | |||||
} | |||||
/*
    Load input text into the tokenizer.

    Steals a reference to 'input' (stored in text->object, replacing and
    releasing any previously loaded input) and caches direct pointers into
    its character data for fast codepoint reads. Returns 0 on success, -1
    on failure.
*/
static int load_tokenizer_text(TokenizerInput* text, PyObject *input)
{
    dealloc_tokenizer_text(text);
    text->object = input;
#ifdef PEP_393
    /* Force the canonical (PEP 393) representation before caching
       the kind/data pointers. */
    if (PyUnicode_READY(input) < 0)
        return -1;
    text->kind = PyUnicode_KIND(input);
    text->data = PyUnicode_DATA(input);
#else
    text->buf = PyUnicode_AS_UNICODE(input);
#endif
    text->length = PyUnicode_GET_LENGTH(input);
    return 0;
}
/* | |||||
Build a list of tokens from a string of wikicode and return it. | |||||
*/ | |||||
static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) | |||||
{ | |||||
PyObject *input, *tokens; | |||||
uint64_t context = 0; | |||||
int skip_style_tags = 0; | |||||
if (PyArg_ParseTuple(args, "U|ii", &input, &context, &skip_style_tags)) { | |||||
Py_INCREF(input); | |||||
if (load_tokenizer_text(&self->text, input)) | |||||
return NULL; | |||||
} | |||||
else { | |||||
const char *encoded; | |||||
Py_ssize_t size; | |||||
/* Failed to parse a Unicode object; try a string instead. */ | |||||
PyErr_Clear(); | |||||
if (!PyArg_ParseTuple(args, "s#|ii", &encoded, &size, &context, | |||||
&skip_style_tags)) | |||||
return NULL; | |||||
if (!(input = PyUnicode_FromStringAndSize(encoded, size))) | |||||
return NULL; | |||||
if (load_tokenizer_text(&self->text, input)) | |||||
return NULL; | |||||
} | |||||
self->head = self->global = self->depth = self->cycles = 0; | |||||
self->skip_style_tags = skip_style_tags; | |||||
tokens = Tokenizer_parse(self, context, 1); | |||||
if ((!tokens && !PyErr_Occurred()) || self->topstack) { | |||||
if (!ParserError) { | |||||
if (load_exceptions()) | |||||
return NULL; | |||||
} | |||||
if (BAD_ROUTE) { | |||||
RESET_ROUTE(); | |||||
PyErr_SetString(ParserError, "C tokenizer exited with BAD_ROUTE"); | |||||
} | |||||
else if (self->topstack) | |||||
PyErr_SetString(ParserError, | |||||
"C tokenizer exited with non-empty token stack"); | |||||
else | |||||
PyErr_SetString(ParserError, "C tokenizer exited unexpectedly"); | |||||
return NULL; | |||||
} | |||||
return tokens; | |||||
} | |||||
static int load_entities(void) | |||||
{ | |||||
PyObject *tempmod, *defmap, *deflist; | |||||
unsigned numdefs, i; | |||||
#ifdef IS_PY3K | |||||
PyObject *string; | |||||
#endif | |||||
tempmod = PyImport_ImportModule(ENTITYDEFS_MODULE); | |||||
if (!tempmod) | |||||
return -1; | |||||
defmap = PyObject_GetAttrString(tempmod, "entitydefs"); | |||||
if (!defmap) | |||||
return -1; | |||||
Py_DECREF(tempmod); | |||||
deflist = PyDict_Keys(defmap); | |||||
if (!deflist) | |||||
return -1; | |||||
Py_DECREF(defmap); | |||||
numdefs = (unsigned) PyList_GET_SIZE(defmap); | |||||
entitydefs = calloc(numdefs + 1, sizeof(char*)); | |||||
if (!entitydefs) | |||||
return -1; | |||||
for (i = 0; i < numdefs; i++) { | |||||
#ifdef IS_PY3K | |||||
string = PyUnicode_AsASCIIString(PyList_GET_ITEM(deflist, i)); | |||||
if (!string) | |||||
return -1; | |||||
entitydefs[i] = PyBytes_AsString(string); | |||||
#else | |||||
entitydefs[i] = PyBytes_AsString(PyList_GET_ITEM(deflist, i)); | |||||
#endif | |||||
if (!entitydefs[i]) | |||||
return -1; | |||||
} | |||||
Py_DECREF(deflist); | |||||
return 0; | |||||
} | |||||
static int load_tokens(void) | |||||
{ | |||||
PyObject *tempmod, *tokens, | |||||
*globals = PyEval_GetGlobals(), | |||||
*locals = PyEval_GetLocals(), | |||||
*fromlist = PyList_New(1), | |||||
*modname = IMPORT_NAME_FUNC("tokens"); | |||||
char *name = "mwparserfromhell.parser"; | |||||
if (!fromlist || !modname) | |||||
return -1; | |||||
PyList_SET_ITEM(fromlist, 0, modname); | |||||
tempmod = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); | |||||
Py_DECREF(fromlist); | |||||
if (!tempmod) | |||||
return -1; | |||||
tokens = PyObject_GetAttrString(tempmod, "tokens"); | |||||
Py_DECREF(tempmod); | |||||
load_tokens_from_module(tokens); | |||||
Py_DECREF(tokens); | |||||
return 0; | |||||
} | |||||
static int load_defs(void) | |||||
{ | |||||
PyObject *tempmod, | |||||
*globals = PyEval_GetGlobals(), | |||||
*locals = PyEval_GetLocals(), | |||||
*fromlist = PyList_New(1), | |||||
*modname = IMPORT_NAME_FUNC("definitions"); | |||||
char *name = "mwparserfromhell"; | |||||
if (!fromlist || !modname) | |||||
return -1; | |||||
PyList_SET_ITEM(fromlist, 0, modname); | |||||
tempmod = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); | |||||
Py_DECREF(fromlist); | |||||
if (!tempmod) | |||||
return -1; | |||||
definitions = PyObject_GetAttrString(tempmod, "definitions"); | |||||
Py_DECREF(tempmod); | |||||
return 0; | |||||
} | |||||
static int load_exceptions(void) | |||||
{ | |||||
PyObject *tempmod, *parsermod, | |||||
*globals = PyEval_GetGlobals(), | |||||
*locals = PyEval_GetLocals(), | |||||
*fromlist = PyList_New(1), | |||||
*modname = IMPORT_NAME_FUNC("parser"); | |||||
char *name = "mwparserfromhell"; | |||||
if (!fromlist || !modname) | |||||
return -1; | |||||
PyList_SET_ITEM(fromlist, 0, modname); | |||||
tempmod = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); | |||||
Py_DECREF(fromlist); | |||||
if (!tempmod) | |||||
return -1; | |||||
parsermod = PyObject_GetAttrString(tempmod, "parser"); | |||||
Py_DECREF(tempmod); | |||||
ParserError = PyObject_GetAttrString(parsermod, "ParserError"); | |||||
Py_DECREF(parsermod); | |||||
return 0; | |||||
} | |||||
/*
    Module initialization: ready the CTokenizer type, create the module
    object, and load the token/entity/definition globals from the Python
    side of the package. The INIT_* macros abstract over Python 2 vs 3.
*/
PyMODINIT_FUNC INIT_FUNC_NAME(void)
{
    PyObject *module;
    TokenizerType.tp_new = PyType_GenericNew;
    if (PyType_Ready(&TokenizerType) < 0)
        INIT_ERROR;
    module = CREATE_MODULE;
    if (!module)
        INIT_ERROR;
    Py_INCREF(&TokenizerType);
    PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType);
    Py_INCREF(Py_True);
    /* Lets the Python wrapper detect that the C tokenizer is available. */
    PyDict_SetItemString(TokenizerType.tp_dict, "USES_C", Py_True);
    NOARGS = PyTuple_New(0);
    if (!NOARGS || load_entities() || load_tokens() || load_defs())
        INIT_ERROR;
#ifdef IS_PY3K
    /* Python 3 init functions return the module; Python 2's return void. */
    return module;
#endif
}
@@ -0,0 +1,111 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#pragma once | |||||
#include "common.h" | |||||
#include "textbuffer.h" | |||||
/* Functions */ | |||||
static PyObject* Tokenizer_new(PyTypeObject*, PyObject*, PyObject*); | |||||
static void Tokenizer_dealloc(Tokenizer*); | |||||
static int Tokenizer_init(Tokenizer*, PyObject*, PyObject*); | |||||
static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); | |||||
/* Compatibility macros */ | |||||
#ifdef IS_PY3K | |||||
#define IMPORT_NAME_FUNC PyUnicode_FromString | |||||
#define CREATE_MODULE PyModule_Create(&module_def); | |||||
#define ENTITYDEFS_MODULE "html.entities" | |||||
#define INIT_FUNC_NAME PyInit__tokenizer | |||||
#define INIT_ERROR return NULL | |||||
#else | |||||
#define IMPORT_NAME_FUNC PyBytes_FromString | |||||
#define CREATE_MODULE Py_InitModule("_tokenizer", NULL); | |||||
#define ENTITYDEFS_MODULE "htmlentitydefs" | |||||
#define INIT_FUNC_NAME init_tokenizer | |||||
#define INIT_ERROR return | |||||
#endif | |||||
/* Structs */
/* Methods exposed on the CTokenizer Python type. */
static PyMethodDef Tokenizer_methods[] = {
    {"tokenize", (PyCFunction) Tokenizer_tokenize, METH_VARARGS,
    "Build a list of tokens from a string of wikicode and return it."},
    {NULL}
};
/* No members are exposed to Python; all state is internal to the C struct. */
static PyMemberDef Tokenizer_members[] = {
    {NULL}
};
/* Type object for CTokenizer: only dealloc, init, new, methods, and the
   docstring are filled in; everything else uses the defaults. */
static PyTypeObject TokenizerType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_tokenizer.CTokenizer",                                /* tp_name */
    sizeof(Tokenizer),                                      /* tp_basicsize */
    0,                                                      /* tp_itemsize */
    (destructor) Tokenizer_dealloc,                         /* tp_dealloc */
    0,                                                      /* tp_print */
    0,                                                      /* tp_getattr */
    0,                                                      /* tp_setattr */
    0,                                                      /* tp_compare */
    0,                                                      /* tp_repr */
    0,                                                      /* tp_as_number */
    0,                                                      /* tp_as_sequence */
    0,                                                      /* tp_as_mapping */
    0,                                                      /* tp_hash */
    0,                                                      /* tp_call */
    0,                                                      /* tp_str */
    0,                                                      /* tp_getattro */
    0,                                                      /* tp_setattro */
    0,                                                      /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                                     /* tp_flags */
    "Creates a list of tokens from a string of wikicode.",  /* tp_doc */
    0,                                                      /* tp_traverse */
    0,                                                      /* tp_clear */
    0,                                                      /* tp_richcompare */
    0,                                                      /* tp_weaklistoffset */
    0,                                                      /* tp_iter */
    0,                                                      /* tp_iternext */
    Tokenizer_methods,                                      /* tp_methods */
    Tokenizer_members,                                      /* tp_members */
    0,                                                      /* tp_getset */
    0,                                                      /* tp_base */
    0,                                                      /* tp_dict */
    0,                                                      /* tp_descr_get */
    0,                                                      /* tp_descr_set */
    0,                                                      /* tp_dictoffset */
    (initproc) Tokenizer_init,                              /* tp_init */
    0,                                                      /* tp_alloc */
    Tokenizer_new,                                          /* tp_new */
};
#ifdef IS_PY3K
/* Module definition (Python 3 only; Python 2 uses Py_InitModule). */
static PyModuleDef module_def = {
    PyModuleDef_HEAD_INIT,
    "_tokenizer",
    "Creates a list of tokens from a string of wikicode.",
    -1, NULL, NULL, NULL, NULL, NULL
};
#endif
@@ -0,0 +1,111 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#include "tokens.h" | |||||
/* Globals */ | |||||
PyObject* Text; | |||||
PyObject* TemplateOpen; | |||||
PyObject* TemplateParamSeparator; | |||||
PyObject* TemplateParamEquals; | |||||
PyObject* TemplateClose; | |||||
PyObject* ArgumentOpen; | |||||
PyObject* ArgumentSeparator; | |||||
PyObject* ArgumentClose; | |||||
PyObject* WikilinkOpen; | |||||
PyObject* WikilinkSeparator; | |||||
PyObject* WikilinkClose; | |||||
PyObject* ExternalLinkOpen; | |||||
PyObject* ExternalLinkSeparator; | |||||
PyObject* ExternalLinkClose; | |||||
PyObject* HTMLEntityStart; | |||||
PyObject* HTMLEntityNumeric; | |||||
PyObject* HTMLEntityHex; | |||||
PyObject* HTMLEntityEnd; | |||||
PyObject* HeadingStart; | |||||
PyObject* HeadingEnd; | |||||
PyObject* CommentStart; | |||||
PyObject* CommentEnd; | |||||
PyObject* TagOpenOpen; | |||||
PyObject* TagAttrStart; | |||||
PyObject* TagAttrEquals; | |||||
PyObject* TagAttrQuote; | |||||
PyObject* TagCloseOpen; | |||||
PyObject* TagCloseSelfclose; | |||||
PyObject* TagOpenClose; | |||||
PyObject* TagCloseClose; | |||||
/* | |||||
Load individual tokens into globals from the given Python module object. | |||||
*/ | |||||
void load_tokens_from_module(PyObject* module) | |||||
{ | |||||
Text = PyObject_GetAttrString(module, "Text"); | |||||
TemplateOpen = PyObject_GetAttrString(module, "TemplateOpen"); | |||||
TemplateParamSeparator = PyObject_GetAttrString(module, | |||||
"TemplateParamSeparator"); | |||||
TemplateParamEquals = PyObject_GetAttrString(module, | |||||
"TemplateParamEquals"); | |||||
TemplateClose = PyObject_GetAttrString(module, "TemplateClose"); | |||||
ArgumentOpen = PyObject_GetAttrString(module, "ArgumentOpen"); | |||||
ArgumentSeparator = PyObject_GetAttrString(module, "ArgumentSeparator"); | |||||
ArgumentClose = PyObject_GetAttrString(module, "ArgumentClose"); | |||||
WikilinkOpen = PyObject_GetAttrString(module, "WikilinkOpen"); | |||||
WikilinkSeparator = PyObject_GetAttrString(module, "WikilinkSeparator"); | |||||
WikilinkClose = PyObject_GetAttrString(module, "WikilinkClose"); | |||||
ExternalLinkOpen = PyObject_GetAttrString(module, "ExternalLinkOpen"); | |||||
ExternalLinkSeparator = PyObject_GetAttrString(module, | |||||
"ExternalLinkSeparator"); | |||||
ExternalLinkClose = PyObject_GetAttrString(module, "ExternalLinkClose"); | |||||
HTMLEntityStart = PyObject_GetAttrString(module, "HTMLEntityStart"); | |||||
HTMLEntityNumeric = PyObject_GetAttrString(module, "HTMLEntityNumeric"); | |||||
HTMLEntityHex = PyObject_GetAttrString(module, "HTMLEntityHex"); | |||||
HTMLEntityEnd = PyObject_GetAttrString(module, "HTMLEntityEnd"); | |||||
HeadingStart = PyObject_GetAttrString(module, "HeadingStart"); | |||||
HeadingEnd = PyObject_GetAttrString(module, "HeadingEnd"); | |||||
CommentStart = PyObject_GetAttrString(module, "CommentStart"); | |||||
CommentEnd = PyObject_GetAttrString(module, "CommentEnd"); | |||||
TagOpenOpen = PyObject_GetAttrString(module, "TagOpenOpen"); | |||||
TagAttrStart = PyObject_GetAttrString(module, "TagAttrStart"); | |||||
TagAttrEquals = PyObject_GetAttrString(module, "TagAttrEquals"); | |||||
TagAttrQuote = PyObject_GetAttrString(module, "TagAttrQuote"); | |||||
TagCloseOpen = PyObject_GetAttrString(module, "TagCloseOpen"); | |||||
TagCloseSelfclose = PyObject_GetAttrString(module, "TagCloseSelfclose"); | |||||
TagOpenClose = PyObject_GetAttrString(module, "TagOpenClose"); | |||||
TagCloseClose = PyObject_GetAttrString(module, "TagCloseClose"); | |||||
} |
@@ -0,0 +1,69 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#pragma once | |||||
#include "common.h" | |||||
/* Token globals */ | |||||
extern PyObject* Text; | |||||
extern PyObject* TemplateOpen; | |||||
extern PyObject* TemplateParamSeparator; | |||||
extern PyObject* TemplateParamEquals; | |||||
extern PyObject* TemplateClose; | |||||
extern PyObject* ArgumentOpen; | |||||
extern PyObject* ArgumentSeparator; | |||||
extern PyObject* ArgumentClose; | |||||
extern PyObject* WikilinkOpen; | |||||
extern PyObject* WikilinkSeparator; | |||||
extern PyObject* WikilinkClose; | |||||
extern PyObject* ExternalLinkOpen; | |||||
extern PyObject* ExternalLinkSeparator; | |||||
extern PyObject* ExternalLinkClose; | |||||
extern PyObject* HTMLEntityStart; | |||||
extern PyObject* HTMLEntityNumeric; | |||||
extern PyObject* HTMLEntityHex; | |||||
extern PyObject* HTMLEntityEnd; | |||||
extern PyObject* HeadingStart; | |||||
extern PyObject* HeadingEnd; | |||||
extern PyObject* CommentStart; | |||||
extern PyObject* CommentEnd; | |||||
extern PyObject* TagOpenOpen; | |||||
extern PyObject* TagAttrStart; | |||||
extern PyObject* TagAttrEquals; | |||||
extern PyObject* TagAttrQuote; | |||||
extern PyObject* TagCloseOpen; | |||||
extern PyObject* TagCloseSelfclose; | |||||
extern PyObject* TagOpenClose; | |||||
extern PyObject* TagCloseClose; | |||||
/* Functions */ | |||||
void load_tokens_from_module(PyObject*); |
@@ -1,367 +0,0 @@ | |||||
/* | |||||
Tokenizer Header File for MWParserFromHell | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#ifndef PY_SSIZE_T_CLEAN | |||||
#define PY_SSIZE_T_CLEAN | |||||
#endif | |||||
#include <Python.h> | |||||
#include <math.h> | |||||
#include <structmember.h> | |||||
#include <bytesobject.h> | |||||
#include <stdint.h> | |||||
#if PY_MAJOR_VERSION >= 3 | |||||
#define IS_PY3K | |||||
#endif | |||||
#define malloc PyObject_Malloc | |||||
#define free PyObject_Free | |||||
#define DIGITS "0123456789" | |||||
#define HEXDIGITS "0123456789abcdefABCDEF" | |||||
#define ALPHANUM "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |||||
/* Characters that can begin or delimit a wikicode construct; plain-text
   scanning stops when one of these is reached. The trailing '\0' is a
   real entry terminating the array. */
static const char MARKERS[] = {
    '{', '}', '[', ']', '<', '>', '|', '=', '&', '\'', '#', '*', ';', ':', '/',
    '-', '!', '\n', '\0'};

#define NUM_MARKERS 19             /* entry count of MARKERS above (keep in sync) */
#define TEXTBUFFER_BLOCKSIZE 1024  /* code points per Textbuffer chunk -- presumably; confirm in Textbuffer_new */
#define MAX_DEPTH 40               /* cap on parse-stack recursion depth */
#define MAX_CYCLES 100000          /* cap on total stack recursions before giving up */
#define MAX_BRACES 255             /* limit on consecutive braces -- NOTE(review): confirm exact semantics in parser */
#define MAX_ENTITY_SIZE 8          /* longest HTML entity body accepted -- TODO confirm against parse_entity */
/* "Bad route" backtracking flag: set when parsing down a speculative branch
   fails, so the tokenizer can rewind and try another interpretation.
   route_context records which LC_* context was active at the failure. */
static int route_state = 0;
static uint64_t route_context = 0;

#define BAD_ROUTE route_state
#define BAD_ROUTE_CONTEXT route_context
/* Wrapped in do { ... } while (0) so both statements stay attached to the
   macro invocation even when it is used as the body of an unbraced if/else;
   the original two bare statements would silently split in that position. */
#define FAIL_ROUTE(context) do { route_state = 1; route_context = context; } while (0)
#define RESET_ROUTE() route_state = 0
static char** entitydefs; | |||||
static PyObject* EMPTY; | |||||
static PyObject* NOARGS; | |||||
static PyObject* ParserError; | |||||
static PyObject* definitions; | |||||
/* Tokens: */ | |||||
static PyObject* Text; | |||||
static PyObject* TemplateOpen; | |||||
static PyObject* TemplateParamSeparator; | |||||
static PyObject* TemplateParamEquals; | |||||
static PyObject* TemplateClose; | |||||
static PyObject* ArgumentOpen; | |||||
static PyObject* ArgumentSeparator; | |||||
static PyObject* ArgumentClose; | |||||
static PyObject* WikilinkOpen; | |||||
static PyObject* WikilinkSeparator; | |||||
static PyObject* WikilinkClose; | |||||
static PyObject* ExternalLinkOpen; | |||||
static PyObject* ExternalLinkSeparator; | |||||
static PyObject* ExternalLinkClose; | |||||
static PyObject* HTMLEntityStart; | |||||
static PyObject* HTMLEntityNumeric; | |||||
static PyObject* HTMLEntityHex; | |||||
static PyObject* HTMLEntityEnd; | |||||
static PyObject* HeadingStart; | |||||
static PyObject* HeadingEnd; | |||||
static PyObject* CommentStart; | |||||
static PyObject* CommentEnd; | |||||
static PyObject* TagOpenOpen; | |||||
static PyObject* TagAttrStart; | |||||
static PyObject* TagAttrEquals; | |||||
static PyObject* TagAttrQuote; | |||||
static PyObject* TagCloseOpen; | |||||
static PyObject* TagCloseSelfclose; | |||||
static PyObject* TagOpenClose; | |||||
static PyObject* TagCloseClose; | |||||
/* Local contexts: */ | |||||
#define LC_TEMPLATE 0x0000000000000007 | |||||
#define LC_TEMPLATE_NAME 0x0000000000000001 | |||||
#define LC_TEMPLATE_PARAM_KEY 0x0000000000000002 | |||||
#define LC_TEMPLATE_PARAM_VALUE 0x0000000000000004 | |||||
#define LC_ARGUMENT 0x0000000000000018 | |||||
#define LC_ARGUMENT_NAME 0x0000000000000008 | |||||
#define LC_ARGUMENT_DEFAULT 0x0000000000000010 | |||||
#define LC_WIKILINK 0x0000000000000060 | |||||
#define LC_WIKILINK_TITLE 0x0000000000000020 | |||||
#define LC_WIKILINK_TEXT 0x0000000000000040 | |||||
#define LC_EXT_LINK 0x0000000000000180 | |||||
#define LC_EXT_LINK_URI 0x0000000000000080 | |||||
#define LC_EXT_LINK_TITLE 0x0000000000000100 | |||||
#define LC_HEADING 0x0000000000007E00 | |||||
#define LC_HEADING_LEVEL_1 0x0000000000000200 | |||||
#define LC_HEADING_LEVEL_2 0x0000000000000400 | |||||
#define LC_HEADING_LEVEL_3 0x0000000000000800 | |||||
#define LC_HEADING_LEVEL_4 0x0000000000001000 | |||||
#define LC_HEADING_LEVEL_5 0x0000000000002000 | |||||
#define LC_HEADING_LEVEL_6 0x0000000000004000 | |||||
#define LC_TAG 0x0000000000078000 | |||||
#define LC_TAG_OPEN 0x0000000000008000 | |||||
#define LC_TAG_ATTR 0x0000000000010000 | |||||
#define LC_TAG_BODY 0x0000000000020000 | |||||
#define LC_TAG_CLOSE 0x0000000000040000 | |||||
#define LC_STYLE 0x0000000000780000 | |||||
#define LC_STYLE_ITALICS 0x0000000000080000 | |||||
#define LC_STYLE_BOLD 0x0000000000100000 | |||||
#define LC_STYLE_PASS_AGAIN 0x0000000000200000 | |||||
#define LC_STYLE_SECOND_PASS 0x0000000000400000 | |||||
#define LC_DLTERM 0x0000000000800000 | |||||
#define LC_SAFETY_CHECK 0x000000007F000000 | |||||
#define LC_HAS_TEXT 0x0000000001000000 | |||||
#define LC_FAIL_ON_TEXT 0x0000000002000000 | |||||
#define LC_FAIL_NEXT 0x0000000004000000 | |||||
#define LC_FAIL_ON_LBRACE 0x0000000008000000 | |||||
#define LC_FAIL_ON_RBRACE 0x0000000010000000 | |||||
#define LC_FAIL_ON_EQUALS 0x0000000020000000 | |||||
#define LC_HAS_TEMPLATE 0x0000000040000000 | |||||
#define LC_TABLE 0x0000001F80000000 | |||||
#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000001A00000000 | |||||
#define LC_TABLE_OPEN 0x0000000080000000 | |||||
#define LC_TABLE_CELL_OPEN 0x0000000100000000 | |||||
#define LC_TABLE_CELL_STYLE 0x0000000200000000 | |||||
#define LC_TABLE_ROW_OPEN 0x0000000400000000 | |||||
#define LC_TABLE_TD_LINE 0x0000000800000000 | |||||
#define LC_TABLE_TH_LINE 0x0000001000000000 | |||||
/* Global contexts: */ | |||||
#define GL_HEADING 0x1 | |||||
/* Aggregate contexts: */ | |||||
#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE | LC_TABLE_OPEN) | |||||
#define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) | |||||
#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE | LC_TABLE_ROW_OPEN) | |||||
#define AGG_NO_WIKILINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI) | |||||
#define AGG_NO_EXT_LINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK) | |||||
/* Tag contexts: */ | |||||
#define TAG_NAME 0x01 | |||||
#define TAG_ATTR_READY 0x02 | |||||
#define TAG_ATTR_NAME 0x04 | |||||
#define TAG_ATTR_VALUE 0x08 | |||||
#define TAG_QUOTED 0x10 | |||||
#define TAG_NOTE_SPACE 0x20 | |||||
#define TAG_NOTE_EQUALS 0x40 | |||||
#define TAG_NOTE_QUOTE 0x80 | |||||
/* Miscellaneous structs: */ | |||||
/* One node in a doubly-linked chain of text chunks; parsed plain text is
   accumulated here before being emitted as Text tokens. */
struct Textbuffer {
    Py_ssize_t size;          /* number of code points currently stored in data */
    Py_UNICODE* data;         /* character storage -- presumably TEXTBUFFER_BLOCKSIZE wide; confirm in Textbuffer_new */
    struct Textbuffer* prev;  /* previous chunk in the chain, or NULL */
    struct Textbuffer* next;  /* next chunk in the chain, or NULL */
};
/* One level of the tokenizer's parse stack. Levels form a singly-linked
   list via `next`, with the innermost (current) level at the top. */
struct Stack {
    PyObject* stack;               /* tokens emitted at this level -- presumably a Python list; confirm */
    uint64_t context;              /* LC_* local-context bitfield for this level */
    struct Textbuffer* textbuffer; /* text collected but not yet emitted as a token */
    struct Stack* next;            /* enclosing (outer) stack level, or NULL at the bottom */
};
/* Intermediate result when parsing a section heading. */
typedef struct {
    PyObject* title;  /* the heading's parsed contents */
    int level;        /* heading depth -- presumably the '=' count (cf. LC_HEADING_LEVEL_*); confirm */
} HeadingData;
/* Scratch state used while tokenizing an HTML tag and its attributes. */
typedef struct {
    uint64_t context;                 /* TAG_* bitfield tracking position within the tag */
    struct Textbuffer* pad_first;     /* whitespace before the attribute name */
    struct Textbuffer* pad_before_eq; /* whitespace between the name and '=' */
    struct Textbuffer* pad_after_eq;  /* whitespace between '=' and the value */
    Py_UNICODE quoter;                /* quote character wrapping the attribute value, if any */
    Py_ssize_t reset;                 /* position to rewind to on failure -- TODO confirm usage */
} TagData;
typedef struct Textbuffer Textbuffer; | |||||
typedef struct Stack Stack; | |||||
/* Tokenizer object definition: */ | |||||
/* The CTokenizer instance: holds the input text plus all mutable parse
   state (stack, cursor, depth counters). */
typedef struct {
    PyObject_HEAD
    PyObject* text;         /* text to tokenize */
    Stack* topstack;        /* topmost stack */
    Py_ssize_t head;        /* current position in text */
    Py_ssize_t length;      /* length of text */
    int global;             /* global context */
    int depth;              /* stack recursion depth */
    int cycles;             /* total number of stack recursions */
    int skip_style_tags;    /* temporary fix for the sometimes broken tag parser */
} Tokenizer;
/* Macros related to Tokenizer functions: */ | |||||
#define Tokenizer_READ(self, delta) (*PyUnicode_AS_UNICODE(Tokenizer_read(self, delta))) | |||||
#define Tokenizer_READ_BACKWARDS(self, delta) \ | |||||
(*PyUnicode_AS_UNICODE(Tokenizer_read_backwards(self, delta))) | |||||
#define Tokenizer_CAN_RECURSE(self) (self->depth < MAX_DEPTH && self->cycles < MAX_CYCLES) | |||||
#define Tokenizer_emit(self, token) Tokenizer_emit_token(self, token, 0) | |||||
#define Tokenizer_emit_first(self, token) Tokenizer_emit_token(self, token, 1) | |||||
#define Tokenizer_emit_kwargs(self, token, kwargs) Tokenizer_emit_token_kwargs(self, token, kwargs, 0) | |||||
#define Tokenizer_emit_first_kwargs(self, token, kwargs) Tokenizer_emit_token_kwargs(self, token, kwargs, 1) | |||||
/* Macros for accessing definitions: */ | |||||
#define GET_HTML_TAG(markup) (markup == ':' ? "dd" : markup == ';' ? "dt" : "li") | |||||
#define IS_PARSABLE(tag) (call_def_func("is_parsable", tag, NULL, NULL)) | |||||
#define IS_SINGLE(tag) (call_def_func("is_single", tag, NULL, NULL)) | |||||
#define IS_SINGLE_ONLY(tag) (call_def_func("is_single_only", tag, NULL, NULL)) | |||||
#define IS_SCHEME(scheme, slashes, reverse) \ | |||||
(call_def_func("is_scheme", scheme, slashes ? Py_True : Py_False, reverse ? Py_True : Py_False)) | |||||
/* Function prototypes: */ | |||||
static Textbuffer* Textbuffer_new(void); | |||||
static void Textbuffer_dealloc(Textbuffer*); | |||||
static TagData* TagData_new(void); | |||||
static void TagData_dealloc(TagData*); | |||||
static PyObject* Tokenizer_new(PyTypeObject*, PyObject*, PyObject*); | |||||
static void Tokenizer_dealloc(Tokenizer*); | |||||
static int Tokenizer_init(Tokenizer*, PyObject*, PyObject*); | |||||
static int Tokenizer_parse_entity(Tokenizer*); | |||||
static int Tokenizer_parse_comment(Tokenizer*); | |||||
static int Tokenizer_handle_dl_term(Tokenizer*); | |||||
static int Tokenizer_parse_tag(Tokenizer*); | |||||
static PyObject* Tokenizer_parse(Tokenizer*, uint64_t, int); | |||||
static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); | |||||
static int load_exceptions(void); | |||||
/* Macros for Python 2/3 compatibility: */ | |||||
#ifdef IS_PY3K | |||||
#define NEW_INT_FUNC PyLong_FromSsize_t | |||||
#define IMPORT_NAME_FUNC PyUnicode_FromString | |||||
#define CREATE_MODULE PyModule_Create(&module_def); | |||||
#define ENTITYDEFS_MODULE "html.entities" | |||||
#define INIT_FUNC_NAME PyInit__tokenizer | |||||
#define INIT_ERROR return NULL | |||||
#else | |||||
#define NEW_INT_FUNC PyInt_FromSsize_t | |||||
#define IMPORT_NAME_FUNC PyBytes_FromString | |||||
#define CREATE_MODULE Py_InitModule("_tokenizer", NULL); | |||||
#define ENTITYDEFS_MODULE "htmlentitydefs" | |||||
#define INIT_FUNC_NAME init_tokenizer | |||||
#define INIT_ERROR return | |||||
#endif | |||||
/* More structs for creating the Tokenizer type: */ | |||||
/* Methods exposed on the CTokenizer Python type. */
static PyMethodDef Tokenizer_methods[] = {
    {"tokenize", (PyCFunction) Tokenizer_tokenize, METH_VARARGS,
     "Build a list of tokens from a string of wikicode and return it."},
    {NULL}  /* sentinel */
};
/* No attributes are exposed to Python; the table holds only the sentinel. */
static PyMemberDef Tokenizer_members[] = {
    {NULL}  /* sentinel */
};
/* Type object for _tokenizer.CTokenizer. Only dealloc, init, new, methods,
   and members are populated; every other slot takes the default behavior. */
static PyTypeObject TokenizerType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_tokenizer.CTokenizer",                                /* tp_name */
    sizeof(Tokenizer),                                      /* tp_basicsize */
    0,                                                      /* tp_itemsize */
    (destructor) Tokenizer_dealloc,                         /* tp_dealloc */
    0,                                                      /* tp_print */
    0,                                                      /* tp_getattr */
    0,                                                      /* tp_setattr */
    0,                                                      /* tp_compare */
    0,                                                      /* tp_repr */
    0,                                                      /* tp_as_number */
    0,                                                      /* tp_as_sequence */
    0,                                                      /* tp_as_mapping */
    0,                                                      /* tp_hash */
    0,                                                      /* tp_call */
    0,                                                      /* tp_str */
    0,                                                      /* tp_getattro */
    0,                                                      /* tp_setattro */
    0,                                                      /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                                     /* tp_flags */
    "Creates a list of tokens from a string of wikicode.",  /* tp_doc */
    0,                                                      /* tp_traverse */
    0,                                                      /* tp_clear */
    0,                                                      /* tp_richcompare */
    0,                                                      /* tp_weaklistoffset */
    0,                                                      /* tp_iter */
    0,                                                      /* tp_iternext */
    Tokenizer_methods,                                      /* tp_methods */
    Tokenizer_members,                                      /* tp_members */
    0,                                                      /* tp_getset */
    0,                                                      /* tp_base */
    0,                                                      /* tp_dict */
    0,                                                      /* tp_descr_get */
    0,                                                      /* tp_descr_set */
    0,                                                      /* tp_dictoffset */
    (initproc) Tokenizer_init,                              /* tp_init */
    0,                                                      /* tp_alloc */
    Tokenizer_new,                                          /* tp_new */
};
#ifdef IS_PY3K | |||||
static PyModuleDef module_def = { | |||||
PyModuleDef_HEAD_INIT, | |||||
"_tokenizer", | |||||
"Creates a list of tokens from a string of wikicode.", | |||||
-1, NULL, NULL, NULL, NULL, NULL | |||||
}; | |||||
#endif |
@@ -27,8 +27,10 @@ reflect changes made to the main list, and vice-versa. | |||||
""" | """ | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
from sys import maxsize | |||||
from weakref import ref | |||||
from .compat import maxsize, py3k | |||||
from .compat import py3k | |||||
__all__ = ["SmartList"] | __all__ = ["SmartList"] | ||||
@@ -45,16 +47,16 @@ def inheritdoc(method): | |||||
class _SliceNormalizerMixIn(object): | class _SliceNormalizerMixIn(object): | ||||
"""MixIn that provides a private method to normalize slices.""" | """MixIn that provides a private method to normalize slices.""" | ||||
def _normalize_slice(self, key): | |||||
def _normalize_slice(self, key, clamp=False): | |||||
"""Return a slice equivalent to the input *key*, standardized.""" | """Return a slice equivalent to the input *key*, standardized.""" | ||||
if key.start is not None: | |||||
if key.start is None: | |||||
start = 0 | |||||
else: | |||||
start = (len(self) + key.start) if key.start < 0 else key.start | start = (len(self) + key.start) if key.start < 0 else key.start | ||||
if key.stop is None or key.stop == maxsize: | |||||
stop = len(self) if clamp else None | |||||
else: | else: | ||||
start = 0 | |||||
if key.stop is not None: | |||||
stop = (len(self) + key.stop) if key.stop < 0 else key.stop | stop = (len(self) + key.stop) if key.stop < 0 else key.stop | ||||
else: | |||||
stop = maxsize | |||||
return slice(start, stop, key.step or 1) | return slice(start, stop, key.step or 1) | ||||
@@ -80,13 +82,6 @@ class SmartList(_SliceNormalizerMixIn, list): | |||||
[2, 3, 4] | [2, 3, 4] | ||||
>>> parent | >>> parent | ||||
[0, 1, 2, 3, 4] | [0, 1, 2, 3, 4] | ||||
The parent needs to keep a list of its children in order to update them, | |||||
which prevents them from being garbage-collected. If you are keeping the | |||||
parent around for a while but creating many children, it is advisable to | |||||
call :meth:`._ListProxy.detach` when you're finished with them. Certain | |||||
parent methods, like :meth:`reverse` and :meth:`sort`, will do this | |||||
automatically. | |||||
""" | """ | ||||
def __init__(self, iterable=None): | def __init__(self, iterable=None): | ||||
@@ -99,10 +94,11 @@ class SmartList(_SliceNormalizerMixIn, list): | |||||
def __getitem__(self, key): | def __getitem__(self, key): | ||||
if not isinstance(key, slice): | if not isinstance(key, slice): | ||||
return super(SmartList, self).__getitem__(key) | return super(SmartList, self).__getitem__(key) | ||||
key = self._normalize_slice(key) | |||||
key = self._normalize_slice(key, clamp=False) | |||||
sliceinfo = [key.start, key.stop, key.step] | sliceinfo = [key.start, key.stop, key.step] | ||||
child = _ListProxy(self, sliceinfo) | child = _ListProxy(self, sliceinfo) | ||||
self._children[id(child)] = (child, sliceinfo) | |||||
child_ref = ref(child, self._delete_child) | |||||
self._children[id(child_ref)] = (child_ref, sliceinfo) | |||||
return child | return child | ||||
def __setitem__(self, key, item): | def __setitem__(self, key, item): | ||||
@@ -110,20 +106,21 @@ class SmartList(_SliceNormalizerMixIn, list): | |||||
return super(SmartList, self).__setitem__(key, item) | return super(SmartList, self).__setitem__(key, item) | ||||
item = list(item) | item = list(item) | ||||
super(SmartList, self).__setitem__(key, item) | super(SmartList, self).__setitem__(key, item) | ||||
key = self._normalize_slice(key) | |||||
key = self._normalize_slice(key, clamp=True) | |||||
diff = len(item) + (key.start - key.stop) // key.step | diff = len(item) + (key.start - key.stop) // key.step | ||||
if not diff: | |||||
return | |||||
values = self._children.values if py3k else self._children.itervalues | values = self._children.values if py3k else self._children.itervalues | ||||
if diff: | |||||
for child, (start, stop, step) in values(): | |||||
if start > key.stop: | |||||
self._children[id(child)][1][0] += diff | |||||
if stop >= key.stop and stop != maxsize: | |||||
self._children[id(child)][1][1] += diff | |||||
for child, (start, stop, step) in values(): | |||||
if start > key.stop: | |||||
self._children[id(child)][1][0] += diff | |||||
if stop is not None and stop >= key.stop: | |||||
self._children[id(child)][1][1] += diff | |||||
def __delitem__(self, key): | def __delitem__(self, key): | ||||
super(SmartList, self).__delitem__(key) | super(SmartList, self).__delitem__(key) | ||||
if isinstance(key, slice): | if isinstance(key, slice): | ||||
key = self._normalize_slice(key) | |||||
key = self._normalize_slice(key, clamp=True) | |||||
else: | else: | ||||
key = slice(key, key + 1, 1) | key = slice(key, key + 1, 1) | ||||
diff = (key.stop - key.start) // key.step | diff = (key.stop - key.start) // key.step | ||||
@@ -131,7 +128,7 @@ class SmartList(_SliceNormalizerMixIn, list): | |||||
for child, (start, stop, step) in values(): | for child, (start, stop, step) in values(): | ||||
if start > key.start: | if start > key.start: | ||||
self._children[id(child)][1][0] -= diff | self._children[id(child)][1][0] -= diff | ||||
if stop >= key.stop and stop != maxsize: | |||||
if stop is not None and stop >= key.stop: | |||||
self._children[id(child)][1][1] -= diff | self._children[id(child)][1][1] -= diff | ||||
if not py3k: | if not py3k: | ||||
@@ -154,10 +151,16 @@ class SmartList(_SliceNormalizerMixIn, list): | |||||
self.extend(other) | self.extend(other) | ||||
return self | return self | ||||
def _delete_child(self, child_ref): | |||||
"""Remove a child reference that is about to be garbage-collected.""" | |||||
del self._children[id(child_ref)] | |||||
def _detach_children(self): | def _detach_children(self): | ||||
"""Remove all children and give them independent parent copies.""" | |||||
children = [val[0] for val in self._children.values()] | children = [val[0] for val in self._children.values()] | ||||
for child in children: | for child in children: | ||||
child.detach() | |||||
child()._parent = list(self) | |||||
self._children.clear() | |||||
@inheritdoc | @inheritdoc | ||||
def append(self, item): | def append(self, item): | ||||
@@ -226,7 +229,6 @@ class _ListProxy(_SliceNormalizerMixIn, list): | |||||
super(_ListProxy, self).__init__() | super(_ListProxy, self).__init__() | ||||
self._parent = parent | self._parent = parent | ||||
self._sliceinfo = sliceinfo | self._sliceinfo = sliceinfo | ||||
self._detached = False | |||||
def __repr__(self): | def __repr__(self): | ||||
return repr(self._render()) | return repr(self._render()) | ||||
@@ -273,24 +275,20 @@ class _ListProxy(_SliceNormalizerMixIn, list): | |||||
def __getitem__(self, key): | def __getitem__(self, key): | ||||
if isinstance(key, slice): | if isinstance(key, slice): | ||||
key = self._normalize_slice(key) | |||||
if key.stop == maxsize: | |||||
keystop = self._stop | |||||
else: | |||||
keystop = key.stop + self._start | |||||
adjusted = slice(key.start + self._start, keystop, key.step) | |||||
key = self._normalize_slice(key, clamp=True) | |||||
keystart = min(self._start + key.start, self._stop) | |||||
keystop = min(self._start + key.stop, self._stop) | |||||
adjusted = slice(keystart, keystop, key.step) | |||||
return self._parent[adjusted] | return self._parent[adjusted] | ||||
else: | else: | ||||
return self._render()[key] | return self._render()[key] | ||||
def __setitem__(self, key, item): | def __setitem__(self, key, item): | ||||
if isinstance(key, slice): | if isinstance(key, slice): | ||||
key = self._normalize_slice(key) | |||||
if key.stop == maxsize: | |||||
keystop = self._stop | |||||
else: | |||||
keystop = key.stop + self._start | |||||
adjusted = slice(key.start + self._start, keystop, key.step) | |||||
key = self._normalize_slice(key, clamp=True) | |||||
keystart = min(self._start + key.start, self._stop) | |||||
keystop = min(self._start + key.stop, self._stop) | |||||
adjusted = slice(keystart, keystop, key.step) | |||||
self._parent[adjusted] = item | self._parent[adjusted] = item | ||||
else: | else: | ||||
length = len(self) | length = len(self) | ||||
@@ -302,12 +300,10 @@ class _ListProxy(_SliceNormalizerMixIn, list): | |||||
def __delitem__(self, key): | def __delitem__(self, key): | ||||
if isinstance(key, slice): | if isinstance(key, slice): | ||||
key = self._normalize_slice(key) | |||||
if key.stop == maxsize: | |||||
keystop = self._stop | |||||
else: | |||||
keystop = key.stop + self._start | |||||
adjusted = slice(key.start + self._start, keystop, key.step) | |||||
key = self._normalize_slice(key, clamp=True) | |||||
keystart = min(self._start + key.start, self._stop) | |||||
keystop = min(self._start + key.stop, self._stop) | |||||
adjusted = slice(keystart, keystop, key.step) | |||||
del self._parent[adjusted] | del self._parent[adjusted] | ||||
else: | else: | ||||
length = len(self) | length = len(self) | ||||
@@ -370,7 +366,7 @@ class _ListProxy(_SliceNormalizerMixIn, list): | |||||
@property | @property | ||||
def _stop(self): | def _stop(self): | ||||
"""The ending index of this list, exclusive.""" | """The ending index of this list, exclusive.""" | ||||
if self._sliceinfo[1] == maxsize: | |||||
if self._sliceinfo[1] is None: | |||||
return len(self._parent) | return len(self._parent) | ||||
return self._sliceinfo[1] | return self._sliceinfo[1] | ||||
@@ -456,17 +452,5 @@ class _ListProxy(_SliceNormalizerMixIn, list): | |||||
item.sort(**kwargs) | item.sort(**kwargs) | ||||
self._parent[self._start:self._stop:self._step] = item | self._parent[self._start:self._stop:self._step] = item | ||||
def detach(self): | |||||
"""Detach the child so it operates like a normal list. | |||||
This allows children to be properly garbage-collected if their parent | |||||
is being kept around for a long time. This method has no effect if the | |||||
child is already detached. | |||||
""" | |||||
if not self._detached: | |||||
self._parent._children.pop(id(self)) | |||||
self._parent = list(self._parent) | |||||
self._detached = True | |||||
del inheritdoc | del inheritdoc |
@@ -0,0 +1,3 @@ | |||||
This directory contains support files used for *developing* mwparserfromhell, | |||||
not running it. If you are looking for code examples, read the documentation | |||||
or explore the source code. |
@@ -31,6 +31,13 @@ update_version() { | |||||
echo " done." | echo " done." | ||||
} | } | ||||
update_appveyor() { | |||||
filename="appveyor.yml" | |||||
echo -n "Updating $filename..." | |||||
sed -e "s/version: .*/version: $VERSION-b{build}/" -i "" $filename | |||||
echo " done." | |||||
} | |||||
update_changelog() { | update_changelog() { | ||||
filename="CHANGELOG" | filename="CHANGELOG" | ||||
echo -n "Updating $filename..." | echo -n "Updating $filename..." | ||||
@@ -67,25 +74,18 @@ do_git_stuff() { | |||||
} | } | ||||
upload_to_pypi() { | upload_to_pypi() { | ||||
# TODO: check whether these commands give output | |||||
echo -n "PyPI: uploading source tarball and docs..." | echo -n "PyPI: uploading source tarball and docs..." | ||||
python setup.py register sdist upload -s | |||||
python setup.py upload_docs | |||||
python setup.py -q register sdist upload -s | |||||
python setup.py -q upload_docs | |||||
echo " done." | echo " done." | ||||
} | } | ||||
windows_build() { | |||||
echo "PyPI: building/uploading Windows binaries..." | |||||
echo "*** Run in Windows: ./scripts/win_build.py" | |||||
echo "*** Press enter when done." | |||||
read | |||||
} | |||||
post_release() { | post_release() { | ||||
echo | echo | ||||
echo "*** Release completed." | echo "*** Release completed." | ||||
echo "*** Update: https://github.com/earwig/mwparserfromhell/releases/tag/v$VERSION" | echo "*** Update: https://github.com/earwig/mwparserfromhell/releases/tag/v$VERSION" | ||||
echo "*** Verify: https://pypi.python.org/pypi/mwparserfromhell" | echo "*** Verify: https://pypi.python.org/pypi/mwparserfromhell" | ||||
echo "*** Verify: https://ci.appveyor.com/project/earwig/mwparserfromhell" | |||||
echo "*** Verify: https://mwparserfromhell.readthedocs.org" | echo "*** Verify: https://mwparserfromhell.readthedocs.org" | ||||
echo "*** Press enter to sanity-check the release." | echo "*** Press enter to sanity-check the release." | ||||
read | read | ||||
@@ -153,11 +153,11 @@ cd "$SCRIPT_DIR/.." | |||||
check_git | check_git | ||||
update_version | update_version | ||||
update_appveyor | |||||
update_changelog | update_changelog | ||||
update_docs_changelog | update_docs_changelog | ||||
do_git_stuff | do_git_stuff | ||||
upload_to_pypi | upload_to_pypi | ||||
windows_build | |||||
post_release | post_release | ||||
test_release | test_release | ||||
@@ -1,58 +0,0 @@ | |||||
# Build requirements: | |||||
# | |||||
# Python 2.6-3.2: Visual C++ Express Edition 2008: | |||||
# http://go.microsoft.com/?linkid=7729279 | |||||
# | |||||
# Python 3.3+: Visual C++ Express Edition 2010: | |||||
# http://go.microsoft.com/?linkid=9709949 | |||||
# | |||||
# x64 builds: Microsoft Windows SDK for Windows 7 and .NET Framework 3.5 SP1: | |||||
# http://www.microsoft.com/en-us/download/details.aspx?id=3138 | |||||
# | |||||
# Python interpreter, 2.6, 2.7, 3.2-3.4: | |||||
# https://www.python.org/downloads/ | |||||
# | |||||
# Pip, setuptools, wheel: | |||||
# https://bootstrap.pypa.io/get-pip.py | |||||
# and run *for each* Python version: | |||||
# c:\pythonXX\python get-pip.py | |||||
# c:\pythonXX\scripts\pip install wheel | |||||
# | |||||
# Afterwards, run this script with any of the python interpreters (2.7 suggested) | |||||
from __future__ import print_function | |||||
import os | |||||
from subprocess import call, STDOUT | |||||
ENVIRONMENTS = ["26", "27", "32", "33", "34"] | |||||
def run(pyver, cmds):
    """Run ``setup.py`` with *cmds* under the given Python version.

    All subprocess output is captured into a per-command log file named
    after the first command and the version (e.g. ``test27.log``). The
    subprocess's return code is returned (0 means success).
    """
    invocation = [r"C:\Python%s\Python.exe" % pyver, "setup.py"] + cmds
    print(" ".join(invocation), end=" ")
    logname = "%s%s.log" % (cmds[0], pyver)
    with open(logname, "w") as log:
        status = call(invocation, stdout=log, stderr=STDOUT, cwd="..")
    # Report pass/fail inline, after the echoed command line.
    print("[OK]" if not status else "[FAILED (%i)]" % status)
    return status
def main():
    """Build and upload Windows wheels for every supported Python version.

    For each interpreter in ENVIRONMENTS: delete any stale compiled
    extension, run the test suite, and only upload a wheel if the tests
    pass.
    """
    # os.path.dirname is the idiomatic spelling of os.path.split(...)[0].
    path = os.path.dirname(__file__)
    if path:
        os.chdir(path)  # operate relative to this script's directory
    print("Building Windows wheels for Python %s:" % ", ".join(ENVIRONMENTS))
    for pyver in ENVIRONMENTS:
        print()
        try:
            # Remove a leftover extension so each build starts clean;
            # a missing file is expected and fine.
            os.unlink("mwparserfromhell/parser/_tokenizer.pyd")
        except OSError:
            pass
        if run(pyver, ["test"]) == 0:
            run(pyver, ["bdist_wheel", "upload"])  # TODO: add "-s" to GPG sign
if __name__ == "__main__": | |||||
main() |
@@ -0,0 +1,43 @@ | |||||
:: To build extensions for 64 bit Python 3, we need to configure environment | |||||
:: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: | |||||
:: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) | |||||
:: | |||||
:: To build extensions for 64 bit Python 2, we need to configure environment | |||||
:: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: | |||||
:: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) | |||||
:: | |||||
:: 32 bit builds do not require specific environment configurations. | |||||
:: | |||||
:: Note: this script needs to be run with the /E:ON and /V:ON flags for the | |||||
:: cmd interpreter, at least for (SDK v7.0) | |||||
:: | |||||
:: More details at: | |||||
:: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows | |||||
:: http://stackoverflow.com/a/13751649/163740 | |||||
:: | |||||
:: Author: Olivier Grisel | |||||
:: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ | |||||
@ECHO OFF
:: Capture the entire command line so it can be replayed after the SDK
:: environment has been configured.
SET COMMAND_TO_RUN=%*
SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows
:: First character of PYTHON_VERSION ("2" or "3") selects which SDK to use.
SET MAJOR_PYTHON_VERSION="%PYTHON_VERSION:~0,1%"
IF %MAJOR_PYTHON_VERSION% == "2" (
    SET WINDOWS_SDK_VERSION="v7.0"
) ELSE IF %MAJOR_PYTHON_VERSION% == "3" (
    SET WINDOWS_SDK_VERSION="v7.1"
) ELSE (
    ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%"
    EXIT 1
)
:: Only 64-bit builds need the SDK compiler environment; 32-bit builds use
:: the default toolchain. Either way, propagate the command's failure.
IF "%PYTHON_ARCH%"=="64" (
    SET DISTUTILS_USE_SDK=1
    SET MSSdk=1
    "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION%
    "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release
    call %COMMAND_TO_RUN% || EXIT 1
) ELSE (
    call %COMMAND_TO_RUN% || EXIT 1
)
@@ -21,17 +21,18 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
import os | |||||
from __future__ import print_function | |||||
from distutils.errors import DistutilsError, CCompilerError | |||||
from glob import glob | |||||
from os import environ | |||||
import sys | import sys | ||||
if (sys.version_info[0] == 2 and sys.version_info[1] < 6) or \ | |||||
(sys.version_info[1] == 3 and sys.version_info[1] < 2): | |||||
raise Exception("mwparserfromhell needs Python 2.6+ or 3.2+") | |||||
if sys.version_info >= (3, 0): | |||||
basestring = (str, ) | |||||
if ((sys.version_info[0] == 2 and sys.version_info[1] < 6) or | |||||
(sys.version_info[1] == 3 and sys.version_info[1] < 2)): | |||||
raise RuntimeError("mwparserfromhell needs Python 2.6+ or 3.2+") | |||||
from setuptools import setup, find_packages, Extension | from setuptools import setup, find_packages, Extension | ||||
from setuptools.command.build_ext import build_ext | |||||
from mwparserfromhell import __version__ | from mwparserfromhell import __version__ | ||||
from mwparserfromhell.compat import py26, py3k | from mwparserfromhell.compat import py26, py3k | ||||
@@ -39,70 +40,48 @@ from mwparserfromhell.compat import py26, py3k | |||||
with open("README.rst", **({'encoding':'utf-8'} if py3k else {})) as fp: | with open("README.rst", **({'encoding':'utf-8'} if py3k else {})) as fp: | ||||
long_docs = fp.read() | long_docs = fp.read() | ||||
tokenizer = Extension("mwparserfromhell.parser._tokenizer", | |||||
sources=["mwparserfromhell/parser/tokenizer.c"], | |||||
depends=["mwparserfromhell/parser/tokenizer.h"]) | |||||
use_extension = True | use_extension = True | ||||
fallback = True | |||||
# Allow env var WITHOUT_EXTENSION and args --with[out]-extension | |||||
if '--without-extension' in sys.argv: | |||||
use_extension = False | |||||
elif '--with-extension' in sys.argv: | |||||
pass | |||||
elif os.environ.get('WITHOUT_EXTENSION', '0') == '1': | |||||
use_extension = False | |||||
# Allow env var WITHOUT_EXTENSION and args --with[out]-extension: | |||||
# Remove the command line argument as it isn't understood by | |||||
# setuptools/distutils | |||||
sys.argv = [arg for arg in sys.argv | |||||
if not arg.startswith('--with') | |||||
and not arg.endswith('-extension')] | |||||
def optional_compile_setup(func=setup, use_ext=use_extension, | |||||
*args, **kwargs): | |||||
""" | |||||
Wrap setup to allow optional compilation of extensions. | |||||
env_var = environ.get("WITHOUT_EXTENSION") | |||||
if "--without-extension" in sys.argv: | |||||
use_extension = False | |||||
elif "--with-extension" in sys.argv: | |||||
fallback = False | |||||
elif env_var is not None: | |||||
if env_var == "1": | |||||
use_extension = False | |||||
elif env_var == "0": | |||||
fallback = False | |||||
Falls back to pure python mode (no extensions) | |||||
if compilation of extensions fails. | |||||
""" | |||||
extensions = kwargs.get('ext_modules', None) | |||||
# Remove the command line argument as it isn't understood by setuptools: | |||||
if use_ext and extensions: | |||||
try: | |||||
func(*args, **kwargs) | |||||
return | |||||
except SystemExit as e: | |||||
assert(e.args) | |||||
if e.args[0] is False: | |||||
raise | |||||
elif isinstance(e.args[0], basestring): | |||||
if e.args[0].startswith('usage: '): | |||||
raise | |||||
else: | |||||
# Fallback to pure python mode | |||||
print('setup with extension failed: %s' % repr(e)) | |||||
pass | |||||
except Exception as e: | |||||
print('setup with extension failed: %s' % repr(e)) | |||||
sys.argv = [arg for arg in sys.argv | |||||
if arg != "--without-extension" and arg != "--with-extension"] | |||||
if extensions: | |||||
if use_ext: | |||||
print('Falling back to pure python mode.') | |||||
else: | |||||
print('Using pure python mode.') | |||||
def build_ext_patched(self): | |||||
try: | |||||
build_ext_original(self) | |||||
except (DistutilsError, CCompilerError) as exc: | |||||
print("error: " + str(exc)) | |||||
print("Falling back to pure Python mode.") | |||||
del self.extensions[:] | |||||
del kwargs['ext_modules'] | |||||
if fallback: | |||||
build_ext.run, build_ext_original = build_ext_patched, build_ext.run | |||||
func(*args, **kwargs) | |||||
# Project-specific part begins here: | |||||
tokenizer = Extension("mwparserfromhell.parser._tokenizer", | |||||
sources=glob("mwparserfromhell/parser/ctokenizer/*.c"), | |||||
depends=glob("mwparserfromhell/parser/ctokenizer/*.h")) | |||||
optional_compile_setup( | |||||
setup( | |||||
name = "mwparserfromhell", | name = "mwparserfromhell", | ||||
packages = find_packages(exclude=("tests",)), | packages = find_packages(exclude=("tests",)), | ||||
ext_modules = [tokenizer], | |||||
ext_modules = [tokenizer] if use_extension else [], | |||||
tests_require = ["unittest2"] if py26 else [], | tests_require = ["unittest2"] if py26 else [], | ||||
test_suite = "tests.discover", | test_suite = "tests.discover", | ||||
version = __version__, | version = __version__, | ||||
@@ -42,8 +42,8 @@ class TokenizerTestCase(object): | |||||
directory. | directory. | ||||
""" | """ | ||||
@classmethod | |||||
def _build_test_method(cls, funcname, data): | |||||
@staticmethod | |||||
def _build_test_method(funcname, data): | |||||
"""Create and return a method to be treated as a test case method. | """Create and return a method to be treated as a test case method. | ||||
*data* is a dict containing multiple keys: the *input* text to be | *data* is a dict containing multiple keys: the *input* text to be | ||||
@@ -58,13 +58,35 @@ class TokenizerTestCase(object): | |||||
expected = data["output"] | expected = data["output"] | ||||
actual = self.tokenizer().tokenize(data["input"]) | actual = self.tokenizer().tokenize(data["input"]) | ||||
self.assertEqual(expected, actual) | self.assertEqual(expected, actual) | ||||
if not py3k: | if not py3k: | ||||
inner.__name__ = funcname.encode("utf8") | inner.__name__ = funcname.encode("utf8") | ||||
inner.__doc__ = data["label"] | inner.__doc__ = data["label"] | ||||
return inner | return inner | ||||
@staticmethod | |||||
def _parse_test(test, data): | |||||
"""Parse an individual *test*, storing its info in *data*.""" | |||||
for line in test.strip().splitlines(): | |||||
if line.startswith("name:"): | |||||
data["name"] = line[len("name:"):].strip() | |||||
elif line.startswith("label:"): | |||||
data["label"] = line[len("label:"):].strip() | |||||
elif line.startswith("input:"): | |||||
raw = line[len("input:"):].strip() | |||||
if raw[0] == '"' and raw[-1] == '"': | |||||
raw = raw[1:-1] | |||||
raw = raw.encode("raw_unicode_escape") | |||||
data["input"] = raw.decode("unicode_escape") | |||||
elif line.startswith("output:"): | |||||
raw = line[len("output:"):].strip() | |||||
try: | |||||
data["output"] = eval(raw, vars(tokens)) | |||||
except Exception as err: | |||||
raise _TestParseError(err) | |||||
@classmethod | @classmethod | ||||
def _load_tests(cls, filename, name, text): | |||||
def _load_tests(cls, filename, name, text, restrict=None): | |||||
"""Load all tests in *text* from the file *filename*.""" | """Load all tests in *text* from the file *filename*.""" | ||||
tests = text.split("\n---\n") | tests = text.split("\n---\n") | ||||
counter = 1 | counter = 1 | ||||
@@ -72,23 +94,7 @@ class TokenizerTestCase(object): | |||||
for test in tests: | for test in tests: | ||||
data = {"name": None, "label": None, "input": None, "output": None} | data = {"name": None, "label": None, "input": None, "output": None} | ||||
try: | try: | ||||
for line in test.strip().splitlines(): | |||||
if line.startswith("name:"): | |||||
data["name"] = line[len("name:"):].strip() | |||||
elif line.startswith("label:"): | |||||
data["label"] = line[len("label:"):].strip() | |||||
elif line.startswith("input:"): | |||||
raw = line[len("input:"):].strip() | |||||
if raw[0] == '"' and raw[-1] == '"': | |||||
raw = raw[1:-1] | |||||
raw = raw.encode("raw_unicode_escape") | |||||
data["input"] = raw.decode("unicode_escape") | |||||
elif line.startswith("output:"): | |||||
raw = line[len("output:"):].strip() | |||||
try: | |||||
data["output"] = eval(raw, vars(tokens)) | |||||
except Exception as err: | |||||
raise _TestParseError(err) | |||||
cls._parse_test(test, data) | |||||
except _TestParseError as err: | except _TestParseError as err: | ||||
if data["name"]: | if data["name"]: | ||||
error = "Could not parse test '{0}' in '{1}':\n\t{2}" | error = "Could not parse test '{0}' in '{1}':\n\t{2}" | ||||
@@ -97,6 +103,7 @@ class TokenizerTestCase(object): | |||||
error = "Could not parse a test in '{0}':\n\t{1}" | error = "Could not parse a test in '{0}':\n\t{1}" | ||||
print(error.format(filename, err)) | print(error.format(filename, err)) | ||||
continue | continue | ||||
if not data["name"]: | if not data["name"]: | ||||
error = "A test in '{0}' was ignored because it lacked a name" | error = "A test in '{0}' was ignored because it lacked a name" | ||||
print(error.format(filename)) | print(error.format(filename)) | ||||
@@ -105,27 +112,35 @@ class TokenizerTestCase(object): | |||||
error = "Test '{0}' in '{1}' was ignored because it lacked an input or an output" | error = "Test '{0}' in '{1}' was ignored because it lacked an input or an output" | ||||
print(error.format(data["name"], filename)) | print(error.format(data["name"], filename)) | ||||
continue | continue | ||||
number = str(counter).zfill(digits) | number = str(counter).zfill(digits) | ||||
counter += 1 | |||||
if restrict and data["name"] != restrict: | |||||
continue | |||||
fname = "test_{0}{1}_{2}".format(name, number, data["name"]) | fname = "test_{0}{1}_{2}".format(name, number, data["name"]) | ||||
meth = cls._build_test_method(fname, data) | meth = cls._build_test_method(fname, data) | ||||
setattr(cls, fname, meth) | setattr(cls, fname, meth) | ||||
counter += 1 | |||||
@classmethod | @classmethod | ||||
def build(cls): | def build(cls): | ||||
"""Load and install all tests from the 'tokenizer' directory.""" | """Load and install all tests from the 'tokenizer' directory.""" | ||||
def load_file(filename): | |||||
def load_file(filename, restrict=None): | |||||
with codecs.open(filename, "rU", encoding="utf8") as fp: | with codecs.open(filename, "rU", encoding="utf8") as fp: | ||||
text = fp.read() | text = fp.read() | ||||
name = path.split(filename)[1][:0-len(extension)] | |||||
cls._load_tests(filename, name, text) | |||||
name = path.split(filename)[1][:-len(extension)] | |||||
cls._load_tests(filename, name, text, restrict) | |||||
directory = path.join(path.dirname(__file__), "tokenizer") | directory = path.join(path.dirname(__file__), "tokenizer") | ||||
extension = ".mwtest" | extension = ".mwtest" | ||||
if len(sys.argv) > 2 and sys.argv[1] == "--use": | if len(sys.argv) > 2 and sys.argv[1] == "--use": | ||||
for name in sys.argv[2:]: | for name in sys.argv[2:]: | ||||
load_file(path.join(directory, name + extension)) | |||||
sys.argv = [sys.argv[0]] # So unittest doesn't try to load these | |||||
if "." in name: | |||||
name, test = name.split(".", 1) | |||||
else: | |||||
test = None | |||||
load_file(path.join(directory, name + extension), test) | |||||
sys.argv = [sys.argv[0]] # So unittest doesn't try to parse this | |||||
cls.skip_others = True | cls.skip_others = True | ||||
else: | else: | ||||
for filename in listdir(directory): | for filename in listdir(directory): | ||||
@@ -52,6 +52,7 @@ class TestSmartList(unittest.TestCase): | |||||
self.assertEqual([0, 1, 2], list1[:3]) | self.assertEqual([0, 1, 2], list1[:3]) | ||||
self.assertEqual([0, 1, 2, 3, "one", "two"], list1[:]) | self.assertEqual([0, 1, 2, 3, "one", "two"], list1[:]) | ||||
self.assertEqual([3, "one", "two"], list1[3:]) | self.assertEqual([3, "one", "two"], list1[3:]) | ||||
self.assertEqual([3, "one", "two"], list1[3:100]) | |||||
self.assertEqual(["one", "two"], list1[-2:]) | self.assertEqual(["one", "two"], list1[-2:]) | ||||
self.assertEqual([0, 1], list1[:-4]) | self.assertEqual([0, 1], list1[:-4]) | ||||
self.assertEqual([], list1[6:]) | self.assertEqual([], list1[6:]) | ||||
@@ -389,28 +390,35 @@ class TestSmartList(unittest.TestCase): | |||||
self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) | self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) | ||||
self.assertEqual([4, 3, 2, 1.9, 1.8], child2) | self.assertEqual([4, 3, 2, 1.9, 1.8], child2) | ||||
child1.detach() | |||||
self.assertEqual([1, 4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], parent) | |||||
self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) | |||||
child3 = parent[9:] | |||||
self.assertEqual([8, 8.1, 8.2], child3) | |||||
del parent[8:] | |||||
self.assertEqual([1, 4, 3, 2, 1.9, 1.8, 5, 6], parent) | |||||
self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6], child1) | |||||
self.assertEqual([4, 3, 2, 1.9, 1.8], child2) | |||||
self.assertEqual([], child3) | |||||
del child1 | |||||
self.assertEqual([1, 4, 3, 2, 1.9, 1.8, 5, 6], parent) | |||||
self.assertEqual([4, 3, 2, 1.9, 1.8], child2) | |||||
self.assertEqual([], child3) | |||||
self.assertEqual(2, len(parent._children)) | |||||
del child3 | |||||
self.assertEqual([1, 4, 3, 2, 1.9, 1.8, 5, 6], parent) | |||||
self.assertEqual([4, 3, 2, 1.9, 1.8], child2) | self.assertEqual([4, 3, 2, 1.9, 1.8], child2) | ||||
self.assertEqual(1, len(parent._children)) | self.assertEqual(1, len(parent._children)) | ||||
parent.remove(1.9) | parent.remove(1.9) | ||||
parent.remove(1.8) | parent.remove(1.8) | ||||
self.assertEqual([1, 4, 3, 2, 5, 6, 7, 8, 8.1, 8.2], parent) | |||||
self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) | |||||
self.assertEqual([1, 4, 3, 2, 5, 6], parent) | |||||
self.assertEqual([4, 3, 2], child2) | self.assertEqual([4, 3, 2], child2) | ||||
parent.reverse() | parent.reverse() | ||||
self.assertEqual([8.2, 8.1, 8, 7, 6, 5, 2, 3, 4, 1], parent) | |||||
self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) | |||||
self.assertEqual([6, 5, 2, 3, 4, 1], parent) | |||||
self.assertEqual([4, 3, 2], child2) | self.assertEqual([4, 3, 2], child2) | ||||
self.assertEqual(0, len(parent._children)) | self.assertEqual(0, len(parent._children)) | ||||
child2.detach() | |||||
self.assertEqual([8.2, 8.1, 8, 7, 6, 5, 2, 3, 4, 1], parent) | |||||
self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) | |||||
self.assertEqual([4, 3, 2], child2) | |||||
if __name__ == "__main__": | if __name__ == "__main__": | ||||
unittest.main(verbosity=2) | unittest.main(verbosity=2) |
@@ -27,6 +27,6 @@ output: [Text(text="𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰")] | |||||
--- | --- | ||||
name: large | name: large | ||||
label: a lot of text, requiring multiple textbuffer blocks in the C tokenizer | |||||
label: a lot of text, requiring proper storage in the C tokenizer | |||||
input: "ZWfsZYcZyhGbkDYJiguJuuhsNyHGFkFhnjkbLJyXIygTHqcXdhsDkEOTSIKYlBiohLIkiXxvyebUyCGvvBcYqFdtcftGmaAanKXEIyYSEKlTfEEbdGhdePVwVImOyKiHSzAEuGyEVRIKPZaNjQsYqpqARIQfvAklFtQyTJVGlLwjJIxYkiqmHBmdOvTyNqJRbMvouoqXRyOhYDwowtkcZGSOcyzVxibQdnzhDYbrgbatUrlOMRvFSzmLWHRihtXnddwYadPgFWUOxAzAgddJVDXHerawdkrRuWaEXfuwQSkQUmLEJUmrgXDVlXCpciaisfuOUjBldElygamkkXbewzLucKRnAEBimIIotXeslRRhnqQjrypnLQvvdCsKFWPVTZaHvzJMFEahDHWcCbyXgxFvknWjhVfiLSDuFhGoFxqSvhjnnRZLmCMhmWeOgSoanDEInKTWHnbpKyUlabLppITDFFxyWKAnUYJQIcmYnrvMmzmtYvsbCYbebgAhMFVVFAKUSvlkLFYluDpbpBaNFWyfXTaOdSBrfiHDTWGBTUCXMqVvRCIMrEjWpQaGsABkioGnveQWqBTDdRQlxQiUipwfyqAocMddXqdvTHhEwjEzMkOSWVPjJvDtClhYwpvRztPmRKCSpGIpXQqrYtTLmShFdpKtOxGtGOZYIdyUGPjdmyvhJTQMtgYJWUUZnecRjBfQXsyWQWikyONySLzLEqRFqcJYdRNFcGwWZtfZasfFWcvdsHRXoqKlKYihRAOJdrPBDdxksXFwKceQVncmFXfUfBsNgjKzoObVExSnRnjegeEhqxXzPmFcuiasViAFeaXrAxXhSfSyCILkKYpjxNeKynUmdcGAbwRwRnlAFbOSCafmzXddiNpLCFTHBELvArdXFpKUGpSHRekhrMedMRNkQzmSyFKjVwiWwCvbNWjgxJRzYeRxHiCCRMXktmKBxbxGZvOpvZIJOwvGIxcBLzsMFlDqAMLtScdsJtrbIUAvKfcdChXGnBzIxGxXMgxJhayrziaCswdpjJJJhkaYnGhHXqZwOzHFdhhUIEtfjERdLaSPRTDDMHpQtonNaIgXUYhjdbnnKppfMBxgNSOOXJAPtFjfAKnrRDrumZBpNhxMstqjTGBViRkDqbTdXYUirsedifGYzZpQkvdNhtFTOPgsYXYCwZHLcSLSfwfpQKtWfZuRUUryHJsbVsAOQcIJdSKKlOvCeEjUQNRPHKXuBJUjPuaAJJxcDMqyaufqfVwUmHLdjeYZzSiiGLHOTCInpVAalbXXTMLugLiwFiyPSuSFiyJUKVrWjbZAHaJtZnQmnvorRrxdPKThqXzNgTjszQiCoMczRnwGYJMERUWGXFyrSbAqsHmLwLlnJOJoXNsjVehQjVOpQOQJAZWwFZBlgyVIplzLTlFwumPgBLYrUIAJAcmvHPGfHfWQguCjfTYzxYfbohaLFAPwxFRrNuCdCzLlEbuhyYjCmuDBTJDMCdLpNRVqEALjnPSaBPsKWRCKNGwEMFpiEWbYZRwaMopjoUuBUvMpvyLfsPKDrfQLiFOQIWPtLIMoijUEUYfhykHrSKbTtrvjwIzHdWZDVwLIpNkloCqpzIsErxxKAFuFEjikWNYChqYqVslXMtoSWzNhbMuxYbzLfJIcPGoUeGPkGyPQNhDyrjgdKekzftFrRPTuyLYqCArkDcWHTrjPQHfoThBNnTQyMwLEWxEnBXLtzJmFVLGEPrdbEwlXpgYfnVnWoNXgPQKKyiXifpvrmJATzQOzYwFhliiYxlbnsEPKbHYUfJLrwYPfSUwTIHiEvBFMrEtVmqJobfcwsiiEudTIiAnrtuywgKLOiMYbEIOAOJdOXqroPjWnQQcTNxFvkIEIsuHLyhSqSphuSmlvknzydQEnebOreeZwOouXYKlObAkaWHhOdTFLoMCHOWrVKeXjcniaxtgCziKEqWOZUWHJQpcDJzYnnduDZrmxgjZroBRwoPBU
TJMYipsgJwbTSlvMyXXdAmiEWGMiQxhGvHGPLOKeTxNaLnFVbWpiYIVyqN" | input: "ZWfsZYcZyhGbkDYJiguJuuhsNyHGFkFhnjkbLJyXIygTHqcXdhsDkEOTSIKYlBiohLIkiXxvyebUyCGvvBcYqFdtcftGmaAanKXEIyYSEKlTfEEbdGhdePVwVImOyKiHSzAEuGyEVRIKPZaNjQsYqpqARIQfvAklFtQyTJVGlLwjJIxYkiqmHBmdOvTyNqJRbMvouoqXRyOhYDwowtkcZGSOcyzVxibQdnzhDYbrgbatUrlOMRvFSzmLWHRihtXnddwYadPgFWUOxAzAgddJVDXHerawdkrRuWaEXfuwQSkQUmLEJUmrgXDVlXCpciaisfuOUjBldElygamkkXbewzLucKRnAEBimIIotXeslRRhnqQjrypnLQvvdCsKFWPVTZaHvzJMFEahDHWcCbyXgxFvknWjhVfiLSDuFhGoFxqSvhjnnRZLmCMhmWeOgSoanDEInKTWHnbpKyUlabLppITDFFxyWKAnUYJQIcmYnrvMmzmtYvsbCYbebgAhMFVVFAKUSvlkLFYluDpbpBaNFWyfXTaOdSBrfiHDTWGBTUCXMqVvRCIMrEjWpQaGsABkioGnveQWqBTDdRQlxQiUipwfyqAocMddXqdvTHhEwjEzMkOSWVPjJvDtClhYwpvRztPmRKCSpGIpXQqrYtTLmShFdpKtOxGtGOZYIdyUGPjdmyvhJTQMtgYJWUUZnecRjBfQXsyWQWikyONySLzLEqRFqcJYdRNFcGwWZtfZasfFWcvdsHRXoqKlKYihRAOJdrPBDdxksXFwKceQVncmFXfUfBsNgjKzoObVExSnRnjegeEhqxXzPmFcuiasViAFeaXrAxXhSfSyCILkKYpjxNeKynUmdcGAbwRwRnlAFbOSCafmzXddiNpLCFTHBELvArdXFpKUGpSHRekhrMedMRNkQzmSyFKjVwiWwCvbNWjgxJRzYeRxHiCCRMXktmKBxbxGZvOpvZIJOwvGIxcBLzsMFlDqAMLtScdsJtrbIUAvKfcdChXGnBzIxGxXMgxJhayrziaCswdpjJJJhkaYnGhHXqZwOzHFdhhUIEtfjERdLaSPRTDDMHpQtonNaIgXUYhjdbnnKppfMBxgNSOOXJAPtFjfAKnrRDrumZBpNhxMstqjTGBViRkDqbTdXYUirsedifGYzZpQkvdNhtFTOPgsYXYCwZHLcSLSfwfpQKtWfZuRUUryHJsbVsAOQcIJdSKKlOvCeEjUQNRPHKXuBJUjPuaAJJxcDMqyaufqfVwUmHLdjeYZzSiiGLHOTCInpVAalbXXTMLugLiwFiyPSuSFiyJUKVrWjbZAHaJtZnQmnvorRrxdPKThqXzNgTjszQiCoMczRnwGYJMERUWGXFyrSbAqsHmLwLlnJOJoXNsjVehQjVOpQOQJAZWwFZBlgyVIplzLTlFwumPgBLYrUIAJAcmvHPGfHfWQguCjfTYzxYfbohaLFAPwxFRrNuCdCzLlEbuhyYjCmuDBTJDMCdLpNRVqEALjnPSaBPsKWRCKNGwEMFpiEWbYZRwaMopjoUuBUvMpvyLfsPKDrfQLiFOQIWPtLIMoijUEUYfhykHrSKbTtrvjwIzHdWZDVwLIpNkloCqpzIsErxxKAFuFEjikWNYChqYqVslXMtoSWzNhbMuxYbzLfJIcPGoUeGPkGyPQNhDyrjgdKekzftFrRPTuyLYqCArkDcWHTrjPQHfoThBNnTQyMwLEWxEnBXLtzJmFVLGEPrdbEwlXpgYfnVnWoNXgPQKKyiXifpvrmJATzQOzYwFhliiYxlbnsEPKbHYUfJLrwYPfSUwTIHiEvBFMrEtVmqJobfcwsiiEudTIiAnrtuywgKLOiMYbEIOAOJdOXqroPjWnQQcTNxFvkIEIsuHLyhSqSphuSmlvknzydQEnebOreeZwOouXYKlObAkaWHhOdT
FLoMCHOWrVKeXjcniaxtgCziKEqWOZUWHJQpcDJzYnnduDZrmxgjZroBRwoPBUTJMYipsgJwbTSlvMyXXdAmiEWGMiQxhGvHGPLOKeTxNaLnFVbWpiYIVyqN" | ||||
output: [Text(text="ZWfsZYcZyhGbkDYJiguJuuhsNyHGFkFhnjkbLJyXIygTHqcXdhsDkEOTSIKYlBiohLIkiXxvyebUyCGvvBcYqFdtcftGmaAanKXEIyYSEKlTfEEbdGhdePVwVImOyKiHSzAEuGyEVRIKPZaNjQsYqpqARIQfvAklFtQyTJVGlLwjJIxYkiqmHBmdOvTyNqJRbMvouoqXRyOhYDwowtkcZGSOcyzVxibQdnzhDYbrgbatUrlOMRvFSzmLWHRihtXnddwYadPgFWUOxAzAgddJVDXHerawdkrRuWaEXfuwQSkQUmLEJUmrgXDVlXCpciaisfuOUjBldElygamkkXbewzLucKRnAEBimIIotXeslRRhnqQjrypnLQvvdCsKFWPVTZaHvzJMFEahDHWcCbyXgxFvknWjhVfiLSDuFhGoFxqSvhjnnRZLmCMhmWeOgSoanDEInKTWHnbpKyUlabLppITDFFxyWKAnUYJQIcmYnrvMmzmtYvsbCYbebgAhMFVVFAKUSvlkLFYluDpbpBaNFWyfXTaOdSBrfiHDTWGBTUCXMqVvRCIMrEjWpQaGsABkioGnveQWqBTDdRQlxQiUipwfyqAocMddXqdvTHhEwjEzMkOSWVPjJvDtClhYwpvRztPmRKCSpGIpXQqrYtTLmShFdpKtOxGtGOZYIdyUGPjdmyvhJTQMtgYJWUUZnecRjBfQXsyWQWikyONySLzLEqRFqcJYdRNFcGwWZtfZasfFWcvdsHRXoqKlKYihRAOJdrPBDdxksXFwKceQVncmFXfUfBsNgjKzoObVExSnRnjegeEhqxXzPmFcuiasViAFeaXrAxXhSfSyCILkKYpjxNeKynUmdcGAbwRwRnlAFbOSCafmzXddiNpLCFTHBELvArdXFpKUGpSHRekhrMedMRNkQzmSyFKjVwiWwCvbNWjgxJRzYeRxHiCCRMXktmKBxbxGZvOpvZIJOwvGIxcBLzsMFlDqAMLtScdsJtrbIUAvKfcdChXGnBzIxGxXMgxJhayrziaCswdpjJJJhkaYnGhHXqZwOzHFdhhUIEtfjERdLaSPRTDDMHpQtonNaIgXUYhjdbnnKppfMBxgNSOOXJAPtFjfAKnrRDrumZBpNhxMstqjTGBViRkDqbTdXYUirsedifGYzZpQkvdNhtFTOPgsYXYCwZHLcSLSfwfpQKtWfZuRUUryHJsbVsAOQcIJdSKKlOvCeEjUQNRPHKXuBJUjPuaAJJxcDMqyaufqfVwUmHLdjeYZzSiiGLHOTCInpVAalbXXTMLugLiwFiyPSuSFiyJUKVrWjbZAHaJtZnQmnvorRrxdPKThqXzNgTjszQiCoMczRnwGYJMERUWGXFyrSbAqsHmLwLlnJOJoXNsjVehQjVOpQOQJAZWwFZBlgyVIplzLTlFwumPgBLYrUIAJAcmvHPGfHfWQguCjfTYzxYfbohaLFAPwxFRrNuCdCzLlEbuhyYjCmuDBTJDMCdLpNRVqEALjnPSaBPsKWRCKNGwEMFpiEWbYZRwaMopjoUuBUvMpvyLfsPKDrfQLiFOQIWPtLIMoijUEUYfhykHrSKbTtrvjwIzHdWZDVwLIpNkloCqpzIsErxxKAFuFEjikWNYChqYqVslXMtoSWzNhbMuxYbzLfJIcPGoUeGPkGyPQNhDyrjgdKekzftFrRPTuyLYqCArkDcWHTrjPQHfoThBNnTQyMwLEWxEnBXLtzJmFVLGEPrdbEwlXpgYfnVnWoNXgPQKKyiXifpvrmJATzQOzYwFhliiYxlbnsEPKbHYUfJLrwYPfSUwTIHiEvBFMrEtVmqJobfcwsiiEudTIiAnrtuywgKLOiMYbEIOAOJdOXqroPjWnQQcTNxFvkIEIsuHLyhSqSphuSmlvknzydQEnebOreeZwOouXYKlObAkaWHhOdTFLoMCHOWrVKeXjcniaxtgCziKEqWOZUWHJQpcDJzYnnduDZrmx
gjZroBRwoPBUTJMYipsgJwbTSlvMyXXdAmiEWGMiQxhGvHGPLOKeTxNaLnFVbWpiYIVyqN")] | output: [Text(text="ZWfsZYcZyhGbkDYJiguJuuhsNyHGFkFhnjkbLJyXIygTHqcXdhsDkEOTSIKYlBiohLIkiXxvyebUyCGvvBcYqFdtcftGmaAanKXEIyYSEKlTfEEbdGhdePVwVImOyKiHSzAEuGyEVRIKPZaNjQsYqpqARIQfvAklFtQyTJVGlLwjJIxYkiqmHBmdOvTyNqJRbMvouoqXRyOhYDwowtkcZGSOcyzVxibQdnzhDYbrgbatUrlOMRvFSzmLWHRihtXnddwYadPgFWUOxAzAgddJVDXHerawdkrRuWaEXfuwQSkQUmLEJUmrgXDVlXCpciaisfuOUjBldElygamkkXbewzLucKRnAEBimIIotXeslRRhnqQjrypnLQvvdCsKFWPVTZaHvzJMFEahDHWcCbyXgxFvknWjhVfiLSDuFhGoFxqSvhjnnRZLmCMhmWeOgSoanDEInKTWHnbpKyUlabLppITDFFxyWKAnUYJQIcmYnrvMmzmtYvsbCYbebgAhMFVVFAKUSvlkLFYluDpbpBaNFWyfXTaOdSBrfiHDTWGBTUCXMqVvRCIMrEjWpQaGsABkioGnveQWqBTDdRQlxQiUipwfyqAocMddXqdvTHhEwjEzMkOSWVPjJvDtClhYwpvRztPmRKCSpGIpXQqrYtTLmShFdpKtOxGtGOZYIdyUGPjdmyvhJTQMtgYJWUUZnecRjBfQXsyWQWikyONySLzLEqRFqcJYdRNFcGwWZtfZasfFWcvdsHRXoqKlKYihRAOJdrPBDdxksXFwKceQVncmFXfUfBsNgjKzoObVExSnRnjegeEhqxXzPmFcuiasViAFeaXrAxXhSfSyCILkKYpjxNeKynUmdcGAbwRwRnlAFbOSCafmzXddiNpLCFTHBELvArdXFpKUGpSHRekhrMedMRNkQzmSyFKjVwiWwCvbNWjgxJRzYeRxHiCCRMXktmKBxbxGZvOpvZIJOwvGIxcBLzsMFlDqAMLtScdsJtrbIUAvKfcdChXGnBzIxGxXMgxJhayrziaCswdpjJJJhkaYnGhHXqZwOzHFdhhUIEtfjERdLaSPRTDDMHpQtonNaIgXUYhjdbnnKppfMBxgNSOOXJAPtFjfAKnrRDrumZBpNhxMstqjTGBViRkDqbTdXYUirsedifGYzZpQkvdNhtFTOPgsYXYCwZHLcSLSfwfpQKtWfZuRUUryHJsbVsAOQcIJdSKKlOvCeEjUQNRPHKXuBJUjPuaAJJxcDMqyaufqfVwUmHLdjeYZzSiiGLHOTCInpVAalbXXTMLugLiwFiyPSuSFiyJUKVrWjbZAHaJtZnQmnvorRrxdPKThqXzNgTjszQiCoMczRnwGYJMERUWGXFyrSbAqsHmLwLlnJOJoXNsjVehQjVOpQOQJAZWwFZBlgyVIplzLTlFwumPgBLYrUIAJAcmvHPGfHfWQguCjfTYzxYfbohaLFAPwxFRrNuCdCzLlEbuhyYjCmuDBTJDMCdLpNRVqEALjnPSaBPsKWRCKNGwEMFpiEWbYZRwaMopjoUuBUvMpvyLfsPKDrfQLiFOQIWPtLIMoijUEUYfhykHrSKbTtrvjwIzHdWZDVwLIpNkloCqpzIsErxxKAFuFEjikWNYChqYqVslXMtoSWzNhbMuxYbzLfJIcPGoUeGPkGyPQNhDyrjgdKekzftFrRPTuyLYqCArkDcWHTrjPQHfoThBNnTQyMwLEWxEnBXLtzJmFVLGEPrdbEwlXpgYfnVnWoNXgPQKKyiXifpvrmJATzQOzYwFhliiYxlbnsEPKbHYUfJLrwYPfSUwTIHiEvBFMrEtVmqJobfcwsiiEudTIiAnrtuywgKLOiMYbEIOAOJdOXqroPjWnQQcTNxFvkIEIsuHLyhSqSphuSmlvknzydQEn
ebOreeZwOouXYKlObAkaWHhOdTFLoMCHOWrVKeXjcniaxtgCziKEqWOZUWHJQpcDJzYnnduDZrmxgjZroBRwoPBUTJMYipsgJwbTSlvMyXXdAmiEWGMiQxhGvHGPLOKeTxNaLnFVbWpiYIVyqN")] |