@@ -20,14 +20,18 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | SOFTWARE. | ||||
*/ | */ | ||||
#pragma once | |||||
#ifndef PY_SSIZE_T_CLEAN | #ifndef PY_SSIZE_T_CLEAN | ||||
#define PY_SSIZE_T_CLEAN | |||||
#define PY_SSIZE_T_CLEAN // See: https://docs.python.org/2/c-api/arg.html | |||||
#endif | #endif | ||||
#include <Python.h> | #include <Python.h> | ||||
#include <structmember.h> | #include <structmember.h> | ||||
#include <bytesobject.h> | #include <bytesobject.h> | ||||
/* Compatibility macros */ | |||||
#if PY_MAJOR_VERSION >= 3 | #if PY_MAJOR_VERSION >= 3 | ||||
#define IS_PY3K | #define IS_PY3K | ||||
#endif | #endif | ||||
@@ -36,5 +40,53 @@ SOFTWARE. | |||||
#define uint64_t unsigned PY_LONG_LONG | #define uint64_t unsigned PY_LONG_LONG | ||||
#endif | #endif | ||||
#define malloc PyObject_Malloc | |||||
#define malloc PyObject_Malloc // XXX: yuck | |||||
#define free PyObject_Free | #define free PyObject_Free | ||||
/* Error handling globals/macros */ | |||||
/* Route-failure flag and the context stored alongside it. Both are ordinary
   globals shared by every caller. */
extern int route_state;  // TODO: this is NOT thread-safe!
extern uint64_t route_context;

/* Read access to the failure flag and to the recorded context. */
#define BAD_ROUTE         route_state
#define BAD_ROUTE_CONTEXT route_context

/* Raise the failure flag and record the given context with it. */
#define FAIL_ROUTE(context)      \
    {                            \
        route_state = 1;         \
        route_context = context; \
    }

/* Lower the failure flag; the recorded context is left as it is. */
#define RESET_ROUTE() route_state = 0
/* Shared globals */ | |||||
extern char** entitydefs; | |||||
extern PyObject* EMPTY; | |||||
extern PyObject* NOARGS; | |||||
extern PyObject* definitions; | |||||
/* Structs */ | |||||
struct Textbuffer { | |||||
Py_ssize_t size; | |||||
Py_UNICODE* data; | |||||
struct Textbuffer* prev; | |||||
struct Textbuffer* next; | |||||
}; | |||||
typedef struct Textbuffer Textbuffer; | |||||
struct Stack { | |||||
PyObject* stack; | |||||
uint64_t context; | |||||
struct Textbuffer* textbuffer; | |||||
struct Stack* next; | |||||
}; | |||||
typedef struct Stack Stack; | |||||
typedef struct { | |||||
PyObject_HEAD | |||||
PyObject* text; /* text to tokenize */ | |||||
Stack* topstack; /* topmost stack */ | |||||
Py_ssize_t head; /* current position in text */ | |||||
Py_ssize_t length; /* length of text */ | |||||
int global; /* global context */ | |||||
int depth; /* stack recursion depth */ | |||||
int cycles; /* total number of stack recursions */ | |||||
int skip_style_tags; /* temporary fix for the sometimes broken tag parser */ | |||||
} Tokenizer; |
@@ -0,0 +1,104 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#pragma once | |||||
/* Local contexts: every leaf flag owns one bit; each group mask is spelled
   out as the OR of the leaf flags that belong to it. */

#define LC_TEMPLATE_NAME            0x0000000000000001
#define LC_TEMPLATE_PARAM_KEY       0x0000000000000002
#define LC_TEMPLATE_PARAM_VALUE     0x0000000000000004
#define LC_TEMPLATE \
    (LC_TEMPLATE_NAME | LC_TEMPLATE_PARAM_KEY | LC_TEMPLATE_PARAM_VALUE)

#define LC_ARGUMENT_NAME            0x0000000000000008
#define LC_ARGUMENT_DEFAULT         0x0000000000000010
#define LC_ARGUMENT                 (LC_ARGUMENT_NAME | LC_ARGUMENT_DEFAULT)

#define LC_WIKILINK_TITLE           0x0000000000000020
#define LC_WIKILINK_TEXT            0x0000000000000040
#define LC_WIKILINK                 (LC_WIKILINK_TITLE | LC_WIKILINK_TEXT)

#define LC_EXT_LINK_URI             0x0000000000000080
#define LC_EXT_LINK_TITLE           0x0000000000000100
#define LC_EXT_LINK                 (LC_EXT_LINK_URI | LC_EXT_LINK_TITLE)

#define LC_HEADING_LEVEL_1          0x0000000000000200
#define LC_HEADING_LEVEL_2          0x0000000000000400
#define LC_HEADING_LEVEL_3          0x0000000000000800
#define LC_HEADING_LEVEL_4          0x0000000000001000
#define LC_HEADING_LEVEL_5          0x0000000000002000
#define LC_HEADING_LEVEL_6          0x0000000000004000
#define LC_HEADING \
    (LC_HEADING_LEVEL_1 | LC_HEADING_LEVEL_2 | LC_HEADING_LEVEL_3 | \
     LC_HEADING_LEVEL_4 | LC_HEADING_LEVEL_5 | LC_HEADING_LEVEL_6)

#define LC_TAG_OPEN                 0x0000000000008000
#define LC_TAG_ATTR                 0x0000000000010000
#define LC_TAG_BODY                 0x0000000000020000
#define LC_TAG_CLOSE                0x0000000000040000
#define LC_TAG \
    (LC_TAG_OPEN | LC_TAG_ATTR | LC_TAG_BODY | LC_TAG_CLOSE)

#define LC_STYLE_ITALICS            0x0000000000080000
#define LC_STYLE_BOLD               0x0000000000100000
#define LC_STYLE_PASS_AGAIN         0x0000000000200000
#define LC_STYLE_SECOND_PASS        0x0000000000400000
#define LC_STYLE \
    (LC_STYLE_ITALICS | LC_STYLE_BOLD | LC_STYLE_PASS_AGAIN | \
     LC_STYLE_SECOND_PASS)

#define LC_DLTERM                   0x0000000000800000

#define LC_HAS_TEXT                 0x0000000001000000
#define LC_FAIL_ON_TEXT             0x0000000002000000
#define LC_FAIL_NEXT                0x0000000004000000
#define LC_FAIL_ON_LBRACE           0x0000000008000000
#define LC_FAIL_ON_RBRACE           0x0000000010000000
#define LC_FAIL_ON_EQUALS           0x0000000020000000
#define LC_SAFETY_CHECK \
    (LC_HAS_TEXT | LC_FAIL_ON_TEXT | LC_FAIL_NEXT | LC_FAIL_ON_LBRACE | \
     LC_FAIL_ON_RBRACE | LC_FAIL_ON_EQUALS)

#define LC_TABLE_OPEN               0x0000000040000000
#define LC_TABLE_CELL_OPEN          0x0000000080000000
#define LC_TABLE_CELL_STYLE         0x0000000100000000
#define LC_TABLE_ROW_OPEN           0x0000000200000000
#define LC_TABLE_TD_LINE            0x0000000400000000
#define LC_TABLE_TH_LINE            0x0000000800000000
#define LC_TABLE \
    (LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | LC_TABLE_CELL_STYLE | \
     LC_TABLE_ROW_OPEN | LC_TABLE_TD_LINE | LC_TABLE_TH_LINE)
#define LC_TABLE_CELL_LINE_CONTEXTS \
    (LC_TABLE_TD_LINE | LC_TABLE_TH_LINE | LC_TABLE_CELL_STYLE)

/* Global contexts */

#define GL_HEADING 0x1

/* Aggregate contexts: unions of local contexts that are tested together. */

#define AGG_FAIL \
    (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | \
     LC_HEADING | LC_TAG | LC_STYLE | LC_TABLE_OPEN)
#define AGG_UNSAFE \
    (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | \
     LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)
#define AGG_DOUBLE \
    (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE | LC_TABLE_ROW_OPEN)
#define AGG_NO_WIKILINKS \
    (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI)
#define AGG_NO_EXT_LINKS \
    (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK)

/* Tag contexts: one bit per flag, bits 0 through 7. */

#define TAG_NAME        (1 << 0)
#define TAG_ATTR_READY  (1 << 1)
#define TAG_ATTR_NAME   (1 << 2)
#define TAG_ATTR_VALUE  (1 << 3)
#define TAG_QUOTED      (1 << 4)
#define TAG_NOTE_SPACE  (1 << 5)
#define TAG_NOTE_EQUALS (1 << 6)
#define TAG_NOTE_QUOTE  (1 << 7)
@@ -0,0 +1,88 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#include "tag_data.h" | |||||
#include "contexts.h" | |||||
/* | |||||
Initialize a new TagData object. | |||||
*/ | |||||
TagData* TagData_new(void) | |||||
{ | |||||
#define ALLOC_BUFFER(name) \ | |||||
name = Textbuffer_new(); \ | |||||
if (!name) { \ | |||||
TagData_dealloc(self); \ | |||||
return NULL; \ | |||||
} | |||||
TagData *self = malloc(sizeof(TagData)); | |||||
if (!self) { | |||||
PyErr_NoMemory(); | |||||
return NULL; | |||||
} | |||||
self->context = TAG_NAME; | |||||
ALLOC_BUFFER(self->pad_first) | |||||
ALLOC_BUFFER(self->pad_before_eq) | |||||
ALLOC_BUFFER(self->pad_after_eq) | |||||
self->quoter = 0; | |||||
self->reset = 0; | |||||
return self; | |||||
#undef ALLOC_BUFFER | |||||
} | |||||
/* | |||||
Deallocate the given TagData object. | |||||
*/ | |||||
void TagData_dealloc(TagData* self) | |||||
{ | |||||
#define DEALLOC_BUFFER(name) \ | |||||
if (name) \ | |||||
Textbuffer_dealloc(name); | |||||
DEALLOC_BUFFER(self->pad_first); | |||||
DEALLOC_BUFFER(self->pad_before_eq); | |||||
DEALLOC_BUFFER(self->pad_after_eq); | |||||
free(self); | |||||
#undef DEALLOC_BUFFER | |||||
} | |||||
/* | |||||
Clear the internal buffers of the given TagData object. | |||||
*/ | |||||
int TagData_reset_buffers(TagData* self) | |||||
{ | |||||
#define RESET_BUFFER(name) \ | |||||
Textbuffer_dealloc(name); \ | |||||
name = Textbuffer_new(); \ | |||||
if (!name) \ | |||||
return -1; | |||||
RESET_BUFFER(self->pad_first) | |||||
RESET_BUFFER(self->pad_before_eq) | |||||
RESET_BUFFER(self->pad_after_eq) | |||||
return 0; | |||||
#undef RESET_BUFFER | |||||
} |
@@ -0,0 +1,43 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#pragma once | |||||
#include "common.h" | |||||
#include "textbuffer.h" | |||||
/* Structs */ | |||||
typedef struct { | |||||
uint64_t context; | |||||
Textbuffer* pad_first; | |||||
Textbuffer* pad_before_eq; | |||||
Textbuffer* pad_after_eq; | |||||
Py_UNICODE quoter; | |||||
Py_ssize_t reset; | |||||
} TagData; | |||||
/* Functions */ | |||||
/* Create a TagData; NULL on failure. */
TagData* TagData_new(void);
/* Release a TagData together with its padding buffers. */
void TagData_dealloc(TagData* self);
/* Replace the padding buffers with fresh ones; 0 on success, -1 on failure. */
int TagData_reset_buffers(TagData* self);
@@ -20,17 +20,9 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | SOFTWARE. | ||||
*/ | */ | ||||
#include "common.h" | |||||
/* Structs */ | |||||
#pragma once | |||||
struct Textbuffer { | |||||
Py_ssize_t size; | |||||
Py_UNICODE* data; | |||||
struct Textbuffer* prev; | |||||
struct Textbuffer* next; | |||||
}; | |||||
typedef struct Textbuffer Textbuffer; | |||||
#include "common.h" | |||||
/* Functions */ | /* Functions */ | ||||
@@ -0,0 +1,29 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#pragma once | |||||
#include "common.h" | |||||
/* Functions */ | |||||
/* NOTE(review): the definition is not visible from this header; the names
   below follow the types used elsewhere (a Tokenizer and a 64-bit context).
   The meaning of the trailing int should be confirmed against the .c file. */
PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int);
@@ -0,0 +1,362 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#include "tok_support.h" | |||||
#include "textbuffer.h" | |||||
#include "tokens.h" | |||||
/* | |||||
Add a new token stack, context, and textbuffer to the list. | |||||
*/ | |||||
int Tokenizer_push(Tokenizer* self, uint64_t context) | |||||
{ | |||||
Stack* top = malloc(sizeof(Stack)); | |||||
if (!top) { | |||||
PyErr_NoMemory(); | |||||
return -1; | |||||
} | |||||
top->stack = PyList_New(0); | |||||
top->context = context; | |||||
top->textbuffer = Textbuffer_new(); | |||||
if (!top->textbuffer) | |||||
return -1; | |||||
top->next = self->topstack; | |||||
self->topstack = top; | |||||
self->depth++; | |||||
self->cycles++; | |||||
return 0; | |||||
} | |||||
/* | |||||
Push the textbuffer onto the stack as a Text node and clear it. | |||||
*/ | |||||
int Tokenizer_push_textbuffer(Tokenizer* self) | |||||
{ | |||||
PyObject *text, *kwargs, *token; | |||||
Textbuffer* buffer = self->topstack->textbuffer; | |||||
if (buffer->size == 0 && !buffer->next) | |||||
return 0; | |||||
text = Textbuffer_render(buffer); | |||||
if (!text) | |||||
return -1; | |||||
kwargs = PyDict_New(); | |||||
if (!kwargs) { | |||||
Py_DECREF(text); | |||||
return -1; | |||||
} | |||||
PyDict_SetItemString(kwargs, "text", text); | |||||
Py_DECREF(text); | |||||
token = PyObject_Call(Text, NOARGS, kwargs); | |||||
Py_DECREF(kwargs); | |||||
if (!token) | |||||
return -1; | |||||
if (PyList_Append(self->topstack->stack, token)) { | |||||
Py_DECREF(token); | |||||
return -1; | |||||
} | |||||
Py_DECREF(token); | |||||
Textbuffer_dealloc(buffer); | |||||
self->topstack->textbuffer = Textbuffer_new(); | |||||
if (!self->topstack->textbuffer) | |||||
return -1; | |||||
return 0; | |||||
} | |||||
/* | |||||
Pop and deallocate the top token stack/context/textbuffer. | |||||
*/ | |||||
void Tokenizer_delete_top_of_stack(Tokenizer* self) | |||||
{ | |||||
Stack* top = self->topstack; | |||||
Py_DECREF(top->stack); | |||||
Textbuffer_dealloc(top->textbuffer); | |||||
self->topstack = top->next; | |||||
free(top); | |||||
self->depth--; | |||||
} | |||||
/* | |||||
Pop the current stack/context/textbuffer, returing the stack. | |||||
*/ | |||||
PyObject* Tokenizer_pop(Tokenizer* self) | |||||
{ | |||||
PyObject* stack; | |||||
if (Tokenizer_push_textbuffer(self)) | |||||
return NULL; | |||||
stack = self->topstack->stack; | |||||
Py_INCREF(stack); | |||||
Tokenizer_delete_top_of_stack(self); | |||||
return stack; | |||||
} | |||||
/* | |||||
Pop the current stack/context/textbuffer, returing the stack. We will also | |||||
replace the underlying stack's context with the current stack's. | |||||
*/ | |||||
PyObject* Tokenizer_pop_keeping_context(Tokenizer* self) | |||||
{ | |||||
PyObject* stack; | |||||
uint64_t context; | |||||
if (Tokenizer_push_textbuffer(self)) | |||||
return NULL; | |||||
stack = self->topstack->stack; | |||||
Py_INCREF(stack); | |||||
context = self->topstack->context; | |||||
Tokenizer_delete_top_of_stack(self); | |||||
self->topstack->context = context; | |||||
return stack; | |||||
} | |||||
/* | |||||
Fail the current tokenization route. Discards the current | |||||
stack/context/textbuffer and sets the BAD_ROUTE flag. | |||||
*/ | |||||
void* Tokenizer_fail_route(Tokenizer* self) | |||||
{ | |||||
uint64_t context = self->topstack->context; | |||||
PyObject* stack = Tokenizer_pop(self); | |||||
Py_XDECREF(stack); | |||||
FAIL_ROUTE(context); | |||||
return NULL; | |||||
} | |||||
/* | |||||
Write a token to the current token stack. | |||||
*/ | |||||
int Tokenizer_emit_token(Tokenizer* self, PyObject* token, int first) | |||||
{ | |||||
PyObject* instance; | |||||
if (Tokenizer_push_textbuffer(self)) | |||||
return -1; | |||||
instance = PyObject_CallObject(token, NULL); | |||||
if (!instance) | |||||
return -1; | |||||
if (first ? PyList_Insert(self->topstack->stack, 0, instance) : | |||||
PyList_Append(self->topstack->stack, instance)) { | |||||
Py_DECREF(instance); | |||||
return -1; | |||||
} | |||||
Py_DECREF(instance); | |||||
return 0; | |||||
} | |||||
/* | |||||
Write a token to the current token stack, with kwargs. Steals a reference | |||||
to kwargs. | |||||
*/ | |||||
int Tokenizer_emit_token_kwargs(Tokenizer* self, PyObject* token, | |||||
PyObject* kwargs, int first) | |||||
{ | |||||
PyObject* instance; | |||||
if (Tokenizer_push_textbuffer(self)) { | |||||
Py_DECREF(kwargs); | |||||
return -1; | |||||
} | |||||
instance = PyObject_Call(token, NOARGS, kwargs); | |||||
if (!instance) { | |||||
Py_DECREF(kwargs); | |||||
return -1; | |||||
} | |||||
if (first ? PyList_Insert(self->topstack->stack, 0, instance): | |||||
PyList_Append(self->topstack->stack, instance)) { | |||||
Py_DECREF(instance); | |||||
Py_DECREF(kwargs); | |||||
return -1; | |||||
} | |||||
Py_DECREF(instance); | |||||
Py_DECREF(kwargs); | |||||
return 0; | |||||
} | |||||
/* | |||||
Write a Unicode codepoint to the current textbuffer. | |||||
*/ | |||||
int Tokenizer_emit_char(Tokenizer* self, Py_UNICODE code) | |||||
{ | |||||
return Textbuffer_write(&(self->topstack->textbuffer), code); | |||||
} | |||||
/* | |||||
Write a string of text to the current textbuffer. | |||||
*/ | |||||
int Tokenizer_emit_text(Tokenizer* self, const char* text) | |||||
{ | |||||
int i = 0; | |||||
while (text[i]) { | |||||
if (Tokenizer_emit_char(self, text[i])) | |||||
return -1; | |||||
i++; | |||||
} | |||||
return 0; | |||||
} | |||||
/* | |||||
Write the contents of another textbuffer to the current textbuffer, | |||||
deallocating it in the process. | |||||
*/ | |||||
int | |||||
Tokenizer_emit_textbuffer(Tokenizer* self, Textbuffer* buffer, int reverse) | |||||
{ | |||||
Textbuffer *original = buffer; | |||||
Py_ssize_t i; | |||||
if (reverse) { | |||||
do { | |||||
for (i = buffer->size - 1; i >= 0; i--) { | |||||
if (Tokenizer_emit_char(self, buffer->data[i])) { | |||||
Textbuffer_dealloc(original); | |||||
return -1; | |||||
} | |||||
} | |||||
} while ((buffer = buffer->next)); | |||||
} | |||||
else { | |||||
while (buffer->next) | |||||
buffer = buffer->next; | |||||
do { | |||||
for (i = 0; i < buffer->size; i++) { | |||||
if (Tokenizer_emit_char(self, buffer->data[i])) { | |||||
Textbuffer_dealloc(original); | |||||
return -1; | |||||
} | |||||
} | |||||
} while ((buffer = buffer->prev)); | |||||
} | |||||
Textbuffer_dealloc(original); | |||||
return 0; | |||||
} | |||||
/* | |||||
Write a series of tokens to the current stack at once. | |||||
*/ | |||||
int Tokenizer_emit_all(Tokenizer* self, PyObject* tokenlist) | |||||
{ | |||||
int pushed = 0; | |||||
PyObject *stack, *token, *left, *right, *text; | |||||
Textbuffer* buffer; | |||||
Py_ssize_t size; | |||||
if (PyList_GET_SIZE(tokenlist) > 0) { | |||||
token = PyList_GET_ITEM(tokenlist, 0); | |||||
switch (PyObject_IsInstance(token, Text)) { | |||||
case 0: | |||||
break; | |||||
case 1: { | |||||
pushed = 1; | |||||
buffer = self->topstack->textbuffer; | |||||
if (buffer->size == 0 && !buffer->next) | |||||
break; | |||||
left = Textbuffer_render(buffer); | |||||
if (!left) | |||||
return -1; | |||||
right = PyObject_GetAttrString(token, "text"); | |||||
if (!right) | |||||
return -1; | |||||
text = PyUnicode_Concat(left, right); | |||||
Py_DECREF(left); | |||||
Py_DECREF(right); | |||||
if (!text) | |||||
return -1; | |||||
if (PyObject_SetAttrString(token, "text", text)) { | |||||
Py_DECREF(text); | |||||
return -1; | |||||
} | |||||
Py_DECREF(text); | |||||
Textbuffer_dealloc(buffer); | |||||
self->topstack->textbuffer = Textbuffer_new(); | |||||
if (!self->topstack->textbuffer) | |||||
return -1; | |||||
break; | |||||
} | |||||
case -1: | |||||
return -1; | |||||
} | |||||
} | |||||
if (!pushed) { | |||||
if (Tokenizer_push_textbuffer(self)) | |||||
return -1; | |||||
} | |||||
stack = self->topstack->stack; | |||||
size = PyList_GET_SIZE(stack); | |||||
if (PyList_SetSlice(stack, size, size, tokenlist)) | |||||
return -1; | |||||
return 0; | |||||
} | |||||
/* | |||||
Pop the current stack, write text, and then write the stack. 'text' is a | |||||
NULL-terminated array of chars. | |||||
*/ | |||||
int Tokenizer_emit_text_then_stack(Tokenizer* self, const char* text) | |||||
{ | |||||
PyObject* stack = Tokenizer_pop(self); | |||||
if (Tokenizer_emit_text(self, text)) { | |||||
Py_DECREF(stack); | |||||
return -1; | |||||
} | |||||
if (stack) { | |||||
if (PyList_GET_SIZE(stack) > 0) { | |||||
if (Tokenizer_emit_all(self, stack)) { | |||||
Py_DECREF(stack); | |||||
return -1; | |||||
} | |||||
} | |||||
Py_DECREF(stack); | |||||
} | |||||
self->head--; | |||||
return 0; | |||||
} | |||||
/* | |||||
Read the value at a relative point in the wikicode, forwards. | |||||
*/ | |||||
PyObject* Tokenizer_read(Tokenizer* self, Py_ssize_t delta) | |||||
{ | |||||
Py_ssize_t index = self->head + delta; | |||||
if (index >= self->length) | |||||
return EMPTY; | |||||
return PyList_GET_ITEM(self->text, index); | |||||
} | |||||
/* | |||||
Read the value at a relative point in the wikicode, backwards. | |||||
*/ | |||||
PyObject* Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) | |||||
{ | |||||
Py_ssize_t index; | |||||
if (delta > self->head) | |||||
return EMPTY; | |||||
index = self->head - delta; | |||||
return PyList_GET_ITEM(self->text, index); | |||||
} |
@@ -0,0 +1,66 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#pragma once | |||||
#include "common.h" | |||||
/* Functions */ | |||||
int Tokenizer_push(Tokenizer*, uint64_t); | |||||
int Tokenizer_push_textbuffer(Tokenizer*); | |||||
void Tokenizer_delete_top_of_stack(Tokenizer*); | |||||
PyObject* Tokenizer_pop(Tokenizer*); | |||||
PyObject* Tokenizer_pop_keeping_context(Tokenizer*); | |||||
void* Tokenizer_fail_route(Tokenizer*); | |||||
int Tokenizer_emit_token(Tokenizer*, PyObject*, int); | |||||
int Tokenizer_emit_token_kwargs(Tokenizer*, PyObject*, PyObject*, int); | |||||
int Tokenizer_emit_char(Tokenizer*, Py_UNICODE); | |||||
int Tokenizer_emit_text(Tokenizer*, const char*); | |||||
int Tokenizer_emit_textbuffer(Tokenizer*, Textbuffer*, int); | |||||
int Tokenizer_emit_all(Tokenizer*, PyObject*); | |||||
int Tokenizer_emit_text_then_stack(Tokenizer*, const char*); | |||||
PyObject* Tokenizer_read(Tokenizer*, Py_ssize_t); | |||||
PyObject* Tokenizer_read_backwards(Tokenizer*, Py_ssize_t); | |||||
/* Macros */ | |||||
/* Limits checked by Tokenizer_CAN_RECURSE(). */
#define MAX_DEPTH 40
#define MAX_CYCLES 100000

/* First code unit of the item Tokenizer_read()/Tokenizer_read_backwards()
   returns for the given offset. */
#define Tokenizer_READ(self, delta) \
    (*PyUnicode_AS_UNICODE(Tokenizer_read((self), (delta))))
#define Tokenizer_READ_BACKWARDS(self, delta) \
    (*PyUnicode_AS_UNICODE(Tokenizer_read_backwards((self), (delta))))

/* Nonzero while both the stack depth and the cycle count are under their
   limits. 'self' is parenthesized so that any pointer expression (such as
   &tokenizer) can be passed. */
#define Tokenizer_CAN_RECURSE(self) \
    ((self)->depth < MAX_DEPTH && (self)->cycles < MAX_CYCLES)

/* Shorthands for Tokenizer_emit_token() and Tokenizer_emit_token_kwargs():
   the plain forms append, the "first" forms insert at the front. */
#define Tokenizer_emit(self, token) \
    Tokenizer_emit_token((self), (token), 0)
#define Tokenizer_emit_first(self, token) \
    Tokenizer_emit_token((self), (token), 1)
#define Tokenizer_emit_kwargs(self, token, kwargs) \
    Tokenizer_emit_token_kwargs((self), (token), (kwargs), 0)
#define Tokenizer_emit_first_kwargs(self, token, kwargs) \
    Tokenizer_emit_token_kwargs((self), (token), (kwargs), 1)
@@ -20,256 +20,27 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | SOFTWARE. | ||||
*/ | */ | ||||
#include <math.h> | |||||
#pragma once | |||||
#include "common.h" | #include "common.h" | ||||
#include "textbuffer.h" | #include "textbuffer.h" | ||||
#define DIGITS "0123456789" | |||||
#define HEXDIGITS "0123456789abcdefABCDEF" | |||||
#define ALPHANUM "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |||||
static const char MARKERS[] = { | |||||
'{', '}', '[', ']', '<', '>', '|', '=', '&', '\'', '#', '*', ';', ':', '/', | |||||
'-', '!', '\n', '\0'}; | |||||
#define NUM_MARKERS 19 | |||||
#define MAX_DEPTH 40 | |||||
#define MAX_CYCLES 100000 | |||||
#define MAX_BRACES 255 | |||||
#define MAX_ENTITY_SIZE 8 | |||||
static int route_state = 0; | |||||
static uint64_t route_context = 0; | |||||
#define BAD_ROUTE route_state | |||||
#define BAD_ROUTE_CONTEXT route_context | |||||
#define FAIL_ROUTE(context) route_state = 1; route_context = context | |||||
#define RESET_ROUTE() route_state = 0 | |||||
static char** entitydefs; | |||||
static PyObject* EMPTY; | |||||
static PyObject* NOARGS; | |||||
static PyObject* ParserError; | |||||
static PyObject* definitions; | |||||
/* Tokens: */ | |||||
static PyObject* Text; | |||||
static PyObject* TemplateOpen; | |||||
static PyObject* TemplateParamSeparator; | |||||
static PyObject* TemplateParamEquals; | |||||
static PyObject* TemplateClose; | |||||
static PyObject* ArgumentOpen; | |||||
static PyObject* ArgumentSeparator; | |||||
static PyObject* ArgumentClose; | |||||
static PyObject* WikilinkOpen; | |||||
static PyObject* WikilinkSeparator; | |||||
static PyObject* WikilinkClose; | |||||
static PyObject* ExternalLinkOpen; | |||||
static PyObject* ExternalLinkSeparator; | |||||
static PyObject* ExternalLinkClose; | |||||
static PyObject* HTMLEntityStart; | |||||
static PyObject* HTMLEntityNumeric; | |||||
static PyObject* HTMLEntityHex; | |||||
static PyObject* HTMLEntityEnd; | |||||
static PyObject* HeadingStart; | |||||
static PyObject* HeadingEnd; | |||||
static PyObject* CommentStart; | |||||
static PyObject* CommentEnd; | |||||
static PyObject* TagOpenOpen; | |||||
static PyObject* TagAttrStart; | |||||
static PyObject* TagAttrEquals; | |||||
static PyObject* TagAttrQuote; | |||||
static PyObject* TagCloseOpen; | |||||
static PyObject* TagCloseSelfclose; | |||||
static PyObject* TagOpenClose; | |||||
static PyObject* TagCloseClose; | |||||
/* Local contexts: */ | |||||
#define LC_TEMPLATE 0x0000000000000007 | |||||
#define LC_TEMPLATE_NAME 0x0000000000000001 | |||||
#define LC_TEMPLATE_PARAM_KEY 0x0000000000000002 | |||||
#define LC_TEMPLATE_PARAM_VALUE 0x0000000000000004 | |||||
#define LC_ARGUMENT 0x0000000000000018 | |||||
#define LC_ARGUMENT_NAME 0x0000000000000008 | |||||
#define LC_ARGUMENT_DEFAULT 0x0000000000000010 | |||||
#define LC_WIKILINK 0x0000000000000060 | |||||
#define LC_WIKILINK_TITLE 0x0000000000000020 | |||||
#define LC_WIKILINK_TEXT 0x0000000000000040 | |||||
#define LC_EXT_LINK 0x0000000000000180 | |||||
#define LC_EXT_LINK_URI 0x0000000000000080 | |||||
#define LC_EXT_LINK_TITLE 0x0000000000000100 | |||||
#define LC_HEADING 0x0000000000007E00 | |||||
#define LC_HEADING_LEVEL_1 0x0000000000000200 | |||||
#define LC_HEADING_LEVEL_2 0x0000000000000400 | |||||
#define LC_HEADING_LEVEL_3 0x0000000000000800 | |||||
#define LC_HEADING_LEVEL_4 0x0000000000001000 | |||||
#define LC_HEADING_LEVEL_5 0x0000000000002000 | |||||
#define LC_HEADING_LEVEL_6 0x0000000000004000 | |||||
#define LC_TAG 0x0000000000078000 | |||||
#define LC_TAG_OPEN 0x0000000000008000 | |||||
#define LC_TAG_ATTR 0x0000000000010000 | |||||
#define LC_TAG_BODY 0x0000000000020000 | |||||
#define LC_TAG_CLOSE 0x0000000000040000 | |||||
#define LC_STYLE 0x0000000000780000 | |||||
#define LC_STYLE_ITALICS 0x0000000000080000 | |||||
#define LC_STYLE_BOLD 0x0000000000100000 | |||||
#define LC_STYLE_PASS_AGAIN 0x0000000000200000 | |||||
#define LC_STYLE_SECOND_PASS 0x0000000000400000 | |||||
#define LC_DLTERM 0x0000000000800000 | |||||
#define LC_SAFETY_CHECK 0x000000003F000000 | |||||
#define LC_HAS_TEXT 0x0000000001000000 | |||||
#define LC_FAIL_ON_TEXT 0x0000000002000000 | |||||
#define LC_FAIL_NEXT 0x0000000004000000 | |||||
#define LC_FAIL_ON_LBRACE 0x0000000008000000 | |||||
#define LC_FAIL_ON_RBRACE 0x0000000010000000 | |||||
#define LC_FAIL_ON_EQUALS 0x0000000020000000 | |||||
#define LC_TABLE 0x0000000FC0000000 | |||||
#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000000D00000000 | |||||
#define LC_TABLE_OPEN 0x0000000040000000 | |||||
#define LC_TABLE_CELL_OPEN 0x0000000080000000 | |||||
#define LC_TABLE_CELL_STYLE 0x0000000100000000 | |||||
#define LC_TABLE_ROW_OPEN 0x0000000200000000 | |||||
#define LC_TABLE_TD_LINE 0x0000000400000000 | |||||
#define LC_TABLE_TH_LINE 0x0000000800000000 | |||||
/* Global contexts: */
#define GL_HEADING 0x1
/* Aggregate contexts: unions of local-context flags used as combined masks. */
#define AGG_FAIL         (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE | LC_TABLE_OPEN)
#define AGG_UNSAFE       (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)
#define AGG_DOUBLE       (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE | LC_TABLE_ROW_OPEN)
#define AGG_NO_WIKILINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI)
#define AGG_NO_EXT_LINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK)
/* Tag contexts: eight single-bit flags that together fill one byte. */
#define TAG_NAME        0x01
#define TAG_ATTR_READY  0x02
#define TAG_ATTR_NAME   0x04
#define TAG_ATTR_VALUE  0x08
#define TAG_QUOTED      0x10
#define TAG_NOTE_SPACE  0x20
#define TAG_NOTE_EQUALS 0x40
#define TAG_NOTE_QUOTE  0x80
/* Miscellaneous structs: */ | |||||
struct Stack { | |||||
PyObject* stack; | |||||
uint64_t context; | |||||
struct Textbuffer* textbuffer; | |||||
struct Stack* next; | |||||
}; | |||||
typedef struct { | |||||
PyObject* title; | |||||
int level; | |||||
} HeadingData; | |||||
typedef struct { | |||||
uint64_t context; | |||||
struct Textbuffer* pad_first; | |||||
struct Textbuffer* pad_before_eq; | |||||
struct Textbuffer* pad_after_eq; | |||||
Py_UNICODE quoter; | |||||
Py_ssize_t reset; | |||||
} TagData; | |||||
/* Allow `struct Stack` to be written as plain `Stack`. */
typedef struct Stack Stack;
/* Tokenizer object definition: */ | |||||
typedef struct { | |||||
PyObject_HEAD | |||||
PyObject* text; /* text to tokenize */ | |||||
Stack* topstack; /* topmost stack */ | |||||
Py_ssize_t head; /* current position in text */ | |||||
Py_ssize_t length; /* length of text */ | |||||
int global; /* global context */ | |||||
int depth; /* stack recursion depth */ | |||||
int cycles; /* total number of stack recursions */ | |||||
int skip_style_tags; /* temporary fix for the sometimes broken tag parser */ | |||||
} Tokenizer; | |||||
/* Macros related to Tokenizer functions: */

/*
    Yield the first code unit of the unicode object returned by
    Tokenizer_read() / Tokenizer_read_backwards() for the given delta.
*/
#define Tokenizer_READ(self, delta) (*PyUnicode_AS_UNICODE(Tokenizer_read(self, delta)))
#define Tokenizer_READ_BACKWARDS(self, delta) \
    (*PyUnicode_AS_UNICODE(Tokenizer_read_backwards(self, delta)))

/*
    True while both the recursion depth and the total cycle count are below
    their limits. `self` is parenthesized so that any pointer expression
    (e.g. `&tok`) can be passed safely.
*/
#define Tokenizer_CAN_RECURSE(self) \
    ((self)->depth < MAX_DEPTH && (self)->cycles < MAX_CYCLES)

/* Shorthands for the emit functions: the final argument is 0, or 1 for the "_first" variants. */
#define Tokenizer_emit(self, token) Tokenizer_emit_token(self, token, 0)
#define Tokenizer_emit_first(self, token) Tokenizer_emit_token(self, token, 1)
#define Tokenizer_emit_kwargs(self, token, kwargs) \
    Tokenizer_emit_token_kwargs(self, token, kwargs, 0)
#define Tokenizer_emit_first_kwargs(self, token, kwargs) \
    Tokenizer_emit_token_kwargs(self, token, kwargs, 1)
/* Macros for accessing definitions: */

/* Map a markup character to an HTML tag name: ':' -> "dd", ';' -> "dt", anything else -> "li". */
#define GET_HTML_TAG(markup) \
    ((markup) == ':' ? "dd" : (markup) == ';' ? "dt" : "li")

/* Each of these forwards to call_def_func() with the named definitions function. */
#define IS_PARSABLE(tag)    (call_def_func("is_parsable", tag, NULL, NULL))
#define IS_SINGLE(tag)      (call_def_func("is_single", tag, NULL, NULL))
#define IS_SINGLE_ONLY(tag) (call_def_func("is_single_only", tag, NULL, NULL))

/* `slashes` and `reverse` are C truth values, passed on as Py_True / Py_False. */
#define IS_SCHEME(scheme, slashes, reverse) \
    (call_def_func("is_scheme", scheme, (slashes) ? Py_True : Py_False, \
                   (reverse) ? Py_True : Py_False))
/* Function prototypes: */ | |||||
static TagData* TagData_new(void); | |||||
static void TagData_dealloc(TagData*); | |||||
/* Functions */ | |||||
static PyObject* Tokenizer_new(PyTypeObject*, PyObject*, PyObject*); | static PyObject* Tokenizer_new(PyTypeObject*, PyObject*, PyObject*); | ||||
static void Tokenizer_dealloc(Tokenizer*); | static void Tokenizer_dealloc(Tokenizer*); | ||||
static int Tokenizer_init(Tokenizer*, PyObject*, PyObject*); | static int Tokenizer_init(Tokenizer*, PyObject*, PyObject*); | ||||
static int Tokenizer_parse_entity(Tokenizer*); | |||||
static int Tokenizer_parse_comment(Tokenizer*); | |||||
static int Tokenizer_handle_dl_term(Tokenizer*); | |||||
static int Tokenizer_parse_tag(Tokenizer*); | |||||
static PyObject* Tokenizer_parse(Tokenizer*, uint64_t, int); | |||||
static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); | static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); | ||||
static int load_exceptions(void); | |||||
/* Macros for Python 2/3 compatibility: */ | |||||
/* Compatibility macros */ | |||||
#ifdef IS_PY3K | #ifdef IS_PY3K | ||||
#define NEW_INT_FUNC PyLong_FromSsize_t | |||||
#define IMPORT_NAME_FUNC PyUnicode_FromString | #define IMPORT_NAME_FUNC PyUnicode_FromString | ||||
#define CREATE_MODULE PyModule_Create(&module_def); | #define CREATE_MODULE PyModule_Create(&module_def); | ||||
#define ENTITYDEFS_MODULE "html.entities" | #define ENTITYDEFS_MODULE "html.entities" | ||||
#define INIT_FUNC_NAME PyInit__tokenizer | #define INIT_FUNC_NAME PyInit__tokenizer | ||||
#define INIT_ERROR return NULL | #define INIT_ERROR return NULL | ||||
#else | #else | ||||
#define NEW_INT_FUNC PyInt_FromSsize_t | |||||
#define IMPORT_NAME_FUNC PyBytes_FromString | #define IMPORT_NAME_FUNC PyBytes_FromString | ||||
#define CREATE_MODULE Py_InitModule("_tokenizer", NULL); | #define CREATE_MODULE Py_InitModule("_tokenizer", NULL); | ||||
#define ENTITYDEFS_MODULE "htmlentitydefs" | #define ENTITYDEFS_MODULE "htmlentitydefs" | ||||
@@ -277,8 +48,7 @@ static int load_exceptions(void); | |||||
#define INIT_ERROR return | #define INIT_ERROR return | ||||
#endif | #endif | ||||
/* More structs for creating the Tokenizer type: */ | |||||
/* Structs */ | |||||
static PyMethodDef Tokenizer_methods[] = { | static PyMethodDef Tokenizer_methods[] = { | ||||
{"tokenize", (PyCFunction) Tokenizer_tokenize, METH_VARARGS, | {"tokenize", (PyCFunction) Tokenizer_tokenize, METH_VARARGS, | ||||
@@ -0,0 +1,111 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#include "tokens.h" | |||||
/* Globals */ | |||||
PyObject* Text; | |||||
PyObject* TemplateOpen; | |||||
PyObject* TemplateParamSeparator; | |||||
PyObject* TemplateParamEquals; | |||||
PyObject* TemplateClose; | |||||
PyObject* ArgumentOpen; | |||||
PyObject* ArgumentSeparator; | |||||
PyObject* ArgumentClose; | |||||
PyObject* WikilinkOpen; | |||||
PyObject* WikilinkSeparator; | |||||
PyObject* WikilinkClose; | |||||
PyObject* ExternalLinkOpen; | |||||
PyObject* ExternalLinkSeparator; | |||||
PyObject* ExternalLinkClose; | |||||
PyObject* HTMLEntityStart; | |||||
PyObject* HTMLEntityNumeric; | |||||
PyObject* HTMLEntityHex; | |||||
PyObject* HTMLEntityEnd; | |||||
PyObject* HeadingStart; | |||||
PyObject* HeadingEnd; | |||||
PyObject* CommentStart; | |||||
PyObject* CommentEnd; | |||||
PyObject* TagOpenOpen; | |||||
PyObject* TagAttrStart; | |||||
PyObject* TagAttrEquals; | |||||
PyObject* TagAttrQuote; | |||||
PyObject* TagCloseOpen; | |||||
PyObject* TagCloseSelfclose; | |||||
PyObject* TagOpenClose; | |||||
PyObject* TagCloseClose; | |||||
/* | |||||
Load individual tokens into globals from the given Python module object. | |||||
*/ | |||||
void load_tokens_from_module(PyObject* module) | |||||
{ | |||||
Text = PyObject_GetAttrString(module, "Text"); | |||||
TemplateOpen = PyObject_GetAttrString(module, "TemplateOpen"); | |||||
TemplateParamSeparator = PyObject_GetAttrString(module, | |||||
"TemplateParamSeparator"); | |||||
TemplateParamEquals = PyObject_GetAttrString(module, | |||||
"TemplateParamEquals"); | |||||
TemplateClose = PyObject_GetAttrString(module, "TemplateClose"); | |||||
ArgumentOpen = PyObject_GetAttrString(module, "ArgumentOpen"); | |||||
ArgumentSeparator = PyObject_GetAttrString(module, "ArgumentSeparator"); | |||||
ArgumentClose = PyObject_GetAttrString(module, "ArgumentClose"); | |||||
WikilinkOpen = PyObject_GetAttrString(module, "WikilinkOpen"); | |||||
WikilinkSeparator = PyObject_GetAttrString(module, "WikilinkSeparator"); | |||||
WikilinkClose = PyObject_GetAttrString(module, "WikilinkClose"); | |||||
ExternalLinkOpen = PyObject_GetAttrString(module, "ExternalLinkOpen"); | |||||
ExternalLinkSeparator = PyObject_GetAttrString(module, | |||||
"ExternalLinkSeparator"); | |||||
ExternalLinkClose = PyObject_GetAttrString(module, "ExternalLinkClose"); | |||||
HTMLEntityStart = PyObject_GetAttrString(module, "HTMLEntityStart"); | |||||
HTMLEntityNumeric = PyObject_GetAttrString(module, "HTMLEntityNumeric"); | |||||
HTMLEntityHex = PyObject_GetAttrString(module, "HTMLEntityHex"); | |||||
HTMLEntityEnd = PyObject_GetAttrString(module, "HTMLEntityEnd"); | |||||
HeadingStart = PyObject_GetAttrString(module, "HeadingStart"); | |||||
HeadingEnd = PyObject_GetAttrString(module, "HeadingEnd"); | |||||
CommentStart = PyObject_GetAttrString(module, "CommentStart"); | |||||
CommentEnd = PyObject_GetAttrString(module, "CommentEnd"); | |||||
TagOpenOpen = PyObject_GetAttrString(module, "TagOpenOpen"); | |||||
TagAttrStart = PyObject_GetAttrString(module, "TagAttrStart"); | |||||
TagAttrEquals = PyObject_GetAttrString(module, "TagAttrEquals"); | |||||
TagAttrQuote = PyObject_GetAttrString(module, "TagAttrQuote"); | |||||
TagCloseOpen = PyObject_GetAttrString(module, "TagCloseOpen"); | |||||
TagCloseSelfclose = PyObject_GetAttrString(module, "TagCloseSelfclose"); | |||||
TagOpenClose = PyObject_GetAttrString(module, "TagOpenClose"); | |||||
TagCloseClose = PyObject_GetAttrString(module, "TagCloseClose"); | |||||
} |
@@ -0,0 +1,69 @@ | |||||
/* | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||||
this software and associated documentation files (the "Software"), to deal in | |||||
the Software without restriction, including without limitation the rights to | |||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||||
of the Software, and to permit persons to whom the Software is furnished to do | |||||
so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. | |||||
*/ | |||||
#pragma once | |||||
#include "common.h" | |||||
/* Token globals */ | |||||
extern PyObject* Text; | |||||
extern PyObject* TemplateOpen; | |||||
extern PyObject* TemplateParamSeparator; | |||||
extern PyObject* TemplateParamEquals; | |||||
extern PyObject* TemplateClose; | |||||
extern PyObject* ArgumentOpen; | |||||
extern PyObject* ArgumentSeparator; | |||||
extern PyObject* ArgumentClose; | |||||
extern PyObject* WikilinkOpen; | |||||
extern PyObject* WikilinkSeparator; | |||||
extern PyObject* WikilinkClose; | |||||
extern PyObject* ExternalLinkOpen; | |||||
extern PyObject* ExternalLinkSeparator; | |||||
extern PyObject* ExternalLinkClose; | |||||
extern PyObject* HTMLEntityStart; | |||||
extern PyObject* HTMLEntityNumeric; | |||||
extern PyObject* HTMLEntityHex; | |||||
extern PyObject* HTMLEntityEnd; | |||||
extern PyObject* HeadingStart; | |||||
extern PyObject* HeadingEnd; | |||||
extern PyObject* CommentStart; | |||||
extern PyObject* CommentEnd; | |||||
extern PyObject* TagOpenOpen; | |||||
extern PyObject* TagAttrStart; | |||||
extern PyObject* TagAttrEquals; | |||||
extern PyObject* TagAttrQuote; | |||||
extern PyObject* TagCloseOpen; | |||||
extern PyObject* TagCloseSelfclose; | |||||
extern PyObject* TagOpenClose; | |||||
extern PyObject* TagCloseClose; | |||||
/* Functions */ | |||||
void load_tokens_from_module(PyObject*); |