@@ -20,14 +20,18 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. | |||
*/ | |||
#pragma once | |||
#ifndef PY_SSIZE_T_CLEAN | |||
#define PY_SSIZE_T_CLEAN | |||
#define PY_SSIZE_T_CLEAN // See: https://docs.python.org/2/c-api/arg.html | |||
#endif | |||
#include <Python.h> | |||
#include <structmember.h> | |||
#include <bytesobject.h> | |||
/* Compatibility macros */ | |||
#if PY_MAJOR_VERSION >= 3 | |||
#define IS_PY3K | |||
#endif | |||
@@ -36,5 +40,53 @@ SOFTWARE. | |||
#define uint64_t unsigned PY_LONG_LONG | |||
#endif | |||
#define malloc PyObject_Malloc | |||
#define malloc PyObject_Malloc // XXX: yuck | |||
#define free PyObject_Free | |||
/* Error handling globals/macros */

/*
    Route-failure state shared by every user of this header. route_state is
    set to 1 by FAIL_ROUTE() and cleared by RESET_ROUTE(); route_context holds
    the context value handed to the most recent FAIL_ROUTE().
*/
extern int route_state;  // TODO: this is NOT thread-safe!
extern uint64_t route_context;

#define BAD_ROUTE route_state
#define BAD_ROUTE_CONTEXT route_context

/*
    Wrapped in do { } while (0) so that "FAIL_ROUTE(x);" is exactly one
    statement and remains valid as the unbraced body of an if/else. The
    argument is parenthesized so any expression can be passed safely.
*/
#define FAIL_ROUTE(context)          \
    do {                             \
        route_state = 1;             \
        route_context = (context);   \
    } while (0)
#define RESET_ROUTE() route_state = 0
/* Shared globals */ | |||
/* These are only declared here; the definitions are not visible in this header. */
/* NOTE(review): presumably a table of HTML entity names -- confirm at its definition. */
extern char** entitydefs; | |||
/* Returned by Tokenizer_read()/Tokenizer_read_backwards() for out-of-range positions. */
extern PyObject* EMPTY; | |||
/* Passed as the positional-argument object in PyObject_Call() when building tokens. */
extern PyObject* NOARGS; | |||
/* NOTE(review): looks like the Python-side "definitions" helper object -- confirm. */
extern PyObject* definitions; | |||
/* Structs */ | |||
struct Textbuffer { | |||
Py_ssize_t size; | |||
Py_UNICODE* data; | |||
struct Textbuffer* prev; | |||
struct Textbuffer* next; | |||
}; | |||
typedef struct Textbuffer Textbuffer; | |||
struct Stack { | |||
PyObject* stack; | |||
uint64_t context; | |||
struct Textbuffer* textbuffer; | |||
struct Stack* next; | |||
}; | |||
typedef struct Stack Stack; | |||
typedef struct { | |||
PyObject_HEAD | |||
PyObject* text; /* text to tokenize */ | |||
Stack* topstack; /* topmost stack */ | |||
Py_ssize_t head; /* current position in text */ | |||
Py_ssize_t length; /* length of text */ | |||
int global; /* global context */ | |||
int depth; /* stack recursion depth */ | |||
int cycles; /* total number of stack recursions */ | |||
int skip_style_tags; /* temporary fix for the sometimes broken tag parser */ | |||
} Tokenizer; |
@@ -0,0 +1,104 @@ | |||
/* | |||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
this software and associated documentation files (the "Software"), to deal in | |||
the Software without restriction, including without limitation the rights to | |||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||
of the Software, and to permit persons to whom the Software is furnished to do | |||
so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in all | |||
copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. | |||
*/ | |||
#pragma once | |||
/* Local contexts */ | |||
/*
    Bit flags. In each group below, the first macro is the bitwise OR of the
    single-bit flags that follow it, so "context & LC_GROUP" tests for any
    member of the group.
*/
#define LC_TEMPLATE 0x0000000000000007 | |||
#define LC_TEMPLATE_NAME 0x0000000000000001 | |||
#define LC_TEMPLATE_PARAM_KEY 0x0000000000000002 | |||
#define LC_TEMPLATE_PARAM_VALUE 0x0000000000000004 | |||
#define LC_ARGUMENT 0x0000000000000018 | |||
#define LC_ARGUMENT_NAME 0x0000000000000008 | |||
#define LC_ARGUMENT_DEFAULT 0x0000000000000010 | |||
#define LC_WIKILINK 0x0000000000000060 | |||
#define LC_WIKILINK_TITLE 0x0000000000000020 | |||
#define LC_WIKILINK_TEXT 0x0000000000000040 | |||
#define LC_EXT_LINK 0x0000000000000180 | |||
#define LC_EXT_LINK_URI 0x0000000000000080 | |||
#define LC_EXT_LINK_TITLE 0x0000000000000100 | |||
#define LC_HEADING 0x0000000000007E00 | |||
#define LC_HEADING_LEVEL_1 0x0000000000000200 | |||
#define LC_HEADING_LEVEL_2 0x0000000000000400 | |||
#define LC_HEADING_LEVEL_3 0x0000000000000800 | |||
#define LC_HEADING_LEVEL_4 0x0000000000001000 | |||
#define LC_HEADING_LEVEL_5 0x0000000000002000 | |||
#define LC_HEADING_LEVEL_6 0x0000000000004000 | |||
#define LC_TAG 0x0000000000078000 | |||
#define LC_TAG_OPEN 0x0000000000008000 | |||
#define LC_TAG_ATTR 0x0000000000010000 | |||
#define LC_TAG_BODY 0x0000000000020000 | |||
#define LC_TAG_CLOSE 0x0000000000040000 | |||
#define LC_STYLE 0x0000000000780000 | |||
#define LC_STYLE_ITALICS 0x0000000000080000 | |||
#define LC_STYLE_BOLD 0x0000000000100000 | |||
#define LC_STYLE_PASS_AGAIN 0x0000000000200000 | |||
#define LC_STYLE_SECOND_PASS 0x0000000000400000 | |||
/* Single flag; it has no sub-flags. */
#define LC_DLTERM 0x0000000000800000 | |||
#define LC_SAFETY_CHECK 0x000000003F000000 | |||
#define LC_HAS_TEXT 0x0000000001000000 | |||
#define LC_FAIL_ON_TEXT 0x0000000002000000 | |||
#define LC_FAIL_NEXT 0x0000000004000000 | |||
#define LC_FAIL_ON_LBRACE 0x0000000008000000 | |||
#define LC_FAIL_ON_RBRACE 0x0000000010000000 | |||
#define LC_FAIL_ON_EQUALS 0x0000000020000000 | |||
/* From here on the values no longer fit in 32 bits. */
#define LC_TABLE 0x0000000FC0000000 | |||
/* Equals LC_TABLE_CELL_STYLE | LC_TABLE_TD_LINE | LC_TABLE_TH_LINE. */
#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000000D00000000 | |||
#define LC_TABLE_OPEN 0x0000000040000000 | |||
#define LC_TABLE_CELL_OPEN 0x0000000080000000 | |||
#define LC_TABLE_CELL_STYLE 0x0000000100000000 | |||
#define LC_TABLE_ROW_OPEN 0x0000000200000000 | |||
#define LC_TABLE_TD_LINE 0x0000000400000000 | |||
#define LC_TABLE_TH_LINE 0x0000000800000000 | |||
/* Global contexts */ | |||
#define GL_HEADING 0x1 | |||
/* Aggregate contexts */ | |||
/* Named unions of the local flags above, for testing several contexts at once. */
#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE | LC_TABLE_OPEN) | |||
#define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) | |||
#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE | LC_TABLE_ROW_OPEN) | |||
#define AGG_NO_WIKILINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI) | |||
#define AGG_NO_EXT_LINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK) | |||
/* Tag contexts */ | |||
/* One bit each. TAG_NAME is the value TagData_new() stores in TagData.context. */
#define TAG_NAME 0x01 | |||
#define TAG_ATTR_READY 0x02 | |||
#define TAG_ATTR_NAME 0x04 | |||
#define TAG_ATTR_VALUE 0x08 | |||
#define TAG_QUOTED 0x10 | |||
#define TAG_NOTE_SPACE 0x20 | |||
#define TAG_NOTE_EQUALS 0x40 | |||
#define TAG_NOTE_QUOTE 0x80 |
@@ -0,0 +1,88 @@ | |||
/* | |||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
this software and associated documentation files (the "Software"), to deal in | |||
the Software without restriction, including without limitation the rights to | |||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||
of the Software, and to permit persons to whom the Software is furnished to do | |||
so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in all | |||
copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. | |||
*/ | |||
#include "tag_data.h" | |||
#include "contexts.h" | |||
/* | |||
Initialize a new TagData object. | |||
*/ | |||
TagData* TagData_new(void) | |||
{ | |||
#define ALLOC_BUFFER(name) \ | |||
name = Textbuffer_new(); \ | |||
if (!name) { \ | |||
TagData_dealloc(self); \ | |||
return NULL; \ | |||
} | |||
TagData *self = malloc(sizeof(TagData)); | |||
if (!self) { | |||
PyErr_NoMemory(); | |||
return NULL; | |||
} | |||
self->context = TAG_NAME; | |||
ALLOC_BUFFER(self->pad_first) | |||
ALLOC_BUFFER(self->pad_before_eq) | |||
ALLOC_BUFFER(self->pad_after_eq) | |||
self->quoter = 0; | |||
self->reset = 0; | |||
return self; | |||
#undef ALLOC_BUFFER | |||
} | |||
/* | |||
Deallocate the given TagData object. | |||
*/ | |||
void TagData_dealloc(TagData* self) | |||
{ | |||
#define DEALLOC_BUFFER(name) \ | |||
if (name) \ | |||
Textbuffer_dealloc(name); | |||
DEALLOC_BUFFER(self->pad_first); | |||
DEALLOC_BUFFER(self->pad_before_eq); | |||
DEALLOC_BUFFER(self->pad_after_eq); | |||
free(self); | |||
#undef DEALLOC_BUFFER | |||
} | |||
/* | |||
Clear the internal buffers of the given TagData object. | |||
*/ | |||
int TagData_reset_buffers(TagData* self) | |||
{ | |||
#define RESET_BUFFER(name) \ | |||
Textbuffer_dealloc(name); \ | |||
name = Textbuffer_new(); \ | |||
if (!name) \ | |||
return -1; | |||
RESET_BUFFER(self->pad_first) | |||
RESET_BUFFER(self->pad_before_eq) | |||
RESET_BUFFER(self->pad_after_eq) | |||
return 0; | |||
#undef RESET_BUFFER | |||
} |
@@ -0,0 +1,43 @@ | |||
/* | |||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
this software and associated documentation files (the "Software"), to deal in | |||
the Software without restriction, including without limitation the rights to | |||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||
of the Software, and to permit persons to whom the Software is furnished to do | |||
so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in all | |||
copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. | |||
*/ | |||
#pragma once | |||
#include "common.h" | |||
#include "textbuffer.h" | |||
/* Structs */ | |||
typedef struct { | |||
uint64_t context; | |||
Textbuffer* pad_first; | |||
Textbuffer* pad_before_eq; | |||
Textbuffer* pad_after_eq; | |||
Py_UNICODE quoter; | |||
Py_ssize_t reset; | |||
} TagData; | |||
/* Functions */ | |||
TagData* TagData_new(void); | |||
void TagData_dealloc(TagData*); | |||
int TagData_reset_buffers(TagData*); |
@@ -20,17 +20,9 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. | |||
*/ | |||
#include "common.h" | |||
/* Structs */ | |||
#pragma once | |||
/* One chunk of buffered text; chunks are chained through prev/next. */
struct Textbuffer { | |||
Py_ssize_t size; | |||
Py_UNICODE* data; | |||
struct Textbuffer* prev; | |||
struct Textbuffer* next; | |||
}; | |||
typedef struct Textbuffer Textbuffer; | |||
#include "common.h" | |||
/* Functions */ | |||
@@ -0,0 +1,29 @@ | |||
/* | |||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
this software and associated documentation files (the "Software"), to deal in | |||
the Software without restriction, including without limitation the rights to | |||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||
of the Software, and to permit persons to whom the Software is furnished to do | |||
so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in all | |||
copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. | |||
*/ | |||
#pragma once | |||
#include "common.h" | |||
/* Functions */ | |||
PyObject* Tokenizer_parse(Tokenizer*, uint64_t, int); |
@@ -0,0 +1,362 @@ | |||
/* | |||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
this software and associated documentation files (the "Software"), to deal in | |||
the Software without restriction, including without limitation the rights to | |||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||
of the Software, and to permit persons to whom the Software is furnished to do | |||
so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in all | |||
copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. | |||
*/ | |||
#include "tok_support.h" | |||
#include "textbuffer.h" | |||
#include "tokens.h" | |||
/* | |||
Add a new token stack, context, and textbuffer to the list. | |||
*/ | |||
int Tokenizer_push(Tokenizer* self, uint64_t context) | |||
{ | |||
Stack* top = malloc(sizeof(Stack)); | |||
if (!top) { | |||
PyErr_NoMemory(); | |||
return -1; | |||
} | |||
top->stack = PyList_New(0); | |||
top->context = context; | |||
top->textbuffer = Textbuffer_new(); | |||
if (!top->textbuffer) | |||
return -1; | |||
top->next = self->topstack; | |||
self->topstack = top; | |||
self->depth++; | |||
self->cycles++; | |||
return 0; | |||
} | |||
/* | |||
Push the textbuffer onto the stack as a Text node and clear it. | |||
*/ | |||
int Tokenizer_push_textbuffer(Tokenizer* self) | |||
{ | |||
PyObject *text, *kwargs, *token; | |||
Textbuffer* buffer = self->topstack->textbuffer; | |||
if (buffer->size == 0 && !buffer->next) | |||
return 0; | |||
text = Textbuffer_render(buffer); | |||
if (!text) | |||
return -1; | |||
kwargs = PyDict_New(); | |||
if (!kwargs) { | |||
Py_DECREF(text); | |||
return -1; | |||
} | |||
PyDict_SetItemString(kwargs, "text", text); | |||
Py_DECREF(text); | |||
token = PyObject_Call(Text, NOARGS, kwargs); | |||
Py_DECREF(kwargs); | |||
if (!token) | |||
return -1; | |||
if (PyList_Append(self->topstack->stack, token)) { | |||
Py_DECREF(token); | |||
return -1; | |||
} | |||
Py_DECREF(token); | |||
Textbuffer_dealloc(buffer); | |||
self->topstack->textbuffer = Textbuffer_new(); | |||
if (!self->topstack->textbuffer) | |||
return -1; | |||
return 0; | |||
} | |||
/* | |||
Pop and deallocate the top token stack/context/textbuffer. | |||
*/ | |||
void Tokenizer_delete_top_of_stack(Tokenizer* self) | |||
{ | |||
Stack* top = self->topstack; | |||
Py_DECREF(top->stack); | |||
Textbuffer_dealloc(top->textbuffer); | |||
self->topstack = top->next; | |||
free(top); | |||
self->depth--; | |||
} | |||
/* | |||
Pop the current stack/context/textbuffer, returing the stack. | |||
*/ | |||
PyObject* Tokenizer_pop(Tokenizer* self) | |||
{ | |||
PyObject* stack; | |||
if (Tokenizer_push_textbuffer(self)) | |||
return NULL; | |||
stack = self->topstack->stack; | |||
Py_INCREF(stack); | |||
Tokenizer_delete_top_of_stack(self); | |||
return stack; | |||
} | |||
/* | |||
Pop the current stack/context/textbuffer, returing the stack. We will also | |||
replace the underlying stack's context with the current stack's. | |||
*/ | |||
PyObject* Tokenizer_pop_keeping_context(Tokenizer* self) | |||
{ | |||
PyObject* stack; | |||
uint64_t context; | |||
if (Tokenizer_push_textbuffer(self)) | |||
return NULL; | |||
stack = self->topstack->stack; | |||
Py_INCREF(stack); | |||
context = self->topstack->context; | |||
Tokenizer_delete_top_of_stack(self); | |||
self->topstack->context = context; | |||
return stack; | |||
} | |||
/* | |||
Fail the current tokenization route. Discards the current | |||
stack/context/textbuffer and sets the BAD_ROUTE flag. | |||
*/ | |||
void* Tokenizer_fail_route(Tokenizer* self) | |||
{ | |||
uint64_t context = self->topstack->context; | |||
PyObject* stack = Tokenizer_pop(self); | |||
Py_XDECREF(stack); | |||
FAIL_ROUTE(context); | |||
return NULL; | |||
} | |||
/* | |||
Write a token to the current token stack. | |||
*/ | |||
int Tokenizer_emit_token(Tokenizer* self, PyObject* token, int first) | |||
{ | |||
PyObject* instance; | |||
if (Tokenizer_push_textbuffer(self)) | |||
return -1; | |||
instance = PyObject_CallObject(token, NULL); | |||
if (!instance) | |||
return -1; | |||
if (first ? PyList_Insert(self->topstack->stack, 0, instance) : | |||
PyList_Append(self->topstack->stack, instance)) { | |||
Py_DECREF(instance); | |||
return -1; | |||
} | |||
Py_DECREF(instance); | |||
return 0; | |||
} | |||
/* | |||
Write a token to the current token stack, with kwargs. Steals a reference | |||
to kwargs. | |||
*/ | |||
int Tokenizer_emit_token_kwargs(Tokenizer* self, PyObject* token, | |||
PyObject* kwargs, int first) | |||
{ | |||
PyObject* instance; | |||
if (Tokenizer_push_textbuffer(self)) { | |||
Py_DECREF(kwargs); | |||
return -1; | |||
} | |||
instance = PyObject_Call(token, NOARGS, kwargs); | |||
if (!instance) { | |||
Py_DECREF(kwargs); | |||
return -1; | |||
} | |||
if (first ? PyList_Insert(self->topstack->stack, 0, instance): | |||
PyList_Append(self->topstack->stack, instance)) { | |||
Py_DECREF(instance); | |||
Py_DECREF(kwargs); | |||
return -1; | |||
} | |||
Py_DECREF(instance); | |||
Py_DECREF(kwargs); | |||
return 0; | |||
} | |||
/* | |||
Write a Unicode codepoint to the current textbuffer. | |||
*/ | |||
int Tokenizer_emit_char(Tokenizer* self, Py_UNICODE code) | |||
{ | |||
return Textbuffer_write(&(self->topstack->textbuffer), code); | |||
} | |||
/* | |||
Write a string of text to the current textbuffer. | |||
*/ | |||
int Tokenizer_emit_text(Tokenizer* self, const char* text) | |||
{ | |||
int i = 0; | |||
while (text[i]) { | |||
if (Tokenizer_emit_char(self, text[i])) | |||
return -1; | |||
i++; | |||
} | |||
return 0; | |||
} | |||
/* | |||
Write the contents of another textbuffer to the current textbuffer, | |||
deallocating it in the process. | |||
*/ | |||
int | |||
Tokenizer_emit_textbuffer(Tokenizer* self, Textbuffer* buffer, int reverse) | |||
{ | |||
Textbuffer *original = buffer; | |||
Py_ssize_t i; | |||
if (reverse) { | |||
do { | |||
for (i = buffer->size - 1; i >= 0; i--) { | |||
if (Tokenizer_emit_char(self, buffer->data[i])) { | |||
Textbuffer_dealloc(original); | |||
return -1; | |||
} | |||
} | |||
} while ((buffer = buffer->next)); | |||
} | |||
else { | |||
while (buffer->next) | |||
buffer = buffer->next; | |||
do { | |||
for (i = 0; i < buffer->size; i++) { | |||
if (Tokenizer_emit_char(self, buffer->data[i])) { | |||
Textbuffer_dealloc(original); | |||
return -1; | |||
} | |||
} | |||
} while ((buffer = buffer->prev)); | |||
} | |||
Textbuffer_dealloc(original); | |||
return 0; | |||
} | |||
/* | |||
Write a series of tokens to the current stack at once. | |||
*/ | |||
int Tokenizer_emit_all(Tokenizer* self, PyObject* tokenlist) | |||
{ | |||
int pushed = 0; | |||
PyObject *stack, *token, *left, *right, *text; | |||
Textbuffer* buffer; | |||
Py_ssize_t size; | |||
if (PyList_GET_SIZE(tokenlist) > 0) { | |||
token = PyList_GET_ITEM(tokenlist, 0); | |||
switch (PyObject_IsInstance(token, Text)) { | |||
case 0: | |||
break; | |||
case 1: { | |||
pushed = 1; | |||
buffer = self->topstack->textbuffer; | |||
if (buffer->size == 0 && !buffer->next) | |||
break; | |||
left = Textbuffer_render(buffer); | |||
if (!left) | |||
return -1; | |||
right = PyObject_GetAttrString(token, "text"); | |||
if (!right) | |||
return -1; | |||
text = PyUnicode_Concat(left, right); | |||
Py_DECREF(left); | |||
Py_DECREF(right); | |||
if (!text) | |||
return -1; | |||
if (PyObject_SetAttrString(token, "text", text)) { | |||
Py_DECREF(text); | |||
return -1; | |||
} | |||
Py_DECREF(text); | |||
Textbuffer_dealloc(buffer); | |||
self->topstack->textbuffer = Textbuffer_new(); | |||
if (!self->topstack->textbuffer) | |||
return -1; | |||
break; | |||
} | |||
case -1: | |||
return -1; | |||
} | |||
} | |||
if (!pushed) { | |||
if (Tokenizer_push_textbuffer(self)) | |||
return -1; | |||
} | |||
stack = self->topstack->stack; | |||
size = PyList_GET_SIZE(stack); | |||
if (PyList_SetSlice(stack, size, size, tokenlist)) | |||
return -1; | |||
return 0; | |||
} | |||
/* | |||
Pop the current stack, write text, and then write the stack. 'text' is a | |||
NULL-terminated array of chars. | |||
*/ | |||
int Tokenizer_emit_text_then_stack(Tokenizer* self, const char* text) | |||
{ | |||
PyObject* stack = Tokenizer_pop(self); | |||
if (Tokenizer_emit_text(self, text)) { | |||
Py_DECREF(stack); | |||
return -1; | |||
} | |||
if (stack) { | |||
if (PyList_GET_SIZE(stack) > 0) { | |||
if (Tokenizer_emit_all(self, stack)) { | |||
Py_DECREF(stack); | |||
return -1; | |||
} | |||
} | |||
Py_DECREF(stack); | |||
} | |||
self->head--; | |||
return 0; | |||
} | |||
/* | |||
Read the value at a relative point in the wikicode, forwards. | |||
*/ | |||
PyObject* Tokenizer_read(Tokenizer* self, Py_ssize_t delta) | |||
{ | |||
Py_ssize_t index = self->head + delta; | |||
if (index >= self->length) | |||
return EMPTY; | |||
return PyList_GET_ITEM(self->text, index); | |||
} | |||
/* | |||
Read the value at a relative point in the wikicode, backwards. | |||
*/ | |||
PyObject* Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) | |||
{ | |||
Py_ssize_t index; | |||
if (delta > self->head) | |||
return EMPTY; | |||
index = self->head - delta; | |||
return PyList_GET_ITEM(self->text, index); | |||
} |
@@ -0,0 +1,66 @@ | |||
/* | |||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
this software and associated documentation files (the "Software"), to deal in | |||
the Software without restriction, including without limitation the rights to | |||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||
of the Software, and to permit persons to whom the Software is furnished to do | |||
so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in all | |||
copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. | |||
*/ | |||
#pragma once | |||
#include "common.h" | |||
/* Functions */ | |||
int Tokenizer_push(Tokenizer*, uint64_t); | |||
int Tokenizer_push_textbuffer(Tokenizer*); | |||
void Tokenizer_delete_top_of_stack(Tokenizer*); | |||
PyObject* Tokenizer_pop(Tokenizer*); | |||
PyObject* Tokenizer_pop_keeping_context(Tokenizer*); | |||
void* Tokenizer_fail_route(Tokenizer*); | |||
int Tokenizer_emit_token(Tokenizer*, PyObject*, int); | |||
int Tokenizer_emit_token_kwargs(Tokenizer*, PyObject*, PyObject*, int); | |||
int Tokenizer_emit_char(Tokenizer*, Py_UNICODE); | |||
int Tokenizer_emit_text(Tokenizer*, const char*); | |||
int Tokenizer_emit_textbuffer(Tokenizer*, Textbuffer*, int); | |||
int Tokenizer_emit_all(Tokenizer*, PyObject*); | |||
int Tokenizer_emit_text_then_stack(Tokenizer*, const char*); | |||
PyObject* Tokenizer_read(Tokenizer*, Py_ssize_t); | |||
PyObject* Tokenizer_read_backwards(Tokenizer*, Py_ssize_t); | |||
/* Macros */

/* Limits checked by Tokenizer_CAN_RECURSE(): the stack depth and the total
   number of pushes (Tokenizer.depth and Tokenizer.cycles). */
#define MAX_DEPTH 40
#define MAX_CYCLES 100000

/* First code unit of the item returned by Tokenizer_read() /
   Tokenizer_read_backwards(). */
#define Tokenizer_READ(self, delta) \
    (*PyUnicode_AS_UNICODE(Tokenizer_read(self, delta)))
#define Tokenizer_READ_BACKWARDS(self, delta) \
    (*PyUnicode_AS_UNICODE(Tokenizer_read_backwards(self, delta)))

/* 'self' is parenthesized so that any pointer expression (for example
   "&tokenizer") can be passed, not just a plain identifier. */
#define Tokenizer_CAN_RECURSE(self) \
    ((self)->depth < MAX_DEPTH && (self)->cycles < MAX_CYCLES)

/* Shorthands for Tokenizer_emit_token()/Tokenizer_emit_token_kwargs(): the
   *_first variants insert at the front of the stack instead of appending. */
#define Tokenizer_emit(self, token) \
    Tokenizer_emit_token(self, token, 0)
#define Tokenizer_emit_first(self, token) \
    Tokenizer_emit_token(self, token, 1)
#define Tokenizer_emit_kwargs(self, token, kwargs) \
    Tokenizer_emit_token_kwargs(self, token, kwargs, 0)
#define Tokenizer_emit_first_kwargs(self, token, kwargs) \
    Tokenizer_emit_token_kwargs(self, token, kwargs, 1)
@@ -20,256 +20,27 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. | |||
*/ | |||
#include <math.h> | |||
#pragma once | |||
#include "common.h" | |||
#include "textbuffer.h" | |||
/* Character classes, spelled out as the set of characters each contains. */
#define DIGITS "0123456789" | |||
#define HEXDIGITS "0123456789abcdefABCDEF" | |||
#define ALPHANUM "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |||
/* NOTE(review): presumably the characters that can start or end markup -- confirm
   in the parser. The list includes '\0' as its last entry. */
static const char MARKERS[] = { | |||
'{', '}', '[', ']', '<', '>', '|', '=', '&', '\'', '#', '*', ';', ':', '/', | |||
'-', '!', '\n', '\0'}; | |||
/* Must equal the number of entries in MARKERS above (19, counting '\0'). */
#define NUM_MARKERS 19 | |||
/* Limits used by Tokenizer_CAN_RECURSE() on Tokenizer.depth / Tokenizer.cycles. */
#define MAX_DEPTH 40 | |||
#define MAX_CYCLES 100000 | |||
/* NOTE(review): users of the two limits below are not visible here. */
#define MAX_BRACES 255 | |||
#define MAX_ENTITY_SIZE 8 | |||
/*
    Route-failure state: route_state is set to 1 by FAIL_ROUTE() and cleared
    by RESET_ROUTE(); route_context holds the context value handed to the most
    recent FAIL_ROUTE().
*/
static int route_state = 0;
static uint64_t route_context = 0;
#define BAD_ROUTE route_state
#define BAD_ROUTE_CONTEXT route_context
/*
    Wrapped in do { } while (0) so both assignments form a single statement:
    "if (cond) FAIL_ROUTE(x);" must not update route_context unconditionally.
*/
#define FAIL_ROUTE(context)          \
    do {                             \
        route_state = 1;             \
        route_context = (context);   \
    } while (0)
#define RESET_ROUTE() route_state = 0
/* File-local globals. None has an initializer here; where they are filled in
   is not visible in this header. */
static char** entitydefs; | |||
static PyObject* EMPTY; | |||
static PyObject* NOARGS; | |||
static PyObject* ParserError; | |||
static PyObject* definitions; | |||
/* Tokens: */ | |||
/* One object per token kind, grouped by the markup construct they describe.
   NOTE(review): presumably the Python token classes -- confirm where they are loaded. */
static PyObject* Text; | |||
static PyObject* TemplateOpen; | |||
static PyObject* TemplateParamSeparator; | |||
static PyObject* TemplateParamEquals; | |||
static PyObject* TemplateClose; | |||
static PyObject* ArgumentOpen; | |||
static PyObject* ArgumentSeparator; | |||
static PyObject* ArgumentClose; | |||
static PyObject* WikilinkOpen; | |||
static PyObject* WikilinkSeparator; | |||
static PyObject* WikilinkClose; | |||
static PyObject* ExternalLinkOpen; | |||
static PyObject* ExternalLinkSeparator; | |||
static PyObject* ExternalLinkClose; | |||
static PyObject* HTMLEntityStart; | |||
static PyObject* HTMLEntityNumeric; | |||
static PyObject* HTMLEntityHex; | |||
static PyObject* HTMLEntityEnd; | |||
static PyObject* HeadingStart; | |||
static PyObject* HeadingEnd; | |||
static PyObject* CommentStart; | |||
static PyObject* CommentEnd; | |||
static PyObject* TagOpenOpen; | |||
static PyObject* TagAttrStart; | |||
static PyObject* TagAttrEquals; | |||
static PyObject* TagAttrQuote; | |||
static PyObject* TagCloseOpen; | |||
static PyObject* TagCloseSelfclose; | |||
static PyObject* TagOpenClose; | |||
static PyObject* TagCloseClose; | |||
/* Local contexts: */ | |||
/*
    Bit flags. In each group below, the first macro is the bitwise OR of the
    single-bit flags that follow it.
*/
#define LC_TEMPLATE 0x0000000000000007 | |||
#define LC_TEMPLATE_NAME 0x0000000000000001 | |||
#define LC_TEMPLATE_PARAM_KEY 0x0000000000000002 | |||
#define LC_TEMPLATE_PARAM_VALUE 0x0000000000000004 | |||
#define LC_ARGUMENT 0x0000000000000018 | |||
#define LC_ARGUMENT_NAME 0x0000000000000008 | |||
#define LC_ARGUMENT_DEFAULT 0x0000000000000010 | |||
#define LC_WIKILINK 0x0000000000000060 | |||
#define LC_WIKILINK_TITLE 0x0000000000000020 | |||
#define LC_WIKILINK_TEXT 0x0000000000000040 | |||
#define LC_EXT_LINK 0x0000000000000180 | |||
#define LC_EXT_LINK_URI 0x0000000000000080 | |||
#define LC_EXT_LINK_TITLE 0x0000000000000100 | |||
#define LC_HEADING 0x0000000000007E00 | |||
#define LC_HEADING_LEVEL_1 0x0000000000000200 | |||
#define LC_HEADING_LEVEL_2 0x0000000000000400 | |||
#define LC_HEADING_LEVEL_3 0x0000000000000800 | |||
#define LC_HEADING_LEVEL_4 0x0000000000001000 | |||
#define LC_HEADING_LEVEL_5 0x0000000000002000 | |||
#define LC_HEADING_LEVEL_6 0x0000000000004000 | |||
#define LC_TAG 0x0000000000078000 | |||
#define LC_TAG_OPEN 0x0000000000008000 | |||
#define LC_TAG_ATTR 0x0000000000010000 | |||
#define LC_TAG_BODY 0x0000000000020000 | |||
#define LC_TAG_CLOSE 0x0000000000040000 | |||
#define LC_STYLE 0x0000000000780000 | |||
#define LC_STYLE_ITALICS 0x0000000000080000 | |||
#define LC_STYLE_BOLD 0x0000000000100000 | |||
#define LC_STYLE_PASS_AGAIN 0x0000000000200000 | |||
#define LC_STYLE_SECOND_PASS 0x0000000000400000 | |||
/* Single flag; it has no sub-flags. */
#define LC_DLTERM 0x0000000000800000 | |||
#define LC_SAFETY_CHECK 0x000000003F000000 | |||
#define LC_HAS_TEXT 0x0000000001000000 | |||
#define LC_FAIL_ON_TEXT 0x0000000002000000 | |||
#define LC_FAIL_NEXT 0x0000000004000000 | |||
#define LC_FAIL_ON_LBRACE 0x0000000008000000 | |||
#define LC_FAIL_ON_RBRACE 0x0000000010000000 | |||
#define LC_FAIL_ON_EQUALS 0x0000000020000000 | |||
/* From here on the values no longer fit in 32 bits. */
#define LC_TABLE 0x0000000FC0000000 | |||
/* Equals LC_TABLE_CELL_STYLE | LC_TABLE_TD_LINE | LC_TABLE_TH_LINE. */
#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000000D00000000 | |||
#define LC_TABLE_OPEN 0x0000000040000000 | |||
#define LC_TABLE_CELL_OPEN 0x0000000080000000 | |||
#define LC_TABLE_CELL_STYLE 0x0000000100000000 | |||
#define LC_TABLE_ROW_OPEN 0x0000000200000000 | |||
#define LC_TABLE_TD_LINE 0x0000000400000000 | |||
#define LC_TABLE_TH_LINE 0x0000000800000000 | |||
/* Global contexts: */ | |||
#define GL_HEADING 0x1 | |||
/* Aggregate contexts: */ | |||
/* Named unions of the local flags above, for testing several contexts at once. */
#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE | LC_TABLE_OPEN) | |||
#define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) | |||
#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE | LC_TABLE_ROW_OPEN) | |||
#define AGG_NO_WIKILINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI) | |||
#define AGG_NO_EXT_LINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK) | |||
/* Tag contexts: */ | |||
/* One bit each; stored in TagData.context. */
#define TAG_NAME 0x01 | |||
#define TAG_ATTR_READY 0x02 | |||
#define TAG_ATTR_NAME 0x04 | |||
#define TAG_ATTR_VALUE 0x08 | |||
#define TAG_QUOTED 0x10 | |||
#define TAG_NOTE_SPACE 0x20 | |||
#define TAG_NOTE_EQUALS 0x40 | |||
#define TAG_NOTE_QUOTE 0x80 | |||
/* Miscellaneous structs: */ | |||
/* One frame of the tokenizer's stack, linked to the frame underneath via 'next'. */
struct Stack { | |||
PyObject* stack; | |||
uint64_t context; | |||
struct Textbuffer* textbuffer; | |||
struct Stack* next; | |||
}; | |||
/* NOTE(review): presumably the result of parsing a heading (its title and its
   level) -- the producer is not visible here. */
typedef struct { | |||
PyObject* title; | |||
int level; | |||
} HeadingData; | |||
/* Scratch state for a tag being parsed: TAG_* flags plus three text buffers. */
typedef struct { | |||
uint64_t context; | |||
struct Textbuffer* pad_first; | |||
struct Textbuffer* pad_before_eq; | |||
struct Textbuffer* pad_after_eq; | |||
Py_UNICODE quoter; | |||
Py_ssize_t reset; | |||
} TagData; | |||
/* Lets "struct Stack" (defined above) be written as plain "Stack". */
typedef struct Stack Stack; | |||
/* Tokenizer object definition: the C struct for Tokenizer instances.
   PyObject_HEAD has to remain the first member so that a Tokenizer* can be
   used wherever the Python C-API expects a PyObject*. */ | |||
typedef struct { | |||
PyObject_HEAD | |||
PyObject* text; /* text to tokenize */ | |||
Stack* topstack; /* topmost stack */ | |||
Py_ssize_t head; /* current position in text */ | |||
Py_ssize_t length; /* length of text */ | |||
int global; /* global context */ | |||
int depth; /* stack recursion depth */ | |||
int cycles; /* total number of stack recursions */ | |||
int skip_style_tags; /* temporary fix for the sometimes broken tag parser */ | |||
} Tokenizer; | |||
/* Macros related to Tokenizer functions: */

/* Yield the first code unit of the Unicode object returned by
   Tokenizer_read() / Tokenizer_read_backwards() for the given delta. */
#define Tokenizer_READ(self, delta) \
    (*PyUnicode_AS_UNICODE(Tokenizer_read(self, delta)))
#define Tokenizer_READ_BACKWARDS(self, delta) \
    (*PyUnicode_AS_UNICODE(Tokenizer_read_backwards(self, delta)))

/* Non-zero while both recursion counters are below their limits.
   `self` is parenthesized so that any pointer expression (for example
   `&tokenizer`) can be passed; note that it is evaluated twice. */
#define Tokenizer_CAN_RECURSE(self) \
    ((self)->depth < MAX_DEPTH && (self)->cycles < MAX_CYCLES)

/* Shorthands that fix the final argument of Tokenizer_emit_token() and
   Tokenizer_emit_token_kwargs() to 0 (plain) or 1 (the *_first variants). */
#define Tokenizer_emit(self, token) \
    Tokenizer_emit_token(self, token, 0)
#define Tokenizer_emit_first(self, token) \
    Tokenizer_emit_token(self, token, 1)
#define Tokenizer_emit_kwargs(self, token, kwargs) \
    Tokenizer_emit_token_kwargs(self, token, kwargs, 0)
#define Tokenizer_emit_first_kwargs(self, token, kwargs) \
    Tokenizer_emit_token_kwargs(self, token, kwargs, 1)
/* Macros for accessing definitions: */

/* Map a list markup character to an HTML tag name:
   ':' -> "dd", ';' -> "dt", anything else -> "li".
   `markup` is parenthesized so that compound expressions compare as a
   whole; it may be evaluated twice, so avoid side effects. */
#define GET_HTML_TAG(markup) \
    ((markup) == ':' ? "dd" : (markup) == ';' ? "dt" : "li")

/* Each of these forwards to call_def_func() with the name of a predicate
   and pads the unused argument slots with NULL. */
#define IS_PARSABLE(tag) (call_def_func("is_parsable", tag, NULL, NULL))
#define IS_SINGLE(tag) (call_def_func("is_single", tag, NULL, NULL))
#define IS_SINGLE_ONLY(tag) (call_def_func("is_single_only", tag, NULL, NULL))

/* `slashes` and `reverse` are C truth values; they are converted to
   Py_True / Py_False before being handed to call_def_func(). */
#define IS_SCHEME(scheme, slashes, reverse) \
    (call_def_func("is_scheme", scheme, \
                   (slashes) ? Py_True : Py_False, \
                   (reverse) ? Py_True : Py_False))
/* Function prototypes: forward declarations of file-local (`static`)
   functions, so they can be referenced before their definitions. */ | |||
static TagData* TagData_new(void); | |||
static void TagData_dealloc(TagData*); | |||
/* Functions operating on a Tokenizer.
   NOTE(review): Tokenizer_new / Tokenizer_dealloc / Tokenizer_init have the
   shapes of the tp_new / tp_dealloc / tp_init type slots -- presumably that
   is where they are installed; confirm in the type object definition. */ | |||
static PyObject* Tokenizer_new(PyTypeObject*, PyObject*, PyObject*); | |||
static void Tokenizer_dealloc(Tokenizer*); | |||
static int Tokenizer_init(Tokenizer*, PyObject*, PyObject*); | |||
static int Tokenizer_parse_entity(Tokenizer*); | |||
static int Tokenizer_parse_comment(Tokenizer*); | |||
static int Tokenizer_handle_dl_term(Tokenizer*); | |||
static int Tokenizer_parse_tag(Tokenizer*); | |||
static PyObject* Tokenizer_parse(Tokenizer*, uint64_t, int); | |||
static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); | |||
static int load_exceptions(void); | |||
/* Macros for Python 2/3 compatibility: the branch is selected by IS_PY3K,
   which is defined earlier when PY_MAJOR_VERSION >= 3. */ | |||
/* NOTE: both CREATE_MODULE expansions already end in ';', and INIT_ERROR
   expands to a complete `return` statement body (with NULL on Python 3,
   without a value on Python 2). */ | |||
#ifdef IS_PY3K | |||
#define NEW_INT_FUNC PyLong_FromSsize_t | |||
#define IMPORT_NAME_FUNC PyUnicode_FromString | |||
#define CREATE_MODULE PyModule_Create(&module_def); | |||
#define ENTITYDEFS_MODULE "html.entities" | |||
#define INIT_FUNC_NAME PyInit__tokenizer | |||
#define INIT_ERROR return NULL | |||
#else | |||
#define NEW_INT_FUNC PyInt_FromSsize_t | |||
#define IMPORT_NAME_FUNC PyBytes_FromString | |||
#define CREATE_MODULE Py_InitModule("_tokenizer", NULL); | |||
#define ENTITYDEFS_MODULE "htmlentitydefs" | |||
/* NOTE(review): the next line is a diff hunk marker, not C. The Python 2
   definition of INIT_FUNC_NAME is not visible in this excerpt. */
@@ -277,8 +48,7 @@ static int load_exceptions(void); | |||
#define INIT_ERROR return | |||
#endif | |||
/* More structs for creating the Tokenizer type: */ | |||
/* Structs */ | |||
static PyMethodDef Tokenizer_methods[] = { | |||
{"tokenize", (PyCFunction) Tokenizer_tokenize, METH_VARARGS, | |||
@@ -0,0 +1,111 @@ | |||
/* | |||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
this software and associated documentation files (the "Software"), to deal in | |||
the Software without restriction, including without limitation the rights to | |||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||
of the Software, and to permit persons to whom the Software is furnished to do | |||
so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in all | |||
copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. | |||
*/ | |||
#include "tokens.h" | |||
/* Globals: one PyObject* per token name. They have static storage duration,
   so each starts out as NULL, and each is assigned by
   load_tokens_from_module() from the module attribute of the same name. */ | |||
PyObject* Text; | |||
PyObject* TemplateOpen; | |||
PyObject* TemplateParamSeparator; | |||
PyObject* TemplateParamEquals; | |||
PyObject* TemplateClose; | |||
PyObject* ArgumentOpen; | |||
PyObject* ArgumentSeparator; | |||
PyObject* ArgumentClose; | |||
PyObject* WikilinkOpen; | |||
PyObject* WikilinkSeparator; | |||
PyObject* WikilinkClose; | |||
PyObject* ExternalLinkOpen; | |||
PyObject* ExternalLinkSeparator; | |||
PyObject* ExternalLinkClose; | |||
PyObject* HTMLEntityStart; | |||
PyObject* HTMLEntityNumeric; | |||
PyObject* HTMLEntityHex; | |||
PyObject* HTMLEntityEnd; | |||
PyObject* HeadingStart; | |||
PyObject* HeadingEnd; | |||
PyObject* CommentStart; | |||
PyObject* CommentEnd; | |||
PyObject* TagOpenOpen; | |||
PyObject* TagAttrStart; | |||
PyObject* TagAttrEquals; | |||
PyObject* TagAttrQuote; | |||
PyObject* TagCloseOpen; | |||
PyObject* TagCloseSelfclose; | |||
PyObject* TagOpenClose; | |||
PyObject* TagCloseClose; | |||
/* | |||
Load individual tokens into globals from the given Python module object. | |||
*/ | |||
void load_tokens_from_module(PyObject* module) | |||
{ | |||
Text = PyObject_GetAttrString(module, "Text"); | |||
TemplateOpen = PyObject_GetAttrString(module, "TemplateOpen"); | |||
TemplateParamSeparator = PyObject_GetAttrString(module, | |||
"TemplateParamSeparator"); | |||
TemplateParamEquals = PyObject_GetAttrString(module, | |||
"TemplateParamEquals"); | |||
TemplateClose = PyObject_GetAttrString(module, "TemplateClose"); | |||
ArgumentOpen = PyObject_GetAttrString(module, "ArgumentOpen"); | |||
ArgumentSeparator = PyObject_GetAttrString(module, "ArgumentSeparator"); | |||
ArgumentClose = PyObject_GetAttrString(module, "ArgumentClose"); | |||
WikilinkOpen = PyObject_GetAttrString(module, "WikilinkOpen"); | |||
WikilinkSeparator = PyObject_GetAttrString(module, "WikilinkSeparator"); | |||
WikilinkClose = PyObject_GetAttrString(module, "WikilinkClose"); | |||
ExternalLinkOpen = PyObject_GetAttrString(module, "ExternalLinkOpen"); | |||
ExternalLinkSeparator = PyObject_GetAttrString(module, | |||
"ExternalLinkSeparator"); | |||
ExternalLinkClose = PyObject_GetAttrString(module, "ExternalLinkClose"); | |||
HTMLEntityStart = PyObject_GetAttrString(module, "HTMLEntityStart"); | |||
HTMLEntityNumeric = PyObject_GetAttrString(module, "HTMLEntityNumeric"); | |||
HTMLEntityHex = PyObject_GetAttrString(module, "HTMLEntityHex"); | |||
HTMLEntityEnd = PyObject_GetAttrString(module, "HTMLEntityEnd"); | |||
HeadingStart = PyObject_GetAttrString(module, "HeadingStart"); | |||
HeadingEnd = PyObject_GetAttrString(module, "HeadingEnd"); | |||
CommentStart = PyObject_GetAttrString(module, "CommentStart"); | |||
CommentEnd = PyObject_GetAttrString(module, "CommentEnd"); | |||
TagOpenOpen = PyObject_GetAttrString(module, "TagOpenOpen"); | |||
TagAttrStart = PyObject_GetAttrString(module, "TagAttrStart"); | |||
TagAttrEquals = PyObject_GetAttrString(module, "TagAttrEquals"); | |||
TagAttrQuote = PyObject_GetAttrString(module, "TagAttrQuote"); | |||
TagCloseOpen = PyObject_GetAttrString(module, "TagCloseOpen"); | |||
TagCloseSelfclose = PyObject_GetAttrString(module, "TagCloseSelfclose"); | |||
TagOpenClose = PyObject_GetAttrString(module, "TagOpenClose"); | |||
TagCloseClose = PyObject_GetAttrString(module, "TagCloseClose"); | |||
} |
@@ -0,0 +1,69 @@ | |||
/* | |||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
this software and associated documentation files (the "Software"), to deal in | |||
the Software without restriction, including without limitation the rights to | |||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |||
of the Software, and to permit persons to whom the Software is furnished to do | |||
so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in all | |||
copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. | |||
*/ | |||
#pragma once | |||
#include "common.h" | |||
/* Token globals: `extern` declarations only. The objects themselves are
   defined once, in the source file that implements
   load_tokens_from_module(), which is also what assigns them. */ | |||
extern PyObject* Text; | |||
extern PyObject* TemplateOpen; | |||
extern PyObject* TemplateParamSeparator; | |||
extern PyObject* TemplateParamEquals; | |||
extern PyObject* TemplateClose; | |||
extern PyObject* ArgumentOpen; | |||
extern PyObject* ArgumentSeparator; | |||
extern PyObject* ArgumentClose; | |||
extern PyObject* WikilinkOpen; | |||
extern PyObject* WikilinkSeparator; | |||
extern PyObject* WikilinkClose; | |||
extern PyObject* ExternalLinkOpen; | |||
extern PyObject* ExternalLinkSeparator; | |||
extern PyObject* ExternalLinkClose; | |||
extern PyObject* HTMLEntityStart; | |||
extern PyObject* HTMLEntityNumeric; | |||
extern PyObject* HTMLEntityHex; | |||
extern PyObject* HTMLEntityEnd; | |||
extern PyObject* HeadingStart; | |||
extern PyObject* HeadingEnd; | |||
extern PyObject* CommentStart; | |||
extern PyObject* CommentEnd; | |||
extern PyObject* TagOpenOpen; | |||
extern PyObject* TagAttrStart; | |||
extern PyObject* TagAttrEquals; | |||
extern PyObject* TagAttrQuote; | |||
extern PyObject* TagCloseOpen; | |||
extern PyObject* TagCloseSelfclose; | |||
extern PyObject* TagOpenClose; | |||
extern PyObject* TagCloseClose; | |||
/* Functions */ | |||
/* Looks up every token by name as an attribute of the given Python module
   object and stores the result in the matching global declared above.
   Returns nothing. */
void load_tokens_from_module(PyObject*); |