From 5e6e5b6301f5f50ca8585a5b73f72af49898cdf2 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 21 Aug 2013 01:07:32 -0400 Subject: [PATCH] tag_defs.py -> definitions.py; more outline stuff --- mwparserfromhell/{tag_defs.py => definitions.py} | 2 +- mwparserfromhell/nodes/tag.py | 6 ++-- mwparserfromhell/parser/tokenizer.c | 16 +++++----- mwparserfromhell/parser/tokenizer.h | 8 ++--- mwparserfromhell/parser/tokenizer.py | 37 ++++++++++++++++++------ 5 files changed, 44 insertions(+), 25 deletions(-) rename mwparserfromhell/{tag_defs.py => definitions.py} (97%) diff --git a/mwparserfromhell/tag_defs.py b/mwparserfromhell/definitions.py similarity index 97% rename from mwparserfromhell/tag_defs.py rename to mwparserfromhell/definitions.py index 2395fc6..2d7ab0c 100644 --- a/mwparserfromhell/tag_defs.py +++ b/mwparserfromhell/definitions.py @@ -20,7 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -"""Contains data regarding certain HTML tags.""" +"""Contains data about certain markup, like HTML tags and external links.""" from __future__ import unicode_literals diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index b4aec3e..80b8a88 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -25,7 +25,7 @@ from __future__ import unicode_literals from . import Node, Text from .extras import Attribute from ..compat import str -from ..tag_defs import is_visible +from ..definitions import is_visible from ..utils import parse_anything __all__ = ["Tag"] @@ -152,7 +152,7 @@ class Tag(Node): This makes the tag look like a lone close tag. It is technically invalid and is only parsable Wikicode when the tag itself is single-only, like ``<br>`` and ``<img>``. See - :py:func:`.tag_defs.is_single_only`. + :py:func:`.definitions.is_single_only`. """ return self._invalid @@ -161,7 +161,7 @@ class Tag(Node): """Whether the tag is implicitly self-closing, with no ending slash. This is only possible for specific "single" tags like ``<br>`` and - ``<li>``. See :py:func:`.tag_defs.is_single`. This field only has an + ``<li>``. See :py:func:`.definitions.is_single`. This field only has an effect if :py:attr:`self_closing` is also ``True``. """ return self._implicit diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 267e7c5..2b74f6b 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -37,12 +37,12 @@ static int heading_level_from_context(int n) } /* - Call the given function in tag_defs, using 'tag' as a parameter, and return - its output as a bool. + Call the given function in definitions.py, using 'tag' as a parameter, and + return its output as a bool. */ -static int call_tag_def_func(const char* funcname, PyObject* tag) +static int call_def_func(const char* funcname, PyObject* tag) { - PyObject* func = PyObject_GetAttrString(tag_defs, funcname); + PyObject* func = PyObject_GetAttrString(definitions, funcname); PyObject* result = PyObject_CallFunctionObjArgs(func, tag, NULL); int ans = (result == Py_True) ? 1 : 0; @@ -2416,13 +2416,13 @@ static int load_tokens(void) return 0; } -static int load_tag_defs(void) +static int load_definitions(void) { PyObject *tempmod, *globals = PyEval_GetGlobals(), *locals = PyEval_GetLocals(), *fromlist = PyList_New(1), - *modname = IMPORT_NAME_FUNC("tag_defs"); + *modname = IMPORT_NAME_FUNC("definitions"); char *name = "mwparserfromhell"; if (!fromlist || !modname) @@ -2432,7 +2432,7 @@ static int load_tag_defs(void) Py_DECREF(fromlist); if (!tempmod) return -1; - tag_defs = PyObject_GetAttrString(tempmod, "tag_defs"); + definitions = PyObject_GetAttrString(tempmod, "definitions"); Py_DECREF(tempmod); return 0; } @@ -2455,7 +2455,7 @@ PyMODINIT_FUNC INIT_FUNC_NAME(void) NOARGS = PyTuple_New(0); if (!EMPTY || !NOARGS) INIT_ERROR; - if (load_entitydefs() || load_tokens() || load_tag_defs()) + if (load_entitydefs() || load_tokens() || load_definitions()) INIT_ERROR; #ifdef IS_PY3K return module; diff --git a/mwparserfromhell/parser/tokenizer.h 
b/mwparserfromhell/parser/tokenizer.h index 16c76eb..41c1c1b 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -62,7 +62,7 @@ static char** entitydefs; static PyObject* EMPTY; static PyObject* NOARGS; -static PyObject* tag_defs; +static PyObject* definitions; /* Tokens: */ @@ -241,9 +241,9 @@ typedef struct { /* Macros for accessing HTML tag definitions: */ #define GET_HTML_TAG(markup) (markup == *":" ? "dd" : markup == *";" ? "dt" : "li") -#define IS_PARSABLE(tag) (call_tag_def_func("is_parsable", tag)) -#define IS_SINGLE(tag) (call_tag_def_func("is_single", tag)) -#define IS_SINGLE_ONLY(tag) (call_tag_def_func("is_single_only", tag)) +#define IS_PARSABLE(tag) (call_def_func("is_parsable", tag)) +#define IS_SINGLE(tag) (call_def_func("is_single", tag)) +#define IS_SINGLE_ONLY(tag) (call_def_func("is_single_only", tag)) /* Function prototypes: */ diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 9f675ac..07ae0b1 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -26,7 +26,7 @@ import re from . 
import contexts, tokens from ..compat import htmlentities -from ..tag_defs import get_html_tag, is_parsable, is_single, is_single_only +from ..definitions import get_html_tag, is_parsable, is_single, is_single_only __all__ = ["Tokenizer"] @@ -60,7 +60,7 @@ class Tokenizer(object): START = object() END = object() MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "'", "#", "*", ";", - ":", "/", "-", "\n", END] + ":", "/", "-", "\n", START, END] MAX_DEPTH = 40 MAX_CYCLES = 100000 regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE) @@ -311,10 +311,30 @@ class Tokenizer(object): self._head += 1 return self._pop() + def _really_parse_external_link(self, brackets): + """Really parse an external link.""" + # link = self._parse(contexts.EXT_LINK_URL) + raise BadRoute() + def _parse_external_link(self, brackets): """Parse an external link at the head of the wikicode string.""" - self._emit_text(self._read()) - # raise NotImplementedError() + reset = self._head + self._head += 1 + try: + bad_context = self._context & contexts.INVALID_LINK + if bad_context or not self._can_recurse(): + raise BadRoute() + link = self._really_parse_external_link(brackets) + except BadRoute: + self._head = reset + if not brackets and self._context & contexts.DL_TERM: + self._handle_dl_term() + else: + self._emit_text(self._read()) + else: + self._emit(tokens.ExternalLinkOpen(brackets)) + self._emit_all(link) + self._emit(tokens.ExternalLinkClose()) def _parse_heading(self): """Parse a section heading at the head of the wikicode string.""" @@ -912,11 +932,10 @@ class Tokenizer(object): self._handle_wikilink_separator() elif this == next == "]" and self._context & contexts.WIKILINK: return self._handle_wikilink_end() - elif this == "[" and not self._context & contexts.INVALID_LINK: ## or this == ":" - if self._can_recurse(): - self._parse_external_link(brackets=this == "[") - else: - self._emit_text("[") + elif this == "[": + self._parse_external_link(True) + elif this 
== ":" and self._read(-1) not in self.MARKERS: + self._parse_external_link(False) elif this == "=" and not self._global & contexts.GL_HEADING: if self._read(-1) in ("\n", self.START): self._parse_heading()