Browse Source

tag_defs.py -> definitions.py; more outline stuff

tags/v0.3
Ben Kurtovic 10 years ago
parent
commit
5e6e5b6301
5 changed files with 44 additions and 25 deletions
  1. mwparserfromhell/definitions.py (+1, -1)
  2. mwparserfromhell/nodes/tag.py (+3, -3)
  3. mwparserfromhell/parser/tokenizer.c (+8, -8)
  4. mwparserfromhell/parser/tokenizer.h (+4, -4)
  5. mwparserfromhell/parser/tokenizer.py (+28, -9)

mwparserfromhell/tag_defs.py → mwparserfromhell/definitions.py View File

@@ -20,7 +20,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE. # SOFTWARE.


"""Contains data regarding certain HTML tags."""
"""Contains data about certain markup, like HTML tags and external links."""


from __future__ import unicode_literals from __future__ import unicode_literals



+ 3
- 3
mwparserfromhell/nodes/tag.py View File

@@ -25,7 +25,7 @@ from __future__ import unicode_literals
from . import Node, Text from . import Node, Text
from .extras import Attribute from .extras import Attribute
from ..compat import str from ..compat import str
from ..tag_defs import is_visible
from ..definitions import is_visible
from ..utils import parse_anything from ..utils import parse_anything


__all__ = ["Tag"] __all__ = ["Tag"]
@@ -152,7 +152,7 @@ class Tag(Node):
This makes the tag look like a lone close tag. It is technically This makes the tag look like a lone close tag. It is technically
invalid and is only parsable Wikicode when the tag itself is invalid and is only parsable Wikicode when the tag itself is
single-only, like ``<br>`` and ``<img>``. See single-only, like ``<br>`` and ``<img>``. See
:py:func:`.tag_defs.is_single_only`.
:py:func:`.definitions.is_single_only`.
""" """
return self._invalid return self._invalid


@@ -161,7 +161,7 @@ class Tag(Node):
"""Whether the tag is implicitly self-closing, with no ending slash. """Whether the tag is implicitly self-closing, with no ending slash.


This is only possible for specific "single" tags like ``<br>`` and This is only possible for specific "single" tags like ``<br>`` and
``<li>``. See :py:func:`.tag_defs.is_single`. This field only has an
``<li>``. See :py:func:`.definitions.is_single`. This field only has an
effect if :py:attr:`self_closing` is also ``True``. effect if :py:attr:`self_closing` is also ``True``.
""" """
return self._implicit return self._implicit


+ 8
- 8
mwparserfromhell/parser/tokenizer.c View File

@@ -37,12 +37,12 @@ static int heading_level_from_context(int n)
} }


/* /*
Call the given function in tag_defs, using 'tag' as a parameter, and return
its output as a bool.
Call the given function in definitions.py, using 'tag' as a parameter, and
return its output as a bool.
*/ */
static int call_tag_def_func(const char* funcname, PyObject* tag)
static int call_def_func(const char* funcname, PyObject* tag)
{ {
PyObject* func = PyObject_GetAttrString(tag_defs, funcname);
PyObject* func = PyObject_GetAttrString(definitions, funcname);
PyObject* result = PyObject_CallFunctionObjArgs(func, tag, NULL); PyObject* result = PyObject_CallFunctionObjArgs(func, tag, NULL);
int ans = (result == Py_True) ? 1 : 0; int ans = (result == Py_True) ? 1 : 0;


@@ -2416,13 +2416,13 @@ static int load_tokens(void)
return 0; return 0;
} }


static int load_tag_defs(void)
static int load_definitions(void)
{ {
PyObject *tempmod, PyObject *tempmod,
*globals = PyEval_GetGlobals(), *globals = PyEval_GetGlobals(),
*locals = PyEval_GetLocals(), *locals = PyEval_GetLocals(),
*fromlist = PyList_New(1), *fromlist = PyList_New(1),
*modname = IMPORT_NAME_FUNC("tag_defs");
*modname = IMPORT_NAME_FUNC("definitions");
char *name = "mwparserfromhell"; char *name = "mwparserfromhell";


if (!fromlist || !modname) if (!fromlist || !modname)
@@ -2432,7 +2432,7 @@ static int load_tag_defs(void)
Py_DECREF(fromlist); Py_DECREF(fromlist);
if (!tempmod) if (!tempmod)
return -1; return -1;
tag_defs = PyObject_GetAttrString(tempmod, "tag_defs");
definitions = PyObject_GetAttrString(tempmod, "definitions");
Py_DECREF(tempmod); Py_DECREF(tempmod);
return 0; return 0;
} }
@@ -2455,7 +2455,7 @@ PyMODINIT_FUNC INIT_FUNC_NAME(void)
NOARGS = PyTuple_New(0); NOARGS = PyTuple_New(0);
if (!EMPTY || !NOARGS) if (!EMPTY || !NOARGS)
INIT_ERROR; INIT_ERROR;
if (load_entitydefs() || load_tokens() || load_tag_defs())
if (load_entitydefs() || load_tokens() || load_definitions())
INIT_ERROR; INIT_ERROR;
#ifdef IS_PY3K #ifdef IS_PY3K
return module; return module;


+ 4
- 4
mwparserfromhell/parser/tokenizer.h View File

@@ -62,7 +62,7 @@ static char** entitydefs;


static PyObject* EMPTY; static PyObject* EMPTY;
static PyObject* NOARGS; static PyObject* NOARGS;
static PyObject* tag_defs;
static PyObject* definitions;




/* Tokens: */ /* Tokens: */
@@ -241,9 +241,9 @@ typedef struct {
/* Macros for accessing HTML tag definitions: */ /* Macros for accessing HTML tag definitions: */


#define GET_HTML_TAG(markup) (markup == *":" ? "dd" : markup == *";" ? "dt" : "li") #define GET_HTML_TAG(markup) (markup == *":" ? "dd" : markup == *";" ? "dt" : "li")
#define IS_PARSABLE(tag) (call_tag_def_func("is_parsable", tag))
#define IS_SINGLE(tag) (call_tag_def_func("is_single", tag))
#define IS_SINGLE_ONLY(tag) (call_tag_def_func("is_single_only", tag))
#define IS_PARSABLE(tag) (call_def_func("is_parsable", tag))
#define IS_SINGLE(tag) (call_def_func("is_single", tag))
#define IS_SINGLE_ONLY(tag) (call_def_func("is_single_only", tag))




/* Function prototypes: */ /* Function prototypes: */


+ 28
- 9
mwparserfromhell/parser/tokenizer.py View File

@@ -26,7 +26,7 @@ import re


from . import contexts, tokens from . import contexts, tokens
from ..compat import htmlentities from ..compat import htmlentities
from ..tag_defs import get_html_tag, is_parsable, is_single, is_single_only
from ..definitions import get_html_tag, is_parsable, is_single, is_single_only


__all__ = ["Tokenizer"] __all__ = ["Tokenizer"]


@@ -60,7 +60,7 @@ class Tokenizer(object):
START = object() START = object()
END = object() END = object()
MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "'", "#", "*", ";", MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "'", "#", "*", ";",
":", "/", "-", "\n", END]
":", "/", "-", "\n", START, END]
MAX_DEPTH = 40 MAX_DEPTH = 40
MAX_CYCLES = 100000 MAX_CYCLES = 100000
regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE) regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE)
@@ -311,10 +311,30 @@ class Tokenizer(object):
self._head += 1 self._head += 1
return self._pop() return self._pop()


def _really_parse_external_link(self, brackets):
"""Really parse an external link."""
# link = self._parse(contexts.EXT_LINK_URL)
raise BadRoute()

def _parse_external_link(self, brackets): def _parse_external_link(self, brackets):
"""Parse an external link at the head of the wikicode string.""" """Parse an external link at the head of the wikicode string."""
self._emit_text(self._read())
# raise NotImplementedError()
reset = self._head
self._head += 1
try:
bad_context = self._context & contexts.INVALID_LINK
if bad_context or not self._can_recurse():
raise BadRoute()
link = self._really_parse_external_link(brackets)
except BadRoute:
self._head = reset
if not brackets and self._context & contexts.DL_TERM:
self._handle_dl_term()
else:
self._emit_text(self._read())
else:
self._emit(tokens.ExternalLinkOpen(brackets))
self._emit_all(link)
self._emit(tokens.ExternalLinkClose())


def _parse_heading(self): def _parse_heading(self):
"""Parse a section heading at the head of the wikicode string.""" """Parse a section heading at the head of the wikicode string."""
@@ -912,11 +932,10 @@ class Tokenizer(object):
self._handle_wikilink_separator() self._handle_wikilink_separator()
elif this == next == "]" and self._context & contexts.WIKILINK: elif this == next == "]" and self._context & contexts.WIKILINK:
return self._handle_wikilink_end() return self._handle_wikilink_end()
elif this == "[" and not self._context & contexts.INVALID_LINK: ## or this == ":"
if self._can_recurse():
self._parse_external_link(brackets=this == "[")
else:
self._emit_text("[")
elif this == "[":
self._parse_external_link(True)
elif this == ":" and self._read(-1) not in self.MARKERS:
self._parse_external_link(False)
elif this == "=" and not self._global & contexts.GL_HEADING: elif this == "=" and not self._global & contexts.GL_HEADING:
if self._read(-1) in ("\n", self.START): if self._read(-1) in ("\n", self.START):
self._parse_heading() self._parse_heading()


Loading…
Cancel
Save