Browse Source

Rename tag_defs.py to definitions.py; add more of the external link parsing outline to the tokenizer

tags/v0.3
Ben Kurtovic 10 years ago
parent
commit
5e6e5b6301
5 changed files with 44 additions and 25 deletions
  1. +1
    -1
      mwparserfromhell/definitions.py
  2. +3
    -3
      mwparserfromhell/nodes/tag.py
  3. +8
    -8
      mwparserfromhell/parser/tokenizer.c
  4. +4
    -4
      mwparserfromhell/parser/tokenizer.h
  5. +28
    -9
      mwparserfromhell/parser/tokenizer.py

mwparserfromhell/tag_defs.py → mwparserfromhell/definitions.py View File

@@ -20,7 +20,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""Contains data regarding certain HTML tags."""
"""Contains data about certain markup, like HTML tags and external links."""

from __future__ import unicode_literals


+ 3
- 3
mwparserfromhell/nodes/tag.py View File

@@ -25,7 +25,7 @@ from __future__ import unicode_literals
from . import Node, Text
from .extras import Attribute
from ..compat import str
from ..tag_defs import is_visible
from ..definitions import is_visible
from ..utils import parse_anything

__all__ = ["Tag"]
@@ -152,7 +152,7 @@ class Tag(Node):
This makes the tag look like a lone close tag. It is technically
invalid and is only parsable Wikicode when the tag itself is
single-only, like ``<br>`` and ``<img>``. See
:py:func:`.tag_defs.is_single_only`.
:py:func:`.definitions.is_single_only`.
"""
return self._invalid

@@ -161,7 +161,7 @@ class Tag(Node):
"""Whether the tag is implicitly self-closing, with no ending slash.

This is only possible for specific "single" tags like ``<br>`` and
``<li>``. See :py:func:`.tag_defs.is_single`. This field only has an
``<li>``. See :py:func:`.definitions.is_single`. This field only has an
effect if :py:attr:`self_closing` is also ``True``.
"""
return self._implicit


+ 8
- 8
mwparserfromhell/parser/tokenizer.c View File

@@ -37,12 +37,12 @@ static int heading_level_from_context(int n)
}

/*
Call the given function in tag_defs, using 'tag' as a parameter, and return
its output as a bool.
Call the given function in definitions.py, using 'tag' as a parameter, and
return its output as a bool.
*/
static int call_tag_def_func(const char* funcname, PyObject* tag)
static int call_def_func(const char* funcname, PyObject* tag)
{
PyObject* func = PyObject_GetAttrString(tag_defs, funcname);
PyObject* func = PyObject_GetAttrString(definitions, funcname);
PyObject* result = PyObject_CallFunctionObjArgs(func, tag, NULL);
int ans = (result == Py_True) ? 1 : 0;

@@ -2416,13 +2416,13 @@ static int load_tokens(void)
return 0;
}

static int load_tag_defs(void)
static int load_definitions(void)
{
PyObject *tempmod,
*globals = PyEval_GetGlobals(),
*locals = PyEval_GetLocals(),
*fromlist = PyList_New(1),
*modname = IMPORT_NAME_FUNC("tag_defs");
*modname = IMPORT_NAME_FUNC("definitions");
char *name = "mwparserfromhell";

if (!fromlist || !modname)
@@ -2432,7 +2432,7 @@ static int load_tag_defs(void)
Py_DECREF(fromlist);
if (!tempmod)
return -1;
tag_defs = PyObject_GetAttrString(tempmod, "tag_defs");
definitions = PyObject_GetAttrString(tempmod, "definitions");
Py_DECREF(tempmod);
return 0;
}
@@ -2455,7 +2455,7 @@ PyMODINIT_FUNC INIT_FUNC_NAME(void)
NOARGS = PyTuple_New(0);
if (!EMPTY || !NOARGS)
INIT_ERROR;
if (load_entitydefs() || load_tokens() || load_tag_defs())
if (load_entitydefs() || load_tokens() || load_definitions())
INIT_ERROR;
#ifdef IS_PY3K
return module;


+ 4
- 4
mwparserfromhell/parser/tokenizer.h View File

@@ -62,7 +62,7 @@ static char** entitydefs;

static PyObject* EMPTY;
static PyObject* NOARGS;
static PyObject* tag_defs;
static PyObject* definitions;


/* Tokens: */
@@ -241,9 +241,9 @@ typedef struct {
/* Macros for accessing HTML tag definitions: */

#define GET_HTML_TAG(markup) (markup == *":" ? "dd" : markup == *";" ? "dt" : "li")
#define IS_PARSABLE(tag) (call_tag_def_func("is_parsable", tag))
#define IS_SINGLE(tag) (call_tag_def_func("is_single", tag))
#define IS_SINGLE_ONLY(tag) (call_tag_def_func("is_single_only", tag))
#define IS_PARSABLE(tag) (call_def_func("is_parsable", tag))
#define IS_SINGLE(tag) (call_def_func("is_single", tag))
#define IS_SINGLE_ONLY(tag) (call_def_func("is_single_only", tag))


/* Function prototypes: */


+ 28
- 9
mwparserfromhell/parser/tokenizer.py View File

@@ -26,7 +26,7 @@ import re

from . import contexts, tokens
from ..compat import htmlentities
from ..tag_defs import get_html_tag, is_parsable, is_single, is_single_only
from ..definitions import get_html_tag, is_parsable, is_single, is_single_only

__all__ = ["Tokenizer"]

@@ -60,7 +60,7 @@ class Tokenizer(object):
START = object()
END = object()
MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "'", "#", "*", ";",
":", "/", "-", "\n", END]
":", "/", "-", "\n", START, END]
MAX_DEPTH = 40
MAX_CYCLES = 100000
regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE)
@@ -311,10 +311,30 @@ class Tokenizer(object):
self._head += 1
return self._pop()

def _really_parse_external_link(self, brackets):
"""Really parse an external link."""
# link = self._parse(contexts.EXT_LINK_URL)
raise BadRoute()

def _parse_external_link(self, brackets):
"""Parse an external link at the head of the wikicode string."""
self._emit_text(self._read())
# raise NotImplementedError()
reset = self._head
self._head += 1
try:
bad_context = self._context & contexts.INVALID_LINK
if bad_context or not self._can_recurse():
raise BadRoute()
link = self._really_parse_external_link(brackets)
except BadRoute:
self._head = reset
if not brackets and self._context & contexts.DL_TERM:
self._handle_dl_term()
else:
self._emit_text(self._read())
else:
self._emit(tokens.ExternalLinkOpen(brackets))
self._emit_all(link)
self._emit(tokens.ExternalLinkClose())

def _parse_heading(self):
"""Parse a section heading at the head of the wikicode string."""
@@ -912,11 +932,10 @@ class Tokenizer(object):
self._handle_wikilink_separator()
elif this == next == "]" and self._context & contexts.WIKILINK:
return self._handle_wikilink_end()
elif this == "[" and not self._context & contexts.INVALID_LINK: ## or this == ":"
if self._can_recurse():
self._parse_external_link(brackets=this == "[")
else:
self._emit_text("[")
elif this == "[":
self._parse_external_link(True)
elif this == ":" and self._read(-1) not in self.MARKERS:
self._parse_external_link(False)
elif this == "=" and not self._global & contexts.GL_HEADING:
if self._read(-1) in ("\n", self.START):
self._parse_heading()


Loading…
Cancel
Save