diff --git a/mwparserfromhell/tag_defs.py b/mwparserfromhell/definitions.py
similarity index 97%
rename from mwparserfromhell/tag_defs.py
rename to mwparserfromhell/definitions.py
index 2395fc6..2d7ab0c 100644
--- a/mwparserfromhell/tag_defs.py
+++ b/mwparserfromhell/definitions.py
@@ -20,7 +20,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
-"""Contains data regarding certain HTML tags."""
+"""Contains data about certain markup, like HTML tags and external links."""
from __future__ import unicode_literals
diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py
index b4aec3e..80b8a88 100644
--- a/mwparserfromhell/nodes/tag.py
+++ b/mwparserfromhell/nodes/tag.py
@@ -25,7 +25,7 @@ from __future__ import unicode_literals
from . import Node, Text
from .extras import Attribute
from ..compat import str
-from ..tag_defs import is_visible
+from ..definitions import is_visible
from ..utils import parse_anything
__all__ = ["Tag"]
@@ -152,7 +152,7 @@ class Tag(Node):
This makes the tag look like a lone close tag. It is technically
invalid and is only parsable Wikicode when the tag itself is
single-only, like ``<br>`` and ``<img>``. See
- :py:func:`.tag_defs.is_single_only`.
+ :py:func:`.definitions.is_single_only`.
"""
return self._invalid
@@ -161,7 +161,7 @@ class Tag(Node):
"""Whether the tag is implicitly self-closing, with no ending slash.
This is only possible for specific "single" tags like ``<br>`` and
- ``<li>``. See :py:func:`.tag_defs.is_single`. This field only has an
+ ``<li>``. See :py:func:`.definitions.is_single`. This field only has an
effect if :py:attr:`self_closing` is also ``True``.
"""
return self._implicit
diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c
index 267e7c5..2b74f6b 100644
--- a/mwparserfromhell/parser/tokenizer.c
+++ b/mwparserfromhell/parser/tokenizer.c
@@ -37,12 +37,12 @@ static int heading_level_from_context(int n)
}
/*
- Call the given function in tag_defs, using 'tag' as a parameter, and return
- its output as a bool.
+ Call the given function in definitions.py, using 'tag' as a parameter, and
+ return its output as a bool.
*/
-static int call_tag_def_func(const char* funcname, PyObject* tag)
+static int call_def_func(const char* funcname, PyObject* tag)
{
- PyObject* func = PyObject_GetAttrString(tag_defs, funcname);
+ PyObject* func = PyObject_GetAttrString(definitions, funcname);
PyObject* result = PyObject_CallFunctionObjArgs(func, tag, NULL);
int ans = (result == Py_True) ? 1 : 0;
@@ -2416,13 +2416,13 @@ static int load_tokens(void)
return 0;
}
-static int load_tag_defs(void)
+static int load_definitions(void)
{
PyObject *tempmod,
*globals = PyEval_GetGlobals(),
*locals = PyEval_GetLocals(),
*fromlist = PyList_New(1),
- *modname = IMPORT_NAME_FUNC("tag_defs");
+ *modname = IMPORT_NAME_FUNC("definitions");
char *name = "mwparserfromhell";
if (!fromlist || !modname)
@@ -2432,7 +2432,7 @@ static int load_tag_defs(void)
Py_DECREF(fromlist);
if (!tempmod)
return -1;
- tag_defs = PyObject_GetAttrString(tempmod, "tag_defs");
+ definitions = PyObject_GetAttrString(tempmod, "definitions");
Py_DECREF(tempmod);
return 0;
}
@@ -2455,7 +2455,7 @@ PyMODINIT_FUNC INIT_FUNC_NAME(void)
NOARGS = PyTuple_New(0);
if (!EMPTY || !NOARGS)
INIT_ERROR;
- if (load_entitydefs() || load_tokens() || load_tag_defs())
+ if (load_entitydefs() || load_tokens() || load_definitions())
INIT_ERROR;
#ifdef IS_PY3K
return module;
diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h
index 16c76eb..41c1c1b 100644
--- a/mwparserfromhell/parser/tokenizer.h
+++ b/mwparserfromhell/parser/tokenizer.h
@@ -62,7 +62,7 @@ static char** entitydefs;
static PyObject* EMPTY;
static PyObject* NOARGS;
-static PyObject* tag_defs;
+static PyObject* definitions;
/* Tokens: */
@@ -241,9 +241,9 @@ typedef struct {
/* Macros for accessing HTML tag definitions: */
#define GET_HTML_TAG(markup) (markup == *":" ? "dd" : markup == *";" ? "dt" : "li")
-#define IS_PARSABLE(tag) (call_tag_def_func("is_parsable", tag))
-#define IS_SINGLE(tag) (call_tag_def_func("is_single", tag))
-#define IS_SINGLE_ONLY(tag) (call_tag_def_func("is_single_only", tag))
+#define IS_PARSABLE(tag) (call_def_func("is_parsable", tag))
+#define IS_SINGLE(tag) (call_def_func("is_single", tag))
+#define IS_SINGLE_ONLY(tag) (call_def_func("is_single_only", tag))
/* Function prototypes: */
diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py
index 9f675ac..07ae0b1 100644
--- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py
@@ -26,7 +26,7 @@ import re
from . import contexts, tokens
from ..compat import htmlentities
-from ..tag_defs import get_html_tag, is_parsable, is_single, is_single_only
+from ..definitions import get_html_tag, is_parsable, is_single, is_single_only
__all__ = ["Tokenizer"]
@@ -60,7 +60,7 @@ class Tokenizer(object):
START = object()
END = object()
MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "'", "#", "*", ";",
- ":", "/", "-", "\n", END]
+ ":", "/", "-", "\n", START, END]
MAX_DEPTH = 40
MAX_CYCLES = 100000
regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE)
@@ -311,10 +311,30 @@ class Tokenizer(object):
self._head += 1
return self._pop()
+ def _really_parse_external_link(self, brackets):
+ """Really parse an external link."""
+ # link = self._parse(contexts.EXT_LINK_URL)
+ raise BadRoute()
+
def _parse_external_link(self, brackets):
"""Parse an external link at the head of the wikicode string."""
- self._emit_text(self._read())
- # raise NotImplementedError()
+ reset = self._head
+ self._head += 1
+ try:
+ bad_context = self._context & contexts.INVALID_LINK
+ if bad_context or not self._can_recurse():
+ raise BadRoute()
+ link = self._really_parse_external_link(brackets)
+ except BadRoute:
+ self._head = reset
+ if not brackets and self._context & contexts.DL_TERM:
+ self._handle_dl_term()
+ else:
+ self._emit_text(self._read())
+ else:
+ self._emit(tokens.ExternalLinkOpen(brackets))
+ self._emit_all(link)
+ self._emit(tokens.ExternalLinkClose())
def _parse_heading(self):
"""Parse a section heading at the head of the wikicode string."""
@@ -912,11 +932,10 @@ class Tokenizer(object):
self._handle_wikilink_separator()
elif this == next == "]" and self._context & contexts.WIKILINK:
return self._handle_wikilink_end()
- elif this == "[" and not self._context & contexts.INVALID_LINK: ## or this == ":"
- if self._can_recurse():
- self._parse_external_link(brackets=this == "[")
- else:
- self._emit_text("[")
+ elif this == "[":
+ self._parse_external_link(True)
+ elif this == ":" and self._read(-1) not in self.MARKERS:
+ self._parse_external_link(False)
elif this == "=" and not self._global & contexts.GL_HEADING:
if self._read(-1) in ("\n", self.START):
self._parse_heading()