diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index 1e1e0f0..5e4ebc2 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -20,14 +20,14 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals - """ `mwparserfromhell `_ (the MediaWiki Parser from Hell) is a Python package that provides an easy-to-use and outrageously powerful parser for `MediaWiki `_ wikicode. """ +from __future__ import unicode_literals + __author__ = "Ben Kurtovic" __copyright__ = "Copyright (C) 2012 Ben Kurtovic" __license__ = "MIT License" diff --git a/mwparserfromhell/nodes/heading.py b/mwparserfromhell/nodes/heading.py index 9fb2562..ddd11a0 100644 --- a/mwparserfromhell/nodes/heading.py +++ b/mwparserfromhell/nodes/heading.py @@ -29,6 +29,7 @@ __all__ = ["Heading"] class Heading(Node): """Represents a section heading in wikicode, like ``== Foo ==``.""" + def __init__(self, title, level): super(Heading, self).__init__() self._title = title diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index d46c3d3..a488a4c 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -29,6 +29,7 @@ __all__ = ["HTMLEntity"] class HTMLEntity(Node): """Represents an HTML entity, like ``&nbsp;``, either named or unnamed.""" + def __init__(self, value, named=None, hexadecimal=False, hex_char="x"): super(HTMLEntity, self).__init__() self._value = value diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 765fc31..68ea833 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -34,6 +34,8 @@ __all__ = ["Template"] FLAGS = re.DOTALL | re.UNICODE class Template(Node): + """Represents a template in wikicode, like ``{{foo}}``.""" + def __init__(self, name, params=None): super(Template, self).__init__() self._name = name @@ -73,28 +75,56 @@ 
class Template(Node): write("}}") def _surface_escape(self, code, char): + """Replace *char* in *code* (in place) with its HTML entity. + + The main use of this is to escape pipes (``|``) or equal signs (``=``) + in parameter names or values so they are not mistaken for new + parameters. + """ replacement = HTMLEntity(value=ord(char)) for node in code.filter_text(recursive=False): if char in node: code.replace(node, node.replace(char, replacement)) def _blank_param_value(self, value): + """Remove the content from *value* while keeping its whitespace. + + Replace *value*\ 's nodes with two text nodes, the first containing + whitespace from before its content and the second containing whitespace + from after its content. + """ match = re.search(r"^(\s*).*?(\s*)$", str(value), FLAGS) value.nodes = [Text(match.group(1)), Text(match.group(2))] def _select_theory(self, theories): + """Return the most likely spacing convention given different options. + + Given a dictionary of convention options as keys and their occurrence + counts as values, return the convention that occurs the most, or + ``None`` if there is no clear preferred style. + """ if theories: - best = max(theories.values()) - confidence = float(best) / sum(theories.values()) + values = tuple(theories.values()) + best = max(values) + confidence = float(best) / sum(values) if confidence > 0.75: - keys = tuple(theories.keys()) - return keys[tuple(theories.values()).index(best)] + return tuple(theories.keys())[values.index(best)] - def _get_spacing_conventions(self): + def _get_spacing_conventions(self, use_names): + """Try to determine the whitespace conventions for parameters. + + This will examine the existing parameters' names (if *use_names*) or + values and use :py:meth:`_select_theory` to determine if there are any + preferred styles for how much whitespace to put before or after them. 
+ """ before_theories = defaultdict(lambda: 0) after_theories = defaultdict(lambda: 0) for param in self.params: - match = re.search(r"^(\s*).*?(\s*)$", str(param.value), FLAGS) + if use_names: + component = str(param.name) + else: + component = str(param.value) + match = re.search(r"^(\s*).*?(\s*)$", component, FLAGS) before, after = match.group(1), match.group(2) before_theories[before] += 1 after_theories[after] += 1 @@ -104,6 +134,7 @@ class Template(Node): return before, after def _remove_with_field(self, param, i, name): + """Return True if a parameter name should be kept, otherwise False.""" if param.showkey: following = self.params[i+1:] better_matches = [after.name.strip() == name and not after.showkey for after in following] @@ -112,6 +143,7 @@ class Template(Node): return True def _remove_without_field(self, param, i, force_no_field): + """Return False if a parameter name should be kept, otherwise True.""" if not param.showkey and not force_no_field: dependents = [not after.showkey for after in self.params[i+1:]] if any(dependents): @@ -120,10 +152,12 @@ class Template(Node): @property def name(self): + """The name of the template, as a ``Wikicode`` object.""" return self._name @property def params(self): + """The list of parameters contained within the template.""" return self._params @name.setter @@ -131,6 +165,13 @@ class Template(Node): self._name = parse_anything(value) def has_param(self, name, ignore_empty=True): + """Return ``True`` if any parameter in the template is named *name*. + + With *ignore_empty*, ``False`` will be returned even if the template + contains a parameter with the name *name*, if the parameter's value + is empty. Note that a template may have multiple parameters with the + same name. 
+ """ name = name.strip() if isinstance(name, basestring) else str(name) for param in self.params: if param.name.strip() == name: @@ -140,6 +181,15 @@ class Template(Node): return False def get(self, name): + """Get the parameter whose name is *name*. + + The returned object is a + :py:class:`~mwparserfromhell.nodes.extras.parameter.Parameter` + instance. Raises :py:exc:`ValueError` if no parameter has this name. + Since multiple parameters can have the same name, we'll return the last + match, because the last parameter is the only one read by the MediaWiki + parser. + """ name = name.strip() if isinstance(name, basestring) else str(name) for param in reversed(self.params): if param.name.strip() == name: @@ -147,6 +197,20 @@ class Template(Node): raise ValueError(name) def add(self, name, value, showkey=None, force_nonconformity=False): + """Add a parameter to the template with a given *name* and *value*. + + *name* and *value* can be anything parsable by + :py:func:`mwparserfromhell.utils.parse_anything`; pipes (and equal + signs, if appropriate) are automatically escaped from *value* where + applicable. If *showkey* is given, this will determine whether or not + to show the parameter's name (e.g., ``{{foo|bar}}``'s parameter has a + name of ``"1"`` but it is hidden); otherwise, we'll make a safe and + intelligent guess. If *name* is already a parameter, we'll replace its + value while keeping the same spacing rules unless *force_nonconformity* + is ``True``. We will also try to guess the dominant spacing convention + when adding a new parameter using :py:meth:`_get_spacing_conventions` + unless *force_nonconformity* is ``True``. 
+ """ name, value = parse_anything(name), parse_anything(value) self._surface_escape(value, "|") @@ -182,19 +246,42 @@ class Template(Node): showkey = True if not showkey: self._surface_escape(value, "=") + if not force_nonconformity: - before, after = self._get_spacing_conventions() - if before and after: - value = parse_anything([before, value, after]) - elif before: - value = parse_anything([before, value]) - elif after: - value = parse_anything([value, after]) + before_n, after_n = self._get_spacing_conventions(use_names=True) + if before_n and after_n: + name = parse_anything([before_n, name, after_n]) + elif before_n: + name = parse_anything([before_n, name]) + elif after_n: + name = parse_anything([name, after_n]) + + before_v, after_v = self._get_spacing_conventions(use_names=False) + if before_v and after_v: + value = parse_anything([before_v, value, after_v]) + elif before_v: + value = parse_anything([before_v, value]) + elif after_v: + value = parse_anything([value, after_v]) + param = Parameter(name, value, showkey) self.params.append(param) return param def remove(self, name, keep_field=False, force_no_field=False): + """Remove a parameter from the template whose name is *name*. + + If *keep_field* is ``True``, we will keep the parameter's name, but + blank its value. Otherwise, we will remove the parameter completely + *unless* other parameters are dependent on it (e.g. removing ``bar`` + from ``{{foo|bar|baz}}`` is unsafe because ``{{foo|baz}}`` is not what + we expected, so ``{{foo||baz}}`` will be produced instead), unless + *force_no_field* is also ``True``. If the parameter shows up multiple + times in the template, we will remove all instances of it (and keep + one if *keep_field* is ``True`` - that being the first instance if + none of the instances have dependents, otherwise that instance will be + kept). 
+ """ name = name.strip() if isinstance(name, basestring) else str(name) removed = False for i, param in enumerate(self.params): diff --git a/mwparserfromhell/nodes/text.py b/mwparserfromhell/nodes/text.py index 0213c74..783d8eb 100644 --- a/mwparserfromhell/nodes/text.py +++ b/mwparserfromhell/nodes/text.py @@ -28,16 +28,22 @@ from ..compat import str __all__ = ["Text"] class Text(Node): + """Represents ordinary, unformatted text with no special properties.""" def __init__(self, value): super(Text, self).__init__() self._value = value def __unicode__(self): - return str(self.value) + return self.value def __strip__(self, normalize, collapse): return self @property def value(self): + """The actual text itself.""" return self._value + + @value.setter + def value(self, newval): + self._value = str(newval) diff --git a/mwparserfromhell/utils.py b/mwparserfromhell/utils.py index 4704e84..4e7bf8a 100644 --- a/mwparserfromhell/utils.py +++ b/mwparserfromhell/utils.py @@ -28,7 +28,7 @@ provide additional functionality. from __future__ import unicode_literals import mwparserfromhell -from .compat import basestring, bytes, str +from .compat import bytes, str from .nodes import Node from .smart_list import SmartList