Procházet zdrojové kódy

Document all of Template, plus some other fixes.

tags/v0.1
Ben Kurtovic před 12 roky
rodič
revize
b0418189d6
6 změnil soubory, kde provedl 112 přidání a 17 odebrání
  1. +2
    -2
      mwparserfromhell/__init__.py
  2. +1
    -0
      mwparserfromhell/nodes/heading.py
  3. +1
    -0
      mwparserfromhell/nodes/html_entity.py
  4. +100
    -13
      mwparserfromhell/nodes/template.py
  5. +7
    -1
      mwparserfromhell/nodes/text.py
  6. +1
    -1
      mwparserfromhell/utils.py

+ 2
- 2
mwparserfromhell/__init__.py Zobrazit soubor

@@ -20,14 +20,14 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import unicode_literals

"""
`mwparserfromhell <https://github.com/earwig/mwparserfromhell>`_ (the MediaWiki
Parser from Hell) is a Python package that provides an easy-to-use and
outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode.
"""

from __future__ import unicode_literals

__author__ = "Ben Kurtovic"
__copyright__ = "Copyright (C) 2012 Ben Kurtovic"
__license__ = "MIT License"


+ 1
- 0
mwparserfromhell/nodes/heading.py Zobrazit soubor

@@ -29,6 +29,7 @@ __all__ = ["Heading"]

class Heading(Node):
"""Represents a section heading in wikicode, like ``== Foo ==``."""

def __init__(self, title, level):
super(Heading, self).__init__()
self._title = title


+ 1
- 0
mwparserfromhell/nodes/html_entity.py Zobrazit soubor

@@ -29,6 +29,7 @@ __all__ = ["HTMLEntity"]

class HTMLEntity(Node):
"""Represents an HTML entity, like ``&nbsp;``, either named or unnamed."""

def __init__(self, value, named=None, hexadecimal=False, hex_char="x"):
super(HTMLEntity, self).__init__()
self._value = value


+ 100
- 13
mwparserfromhell/nodes/template.py Zobrazit soubor

@@ -34,6 +34,8 @@ __all__ = ["Template"]
FLAGS = re.DOTALL | re.UNICODE

class Template(Node):
"""Represents a template in wikicode, like ``{{foo}}``."""

def __init__(self, name, params=None):
super(Template, self).__init__()
self._name = name
@@ -73,28 +75,56 @@ class Template(Node):
write("}}")

def _surface_escape(self, code, char):
    """Escape top-level occurrences of *char* in *code* as an HTML entity.

    Every text node returned by ``code.filter_text(recursive=False)`` that
    contains *char* is replaced inside *code* by the result of substituting
    an HTML entity (built from the character's code point) for *char*.
    Nothing is returned; *code* is modified in place.

    The main use of this is to escape pipes (``|``) or equal signs (``=``)
    in parameter names or values so they are not mistaken for new
    parameters.
    """
    entity = HTMLEntity(value=ord(char))
    for text_node in code.filter_text(recursive=False):
        if char not in text_node:
            continue
        escaped = text_node.replace(char, entity)
        code.replace(text_node, escaped)

def _blank_param_value(self, value):
    """Strip *value* down to its surrounding whitespace.

    The nodes of *value* are replaced in place by exactly two text nodes:
    the first holds the whitespace found before the content of its string
    form, the second holds the whitespace found after it.
    """
    padding = re.search(r"^(\s*).*?(\s*)$", str(value), FLAGS)
    leading, trailing = padding.group(1), padding.group(2)
    value.nodes = [Text(leading), Text(trailing)]

def _select_theory(self, theories):
"""Return the most likely spacing convention given different options.

Given a dictionary of convention options as keys and their occurance as
values, return the convention that occurs the most, or ``None`` if
there is no clear preferred style.
"""
if theories:
best = max(theories.values())
confidence = float(best) / sum(theories.values())
values = tuple(theories.values())
best = max(values)
confidence = float(best) / sum(values)
if confidence > 0.75:
keys = tuple(theories.keys())
return keys[tuple(theories.values()).index(best)]
return tuple(theories.keys())[values.index(best)]

def _get_spacing_conventions(self):
def _get_spacing_conventions(self, use_names):
"""Try to determine the whitespace conventions for parameters.

This will examine the existing parameters and use
:py:meth:`_select_theory` to determine if there are any preferred
styles for how much whitespace to put before or after the value.
"""
before_theories = defaultdict(lambda: 0)
after_theories = defaultdict(lambda: 0)
for param in self.params:
match = re.search(r"^(\s*).*?(\s*)$", str(param.value), FLAGS)
if use_names:
component = str(param.name)
else:
component = str(param.value)
match = re.search(r"^(\s*).*?(\s*)$", component, FLAGS)
before, after = match.group(1), match.group(2)
before_theories[before] += 1
after_theories[after] += 1
@@ -104,6 +134,7 @@ class Template(Node):
return before, after

def _remove_with_field(self, param, i, name):
"""Return True if a parameter name should be kept, otherwise False."""
if param.showkey:
following = self.params[i+1:]
better_matches = [after.name.strip() == name and not after.showkey for after in following]
@@ -112,6 +143,7 @@ class Template(Node):
return True

def _remove_without_field(self, param, i, force_no_field):
"""Return False if a parameter name should be kept, otherwise True."""
if not param.showkey and not force_no_field:
dependents = [not after.showkey for after in self.params[i+1:]]
if any(dependents):
@@ -120,10 +152,12 @@ class Template(Node):

@property
def name(self):
"""The name of the template, as a ``Wikicode`` object.

Read accessor for ``self._name``; the stored object is returned as-is,
without copying or conversion.
"""
return self._name

@property
def params(self):
"""The list of parameters contained within the template.

This returns ``self._params`` itself rather than a copy, so in-place
changes made through the returned list are changes to the template.
"""
return self._params

@name.setter
@@ -131,6 +165,13 @@ class Template(Node):
self._name = parse_anything(value)

def has_param(self, name, ignore_empty=True):
"""Return ``True`` if any parameter in the template is named *name*.

With *ignore_empty*, ``False`` will be returned even if the template
contains a parameter with the name *name*, if the parameter's value
is empty. Note that a template may have multiple parameters with the
same name.
"""
name = name.strip() if isinstance(name, basestring) else str(name)
for param in self.params:
if param.name.strip() == name:
@@ -140,6 +181,15 @@ class Template(Node):
return False

def get(self, name):
"""Get the parameter whose name is *name*.

The returned object is a
:py:class:`~mwparserfromhell.nodes.extras.parameter.Parameter`
instance. Raises :py:exc:`ValueError` if no parameter has this name.
Since multiple parameters can have the same name, we'll return the last
match, since the last parameter is the only one read by the MediaWiki
parser.
"""
name = name.strip() if isinstance(name, basestring) else str(name)
for param in reversed(self.params):
if param.name.strip() == name:
@@ -147,6 +197,20 @@ class Template(Node):
raise ValueError(name)

def add(self, name, value, showkey=None, force_nonconformity=False):
"""Add a parameter to the template with a given *name* and *value*.

*name* and *value* can be anything parsable by
:py:func:`mwparserfromhell.utils.parse_anything`; pipes (and equal
signs, if appropriate) are automatically escaped from *value* where
applicable. If *showkey* is given, this will determine whether or not
to show the parameter's name (e.g., ``{{foo|bar}}``'s parameter has a
name of ``"1"`` but it is hidden); otherwise, we'll make a safe and
intelligent guess. If *name* is already a parameter, we'll replace its
value while keeping the same spacing rules unless *force_nonconformity*
is ``True``. We will also try to guess the dominant spacing convention
when adding a new parameter using :py:meth:`_get_spacing_conventions`
unless *force_nonconformity* is ``True``.
"""
name, value = parse_anything(name), parse_anything(value)
self._surface_escape(value, "|")

@@ -182,19 +246,42 @@ class Template(Node):
showkey = True
if not showkey:
self._surface_escape(value, "=")

if not force_nonconformity:
before, after = self._get_spacing_conventions()
if before and after:
value = parse_anything([before, value, after])
elif before:
value = parse_anything([before, value])
elif after:
value = parse_anything([value, after])
before_n, after_n = self._get_spacing_conventions(use_names=True)
if before_n and after_n:
name = parse_anything([before_n, value, after_n])
elif before_n:
name = parse_anything([before_n, value])
elif after_n:
name = parse_anything([value, after_n])

before_v, after_v = self._get_spacing_conventions(use_names=False)
if before_v and after_v:
value = parse_anything([before_v, value, after_v])
elif before_v:
value = parse_anything([before_v, value])
elif after_v:
value = parse_anything([value, after_v])

param = Parameter(name, value, showkey)
self.params.append(param)
return param

def remove(self, name, keep_field=False, force_no_field=False):
"""Remove a parameter from the template whose name is *name*.

If *keep_field* is ``True``, we will keep the parameter's name, but
blank its value. Otherwise, we will remove the parameter completely
*unless* other parameters are dependent on it (e.g. removing ``bar``
from ``{{foo|bar|baz}}`` is unsafe because ``{{foo|baz}}`` is not what
we expected, so ``{{foo||baz}}`` will be produced instead), unless
*force_no_field* is also ``True``. If the parameter shows up multiple
times in the template, we will remove all instances of it (and keep
one if *keep_field* is ``True`` - that being the first instance if
none of the instances have dependents, otherwise the instance that has
dependents will be kept).
"""
name = name.strip() if isinstance(name, basestring) else str(name)
removed = False
for i, param in enumerate(self.params):


+ 7
- 1
mwparserfromhell/nodes/text.py Zobrazit soubor

@@ -28,16 +28,22 @@ from ..compat import str
__all__ = ["Text"]

class Text(Node):
"""Represents ordinary, unformatted text with no special properties."""
def __init__(self, value):
super(Text, self).__init__()
self._value = value

def __unicode__(self):
return str(self.value)
return self.value

def __strip__(self, normalize, collapse):
return self

@property
def value(self):
"""The actual text itself."""
return self._value

@value.setter
def value(self, newval):
self._value = str(newval)

+ 1
- 1
mwparserfromhell/utils.py Zobrazit soubor

@@ -28,7 +28,7 @@ provide additional functionality.
from __future__ import unicode_literals

import mwparserfromhell
from .compat import basestring, bytes, str
from .compat import bytes, str
from .nodes import Node
from .smart_list import SmartList



Načítá se…
Zrušit
Uložit