Browse Source

add some functionality for English Wikipedia

pull/222/head
Victor Bulatov 4 years ago
parent
commit
b0002a0eca
3 changed files with 39 additions and 3 deletions
  1. +2
    -0
      mwparserfromhell/nodes/tag.py
  2. +14
    -3
      mwparserfromhell/nodes/template.py
  3. +23
    -0
      mwparserfromhell/nodes/wikilink.py

+ 2
- 0
mwparserfromhell/nodes/tag.py View File

@@ -92,6 +92,8 @@ class Tag(Node):

def __strip__(self, **kwargs):
    """Return the stripped text of this tag's contents, or ``None``.

    ``None`` is returned when the tag has no contents or when
    ``is_visible`` rejects the tag name. The contents of a ``ref`` tag are
    padded with one space on each side.
    """
    # Guard clause: nothing to render for empty or invisible tags.
    if not self.contents or not is_visible(self.tag):
        return None

    stripped = self.contents.strip_code(**kwargs)
    if self.tag == "ref":
        # Pad so the reference text does not fuse with neighbouring words.
        return " " + stripped + " "
    return stripped



+ 14
- 3
mwparserfromhell/nodes/template.py View File

@@ -33,6 +33,18 @@ __all__ = ["Template"]

FLAGS = re.DOTALL | re.UNICODE


def _render_params(pattern, *indexes):
    """Build a renderer that formats selected template parameters.

    The returned callable takes a template node, picks ``template.params``
    at each of *indexes* and substitutes them, in order, into the positional
    ``{}`` fields of *pattern*.

    It returns ``None`` instead of raising ``IndexError`` when the template
    carries fewer parameters than the renderer needs (for example a bare
    ``{{IPA}}`` or ``{{convert|5}}``), so stripping malformed wikitext does
    not abort.
    """

    def render(template):
        params = template.params
        try:
            chosen = [params[index] for index in indexes]
        except IndexError:
            # Not enough parameters: render nothing for this template.
            return None
        return pattern.format(*chosen)

    return render


# Plain-text renderers for selected templates, keyed by template name.
# Each value is called with the template node and returns its replacement
# text, or None when the required parameters are missing.
TEMPLATES = {
    "Esp": _render_params("* 10^{}", 0),
    "smallcaps": _render_params("{}", 0),
    "Unicode": _render_params("{}", 0),
    "IPA": _render_params("{}", 0),
    "transl": _render_params("{}", -1),  # the last parameter holds the text
    "IAST": _render_params("{}", 0),
    "ssub": _render_params("{}", 0),
    "SubatomicParticle": _render_params("{}", 0),
    "convert": _render_params("{} {}", 0, 1),
}

class Template(Node):
"""Represents a template in wikicode, like ``{{foo}}``."""

@@ -59,9 +71,8 @@ class Template(Node):
yield param.value

def __strip__(self, **kwargs):
    """Return the plain-text rendering of this template, or ``None``.

    With ``keep_template_params`` set in *kwargs*, the stripped values of
    all parameters are joined with single spaces (empty values skipped).

    Otherwise the template name is looked up in ``TEMPLATES`` and the
    matching renderer is applied. ``None`` is returned when no renderer is
    registered or when the template lacks a parameter the renderer needs.
    """
    if kwargs.get("keep_template_params"):
        parts = [param.value.strip_code(**kwargs) for param in self.params]
        return " ".join(part for part in parts if part)

    # Template names may carry surrounding whitespace or newlines
    # (``{{ convert |...}}``), and MediaWiki ignores the case of the first
    # letter, so try the name as written and both first-letter variants.
    name = str(self.name).strip()
    candidates = (
        name,
        name[:1].upper() + name[1:],
        name[:1].lower() + name[1:],
    )
    for candidate in candidates:
        renderer = TEMPLATES.get(candidate)
        if renderer is None:
            continue
        try:
            return renderer(self)
        except IndexError:
            # The template has fewer parameters than the renderer indexes.
            return None
    return None

def __showtree__(self, write, get, mark):


+ 23
- 0
mwparserfromhell/nodes/wikilink.py View File

@@ -28,6 +28,9 @@ from ..utils import parse_anything

__all__ = ["Wikilink"]

# Lower-case link prefixes (the part of a title before ":") whose links are
# dropped entirely when stripping code: what appear to be Wikipedia language
# codes, plus the "category" and "file" namespaces.
TECHNICAL = {"category", "file"}.union(
    "en ceb sv de fr nl ru it es pl war vi ja zh pt ar uk fa sr ca no id ko fi hu sh cs ro eu tr ms eo hy bg ce da he sk zh-min-nan kk min hr et lt be el sl gl azb az nn simple ur th hi ka uz la ta vo cy mk ast tg lv mg tt oc af bs ky sq tl bn zh-yue new te be-tarask br ml pms su nds lb jv ht mr sco szl sw ga ba pnb is my fy cv lmo an ne yo pa bar io gu als ku scn kn bpy ckb wuu ia arz qu mn bat-smg si wa cdo or yi am gd nap bug ilo mai hsb map-bms fo xmf mzn li vec sd eml sah os diq sa ps mrj mhr zh-classical hif nv roa-tara bcl ace hak frr pam nso km se rue mi vls nah bh nds-nl crh gan vep sc ab as bo glk myv co so tk fiu-vro lrc csb kv gv sn udm zea ay ie pcd nrm kab ug stq lez ha kw mwl gom haw gn rm lij lfn lad lo koi mt frp fur dsb dty ext ang ln olo cbk-zam dv bjn ksh gag pi pfl pag av bxr gor xal krc za pap kaa pdc tyv rw to kl nov jam arc kbp kbd tpi tet ig sat ki zu wo na jbo roa-rup lbe bi ty mdf kg lg tcy srn inh xh atj ltg chr sm pih om ak tn cu ts tw rmy bm st chy rn got tum ny ss ch pnt fj iu ady ve ee ks ik sg ff dz ti din cr ng cho kj mh ho ii aa mus hz kr shn hyw".split()
)

class Wikilink(Node):
"""Represents an internal wikilink, like ``[[Foo|Bar]]``."""

@@ -47,6 +50,26 @@ class Wikilink(Node):
yield self.text

def __strip__(self, **kwargs):
    """Return the plain-text rendering of this link.

    * Links whose title has a prefix listed in ``TECHNICAL`` (the text
      before the first ``:``) render as an empty string.
    * ``Image:`` links with display text render only their caption: size
      and layout options are removed from the ``|``-separated text and the
      remainder is parsed and stripped.
    * Any other link renders its stripped text, or its stripped title when
      it has no text.
    """
    # Only treat the leading part as a namespace/interwiki prefix when the
    # title really contains a colon. Without this check an ordinary link
    # such as ``[[War]]`` or ``[[File]]`` would match TECHNICAL and vanish.
    prefix, colon, _ = str(self.title).partition(":")
    prefix = prefix.lower().strip()

    if colon and prefix in TECHNICAL:
        return ""

    if colon and prefix == "image" and self.text:
        # Image options that are complete keywords. They are matched
        # exactly, not by prefix, so captions like "left to right: ..." or
        # "top view" are kept.
        layout_keywords = frozenset((
            "thumb", "thumbnail", "frame", "framed", "frameless", "border",
            "right", "left", "center", "none",
            "baseline", "middle", "sub", "super", "text-top", "text-bottom",
            "top", "bottom", "upright",
        ))
        # Options that take a value after "=".
        valued_options = ("upright=", "thumb=", "thumbnail=")

        caption = []
        for entry in str(self.text).split("|"):
            option = entry.strip()
            if option.endswith("px"):
                # Size specification such as "200px" or "x150px".
                continue
            if option in layout_keywords or option.startswith(valued_options):
                continue
            caption.append(entry)
        return parse_anything("|".join(caption)).strip_code(**kwargs)

    if self.text is not None:
        return self.text.strip_code(**kwargs)
    return self.title.strip_code(**kwargs)


Loading…
Cancel
Save