A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

355 lines
15 KiB

  1. #
  2. # Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
  3. #
  4. # Permission is hereby granted, free of charge, to any person obtaining a copy
  5. # of this software and associated documentation files (the "Software"), to deal
  6. # in the Software without restriction, including without limitation the rights
  7. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8. # copies of the Software, and to permit persons to whom the Software is
  9. # furnished to do so, subject to the following conditions:
  10. #
  11. # The above copyright notice and this permission notice shall be included in
  12. # all copies or substantial portions of the Software.
  13. #
  14. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20. # SOFTWARE.
  21. import pytest
  22. from mwparserfromhell.nodes import Tag, Template, Text
  23. from mwparserfromhell.nodes.extras import Attribute
  24. from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext
  25. agen = lambda name, value: Attribute(wraptext(name), wraptext(value))
  26. agennv = lambda name: Attribute(wraptext(name))
  27. agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), None)
  28. agenp = lambda name, v, a, b, c: Attribute(wraptext(name), v, '"', a, b, c)
  29. agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, '"', a, b, c)
  30. class TestTag(TreeEqualityTestCase):
  31. """Test cases for the Tag node."""
  32. def test_unicode(self):
  33. """test Tag.__unicode__()"""
  34. node1 = Tag(wraptext("ref"))
  35. node2 = Tag(wraptext("span"), wraptext("foo"),
  36. [agen("style", "color: red;")])
  37. node3 = Tag(wraptext("ref"),
  38. attrs=[agennq("name", "foo"),
  39. agenpnv("some_attr", " ", "", "")],
  40. self_closing=True)
  41. node4 = Tag(wraptext("br"), self_closing=True, padding=" ")
  42. node5 = Tag(wraptext("br"), self_closing=True, implicit=True)
  43. node6 = Tag(wraptext("br"), self_closing=True, invalid=True,
  44. implicit=True)
  45. node7 = Tag(wraptext("br"), self_closing=True, invalid=True,
  46. padding=" ")
  47. node8 = Tag(wraptext("hr"), wiki_markup="----", self_closing=True)
  48. node9 = Tag(wraptext("i"), wraptext("italics!"), wiki_markup="''")
  49. assert "<ref></ref>" == str(node1)
  50. assert '<span style="color: red;">foo</span>' == str(node2)
  51. assert "<ref name=foo some_attr/>" == str(node3)
  52. assert "<br />" == str(node4)
  53. assert "<br>" == str(node5)
  54. assert "</br>" == str(node6)
  55. assert "</br />" == str(node7)
  56. assert "----" == str(node8)
  57. assert "''italics!''" == str(node9)
  58. def test_children(self):
  59. """test Tag.__children__()"""
  60. # <ref>foobar</ref>
  61. node1 = Tag(wraptext("ref"), wraptext("foobar"))
  62. # '''bold text'''
  63. node2 = Tag(wraptext("b"), wraptext("bold text"), wiki_markup="'''")
  64. # <img id="foo" class="bar" selected />
  65. node3 = Tag(wraptext("img"),
  66. attrs=[agen("id", "foo"), agen("class", "bar"),
  67. agennv("selected")],
  68. self_closing=True, padding=" ")
  69. gen1 = node1.__children__()
  70. gen2 = node2.__children__()
  71. gen3 = node3.__children__()
  72. assert node1.tag == next(gen1)
  73. assert node3.tag == next(gen3)
  74. assert node3.attributes[0].name == next(gen3)
  75. assert node3.attributes[0].value == next(gen3)
  76. assert node3.attributes[1].name == next(gen3)
  77. assert node3.attributes[1].value == next(gen3)
  78. assert node3.attributes[2].name == next(gen3)
  79. assert node1.contents == next(gen1)
  80. assert node2.contents == next(gen2)
  81. assert node1.closing_tag == next(gen1)
  82. with pytest.raises(StopIteration):
  83. next(gen1)
  84. with pytest.raises(StopIteration):
  85. next(gen2)
  86. with pytest.raises(StopIteration):
  87. next(gen3)
  88. def test_strip(self):
  89. """test Tag.__strip__()"""
  90. node1 = Tag(wraptext("i"), wraptext("foobar"))
  91. node2 = Tag(wraptext("math"), wraptext("foobar"))
  92. node3 = Tag(wraptext("br"), self_closing=True)
  93. assert "foobar" == node1.__strip__()
  94. assert None == node2.__strip__()
  95. assert None == node3.__strip__()
  96. def test_showtree(self):
  97. """test Tag.__showtree__()"""
  98. output = []
  99. getter, marker = object(), object()
  100. get = lambda code: output.append((getter, code))
  101. mark = lambda: output.append(marker)
  102. node1 = Tag(wraptext("ref"), wraptext("text"),
  103. [agen("name", "foo"), agennv("selected")])
  104. node2 = Tag(wraptext("br"), self_closing=True, padding=" ")
  105. node3 = Tag(wraptext("br"), self_closing=True, invalid=True,
  106. implicit=True, padding=" ")
  107. node1.__showtree__(output.append, get, mark)
  108. node2.__showtree__(output.append, get, mark)
  109. node3.__showtree__(output.append, get, mark)
  110. valid = [
  111. "<", (getter, node1.tag), (getter, node1.attributes[0].name),
  112. " = ", marker, (getter, node1.attributes[0].value),
  113. (getter, node1.attributes[1].name), ">", (getter, node1.contents),
  114. "</", (getter, node1.closing_tag), ">", "<", (getter, node2.tag),
  115. "/>", "</", (getter, node3.tag), ">"]
  116. assert valid == output
  117. def test_tag(self):
  118. """test getter/setter for the tag attribute"""
  119. tag = wraptext("ref")
  120. node = Tag(tag, wraptext("text"))
  121. assert tag is node.tag
  122. assert tag is node.closing_tag
  123. node.tag = "span"
  124. self.assertWikicodeEqual(wraptext("span"), node.tag)
  125. self.assertWikicodeEqual(wraptext("span"), node.closing_tag)
  126. assert "<span>text</span>" == node
  127. def test_contents(self):
  128. """test getter/setter for the contents attribute"""
  129. contents = wraptext("text")
  130. node = Tag(wraptext("ref"), contents)
  131. assert contents is node.contents
  132. node.contents = "text and a {{template}}"
  133. parsed = wrap([Text("text and a "), Template(wraptext("template"))])
  134. self.assertWikicodeEqual(parsed, node.contents)
  135. assert "<ref>text and a {{template}}</ref>" == node
  136. def test_attributes(self):
  137. """test getter for the attributes attribute"""
  138. attrs = [agen("name", "bar")]
  139. node1 = Tag(wraptext("ref"), wraptext("foo"))
  140. node2 = Tag(wraptext("ref"), wraptext("foo"), attrs)
  141. assert [] == node1.attributes
  142. assert attrs is node2.attributes
  143. def test_wiki_markup(self):
  144. """test getter/setter for the wiki_markup attribute"""
  145. node = Tag(wraptext("i"), wraptext("italic text"))
  146. assert None is node.wiki_markup
  147. node.wiki_markup = "''"
  148. assert "''" == node.wiki_markup
  149. assert "''italic text''" == node
  150. node.wiki_markup = False
  151. assert node.wiki_markup is None
  152. assert "<i>italic text</i>" == node
  153. def test_self_closing(self):
  154. """test getter/setter for the self_closing attribute"""
  155. node = Tag(wraptext("ref"), wraptext("foobar"))
  156. assert node.self_closing is False
  157. node.self_closing = True
  158. assert node.self_closing is True
  159. assert "<ref/>" == node
  160. node.self_closing = 0
  161. assert node.self_closing is False
  162. assert "<ref>foobar</ref>" == node
  163. def test_invalid(self):
  164. """test getter/setter for the invalid attribute"""
  165. node = Tag(wraptext("br"), self_closing=True, implicit=True)
  166. assert node.invalid is False
  167. node.invalid = True
  168. assert node.invalid is True
  169. assert "</br>" == node
  170. node.invalid = 0
  171. assert node.invalid is False
  172. assert "<br>" == node
  173. def test_implicit(self):
  174. """test getter/setter for the implicit attribute"""
  175. node = Tag(wraptext("br"), self_closing=True)
  176. assert node.implicit is False
  177. node.implicit = True
  178. assert node.implicit is True
  179. assert "<br>" == node
  180. node.implicit = 0
  181. assert node.implicit is False
  182. assert "<br/>" == node
  183. def test_padding(self):
  184. """test getter/setter for the padding attribute"""
  185. node = Tag(wraptext("ref"), wraptext("foobar"))
  186. assert "" == node.padding
  187. node.padding = " "
  188. assert " " == node.padding
  189. assert "<ref >foobar</ref>" == node
  190. node.padding = None
  191. assert "" == node.padding
  192. assert "<ref>foobar</ref>" == node
  193. with pytest.raises(ValueError):
  194. node.__setattr__("padding", True)
  195. def test_closing_tag(self):
  196. """test getter/setter for the closing_tag attribute"""
  197. tag = wraptext("ref")
  198. node = Tag(tag, wraptext("foobar"))
  199. assert tag is node.closing_tag
  200. node.closing_tag = "ref {{ignore me}}"
  201. parsed = wrap([Text("ref "), Template(wraptext("ignore me"))])
  202. self.assertWikicodeEqual(parsed, node.closing_tag)
  203. assert "<ref>foobar</ref {{ignore me}}>" == node
  204. def test_wiki_style_separator(self):
  205. """test getter/setter for wiki_style_separator attribute"""
  206. node = Tag(wraptext("table"), wraptext("\n"))
  207. assert None is node.wiki_style_separator
  208. node.wiki_style_separator = "|"
  209. assert "|" == node.wiki_style_separator
  210. node.wiki_markup = "{"
  211. assert "{|\n{" == node
  212. node2 = Tag(wraptext("table"), wraptext("\n"), wiki_style_separator="|")
  213. assert "|" == node.wiki_style_separator
  214. def test_closing_wiki_markup(self):
  215. """test getter/setter for closing_wiki_markup attribute"""
  216. node = Tag(wraptext("table"), wraptext("\n"))
  217. assert None is node.closing_wiki_markup
  218. node.wiki_markup = "{|"
  219. assert "{|" == node.closing_wiki_markup
  220. node.closing_wiki_markup = "|}"
  221. assert "|}" == node.closing_wiki_markup
  222. assert "{|\n|}" == node
  223. node.wiki_markup = "!!"
  224. assert "|}" == node.closing_wiki_markup
  225. assert "!!\n|}" == node
  226. node.wiki_markup = False
  227. assert node.closing_wiki_markup is None
  228. assert "<table>\n</table>" == node
  229. node2 = Tag(wraptext("table"), wraptext("\n"),
  230. attrs=[agen("id", "foo")], wiki_markup="{|",
  231. closing_wiki_markup="|}")
  232. assert "|}" == node2.closing_wiki_markup
  233. assert '{| id="foo"\n|}' == node2
  234. def test_has(self):
  235. """test Tag.has()"""
  236. node = Tag(wraptext("ref"), wraptext("cite"), [agen("name", "foo")])
  237. assert node.has("name") is True
  238. assert node.has(" name ") is True
  239. assert node.has(wraptext("name")) is True
  240. assert node.has("Name") is False
  241. assert node.has("foo") is False
  242. attrs = [agen("id", "foo"), agenp("class", "bar", " ", "\n", "\n"),
  243. agen("foo", "bar"), agenpnv("foo", " ", " \n ", " \t")]
  244. node2 = Tag(wraptext("div"), attrs=attrs, self_closing=True)
  245. assert node2.has("id") is True
  246. assert node2.has("class") is True
  247. assert node2.has(attrs[1].pad_first + str(attrs[1].name) +
  248. attrs[1].pad_before_eq) is True
  249. assert node2.has(attrs[3]) is True
  250. assert node2.has(str(attrs[3])) is True
  251. assert node2.has("idclass") is False
  252. assert node2.has("id class") is False
  253. assert node2.has("id=foo") is False
  254. def test_get(self):
  255. """test Tag.get()"""
  256. attrs = [agen("name", "foo")]
  257. node = Tag(wraptext("ref"), wraptext("cite"), attrs)
  258. assert attrs[0] is node.get("name")
  259. assert attrs[0] is node.get(" name ")
  260. assert attrs[0] is node.get(wraptext("name"))
  261. with pytest.raises(ValueError):
  262. node.get("Name")
  263. with pytest.raises(ValueError):
  264. node.get("foo")
  265. attrs = [agen("id", "foo"), agenp("class", "bar", " ", "\n", "\n"),
  266. agen("foo", "bar"), agenpnv("foo", " ", " \n ", " \t")]
  267. node2 = Tag(wraptext("div"), attrs=attrs, self_closing=True)
  268. assert attrs[0] is node2.get("id")
  269. assert attrs[1] is node2.get("class")
  270. assert attrs[1] is node2.get(
  271. attrs[1].pad_first + str(attrs[1].name) + attrs[1].pad_before_eq)
  272. assert attrs[3] is node2.get(attrs[3])
  273. assert attrs[3] is node2.get(str(attrs[3]))
  274. assert attrs[3] is node2.get(" foo")
  275. with pytest.raises(ValueError):
  276. node2.get("idclass")
  277. with pytest.raises(ValueError):
  278. node2.get("id class")
  279. with pytest.raises(ValueError):
  280. node2.get("id=foo")
  281. def test_add(self):
  282. """test Tag.add()"""
  283. node = Tag(wraptext("ref"), wraptext("cite"))
  284. node.add("name", "value")
  285. node.add("name", "value", quotes=None)
  286. node.add("name", "value", quotes="'")
  287. node.add("name")
  288. node.add(1, False)
  289. node.add("style", "{{foobar}}")
  290. node.add("name", "value", '"', "\n", " ", " ")
  291. attr1 = ' name="value"'
  292. attr2 = " name=value"
  293. attr3 = " name='value'"
  294. attr4 = " name"
  295. attr5 = ' 1="False"'
  296. attr6 = ' style="{{foobar}}"'
  297. attr7 = '\nname = "value"'
  298. assert attr1 == node.attributes[0]
  299. assert attr2 == node.attributes[1]
  300. assert attr3 == node.attributes[2]
  301. assert attr4 == node.attributes[3]
  302. assert attr5 == node.attributes[4]
  303. assert attr6 == node.attributes[5]
  304. assert attr7 == node.attributes[6]
  305. assert attr7 == node.get("name")
  306. self.assertWikicodeEqual(wrap([Template(wraptext("foobar"))]),
  307. node.attributes[5].value)
  308. assert "".join(("<ref", attr1, attr2, attr3, attr4, attr5,
  309. attr6, attr7, ">cite</ref>")) == node
  310. with pytest.raises(ValueError):
  311. node.add("name", "foo", quotes="bar")
  312. with pytest.raises(ValueError):
  313. node.add("name", "a bc d", quotes=None)
  314. def test_remove(self):
  315. """test Tag.remove()"""
  316. attrs = [agen("id", "foo"), agenp("class", "bar", " ", "\n", "\n"),
  317. agen("foo", "bar"), agenpnv("foo", " ", " \n ", " \t")]
  318. node = Tag(wraptext("div"), attrs=attrs, self_closing=True)
  319. node.remove("class")
  320. assert '<div id="foo" foo="bar" foo \n />' == node
  321. node.remove("foo")
  322. assert '<div id="foo"/>' == node
  323. with pytest.raises(ValueError):
  324. node.remove("foo")
  325. node.remove("id")
  326. assert '<div/>' == node