A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

345 rivejä
15 KiB

  1. # Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
  2. #
  3. # Permission is hereby granted, free of charge, to any person obtaining a copy
  4. # of this software and associated documentation files (the "Software"), to deal
  5. # in the Software without restriction, including without limitation the rights
  6. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. # copies of the Software, and to permit persons to whom the Software is
  8. # furnished to do so, subject to the following conditions:
  9. #
  10. # The above copyright notice and this permission notice shall be included in
  11. # all copies or substantial portions of the Software.
  12. #
  13. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  19. # SOFTWARE.
  20. import unittest
  21. from mwparserfromhell.nodes import Tag, Template, Text
  22. from mwparserfromhell.nodes.extras import Attribute
  23. from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext
  24. agen = lambda name, value: Attribute(wraptext(name), wraptext(value))
  25. agennv = lambda name: Attribute(wraptext(name))
  26. agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), None)
  27. agenp = lambda name, v, a, b, c: Attribute(wraptext(name), v, '"', a, b, c)
  28. agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, '"', a, b, c)
  29. class TestTag(TreeEqualityTestCase):
  30. """Test cases for the Tag node."""
  31. def test_str(self):
  32. """test Tag.__str__()"""
  33. node1 = Tag(wraptext("ref"))
  34. node2 = Tag(wraptext("span"), wraptext("foo"),
  35. [agen("style", "color: red;")])
  36. node3 = Tag(wraptext("ref"),
  37. attrs=[agennq("name", "foo"),
  38. agenpnv("some_attr", " ", "", "")],
  39. self_closing=True)
  40. node4 = Tag(wraptext("br"), self_closing=True, padding=" ")
  41. node5 = Tag(wraptext("br"), self_closing=True, implicit=True)
  42. node6 = Tag(wraptext("br"), self_closing=True, invalid=True,
  43. implicit=True)
  44. node7 = Tag(wraptext("br"), self_closing=True, invalid=True,
  45. padding=" ")
  46. node8 = Tag(wraptext("hr"), wiki_markup="----", self_closing=True)
  47. node9 = Tag(wraptext("i"), wraptext("italics!"), wiki_markup="''")
  48. self.assertEqual("<ref></ref>", str(node1))
  49. self.assertEqual('<span style="color: red;">foo</span>', str(node2))
  50. self.assertEqual("<ref name=foo some_attr/>", str(node3))
  51. self.assertEqual("<br />", str(node4))
  52. self.assertEqual("<br>", str(node5))
  53. self.assertEqual("</br>", str(node6))
  54. self.assertEqual("</br />", str(node7))
  55. self.assertEqual("----", str(node8))
  56. self.assertEqual("''italics!''", str(node9))
  57. def test_children(self):
  58. """test Tag.__children__()"""
  59. # <ref>foobar</ref>
  60. node1 = Tag(wraptext("ref"), wraptext("foobar"))
  61. # '''bold text'''
  62. node2 = Tag(wraptext("b"), wraptext("bold text"), wiki_markup="'''")
  63. # <img id="foo" class="bar" selected />
  64. node3 = Tag(wraptext("img"),
  65. attrs=[agen("id", "foo"), agen("class", "bar"),
  66. agennv("selected")],
  67. self_closing=True, padding=" ")
  68. gen1 = node1.__children__()
  69. gen2 = node2.__children__()
  70. gen3 = node3.__children__()
  71. self.assertEqual(node1.tag, next(gen1))
  72. self.assertEqual(node3.tag, next(gen3))
  73. self.assertEqual(node3.attributes[0].name, next(gen3))
  74. self.assertEqual(node3.attributes[0].value, next(gen3))
  75. self.assertEqual(node3.attributes[1].name, next(gen3))
  76. self.assertEqual(node3.attributes[1].value, next(gen3))
  77. self.assertEqual(node3.attributes[2].name, next(gen3))
  78. self.assertEqual(node1.contents, next(gen1))
  79. self.assertEqual(node2.contents, next(gen2))
  80. self.assertEqual(node1.closing_tag, next(gen1))
  81. self.assertRaises(StopIteration, next, gen1)
  82. self.assertRaises(StopIteration, next, gen2)
  83. self.assertRaises(StopIteration, next, gen3)
  84. def test_strip(self):
  85. """test Tag.__strip__()"""
  86. node1 = Tag(wraptext("i"), wraptext("foobar"))
  87. node2 = Tag(wraptext("math"), wraptext("foobar"))
  88. node3 = Tag(wraptext("br"), self_closing=True)
  89. self.assertEqual("foobar", node1.__strip__())
  90. self.assertEqual(None, node2.__strip__())
  91. self.assertEqual(None, node3.__strip__())
  92. def test_showtree(self):
  93. """test Tag.__showtree__()"""
  94. output = []
  95. getter, marker = object(), object()
  96. get = lambda code: output.append((getter, code))
  97. mark = lambda: output.append(marker)
  98. node1 = Tag(wraptext("ref"), wraptext("text"),
  99. [agen("name", "foo"), agennv("selected")])
  100. node2 = Tag(wraptext("br"), self_closing=True, padding=" ")
  101. node3 = Tag(wraptext("br"), self_closing=True, invalid=True,
  102. implicit=True, padding=" ")
  103. node1.__showtree__(output.append, get, mark)
  104. node2.__showtree__(output.append, get, mark)
  105. node3.__showtree__(output.append, get, mark)
  106. valid = [
  107. "<", (getter, node1.tag), (getter, node1.attributes[0].name),
  108. " = ", marker, (getter, node1.attributes[0].value),
  109. (getter, node1.attributes[1].name), ">", (getter, node1.contents),
  110. "</", (getter, node1.closing_tag), ">", "<", (getter, node2.tag),
  111. "/>", "</", (getter, node3.tag), ">"]
  112. self.assertEqual(valid, output)
  113. def test_tag(self):
  114. """test getter/setter for the tag attribute"""
  115. tag = wraptext("ref")
  116. node = Tag(tag, wraptext("text"))
  117. self.assertIs(tag, node.tag)
  118. self.assertIs(tag, node.closing_tag)
  119. node.tag = "span"
  120. self.assertWikicodeEqual(wraptext("span"), node.tag)
  121. self.assertWikicodeEqual(wraptext("span"), node.closing_tag)
  122. self.assertEqual("<span>text</span>", node)
  123. def test_contents(self):
  124. """test getter/setter for the contents attribute"""
  125. contents = wraptext("text")
  126. node = Tag(wraptext("ref"), contents)
  127. self.assertIs(contents, node.contents)
  128. node.contents = "text and a {{template}}"
  129. parsed = wrap([Text("text and a "), Template(wraptext("template"))])
  130. self.assertWikicodeEqual(parsed, node.contents)
  131. self.assertEqual("<ref>text and a {{template}}</ref>", node)
  132. def test_attributes(self):
  133. """test getter for the attributes attribute"""
  134. attrs = [agen("name", "bar")]
  135. node1 = Tag(wraptext("ref"), wraptext("foo"))
  136. node2 = Tag(wraptext("ref"), wraptext("foo"), attrs)
  137. self.assertEqual([], node1.attributes)
  138. self.assertIs(attrs, node2.attributes)
  139. def test_wiki_markup(self):
  140. """test getter/setter for the wiki_markup attribute"""
  141. node = Tag(wraptext("i"), wraptext("italic text"))
  142. self.assertIs(None, node.wiki_markup)
  143. node.wiki_markup = "''"
  144. self.assertEqual("''", node.wiki_markup)
  145. self.assertEqual("''italic text''", node)
  146. node.wiki_markup = False
  147. self.assertFalse(node.wiki_markup)
  148. self.assertEqual("<i>italic text</i>", node)
  149. def test_self_closing(self):
  150. """test getter/setter for the self_closing attribute"""
  151. node = Tag(wraptext("ref"), wraptext("foobar"))
  152. self.assertFalse(node.self_closing)
  153. node.self_closing = True
  154. self.assertTrue(node.self_closing)
  155. self.assertEqual("<ref/>", node)
  156. node.self_closing = 0
  157. self.assertFalse(node.self_closing)
  158. self.assertEqual("<ref>foobar</ref>", node)
  159. def test_invalid(self):
  160. """test getter/setter for the invalid attribute"""
  161. node = Tag(wraptext("br"), self_closing=True, implicit=True)
  162. self.assertFalse(node.invalid)
  163. node.invalid = True
  164. self.assertTrue(node.invalid)
  165. self.assertEqual("</br>", node)
  166. node.invalid = 0
  167. self.assertFalse(node.invalid)
  168. self.assertEqual("<br>", node)
  169. def test_implicit(self):
  170. """test getter/setter for the implicit attribute"""
  171. node = Tag(wraptext("br"), self_closing=True)
  172. self.assertFalse(node.implicit)
  173. node.implicit = True
  174. self.assertTrue(node.implicit)
  175. self.assertEqual("<br>", node)
  176. node.implicit = 0
  177. self.assertFalse(node.implicit)
  178. self.assertEqual("<br/>", node)
  179. def test_padding(self):
  180. """test getter/setter for the padding attribute"""
  181. node = Tag(wraptext("ref"), wraptext("foobar"))
  182. self.assertEqual("", node.padding)
  183. node.padding = " "
  184. self.assertEqual(" ", node.padding)
  185. self.assertEqual("<ref >foobar</ref>", node)
  186. node.padding = None
  187. self.assertEqual("", node.padding)
  188. self.assertEqual("<ref>foobar</ref>", node)
  189. self.assertRaises(ValueError, setattr, node, "padding", True)
  190. def test_closing_tag(self):
  191. """test getter/setter for the closing_tag attribute"""
  192. tag = wraptext("ref")
  193. node = Tag(tag, wraptext("foobar"))
  194. self.assertIs(tag, node.closing_tag)
  195. node.closing_tag = "ref {{ignore me}}"
  196. parsed = wrap([Text("ref "), Template(wraptext("ignore me"))])
  197. self.assertWikicodeEqual(parsed, node.closing_tag)
  198. self.assertEqual("<ref>foobar</ref {{ignore me}}>", node)
  199. def test_wiki_style_separator(self):
  200. """test getter/setter for wiki_style_separator attribute"""
  201. node = Tag(wraptext("table"), wraptext("\n"))
  202. self.assertIs(None, node.wiki_style_separator)
  203. node.wiki_style_separator = "|"
  204. self.assertEqual("|", node.wiki_style_separator)
  205. node.wiki_markup = "{"
  206. self.assertEqual("{|\n{", node)
  207. node2 = Tag(wraptext("table"), wraptext("\n"), wiki_style_separator="|")
  208. self.assertEqual("|", node2.wiki_style_separator)
  209. def test_closing_wiki_markup(self):
  210. """test getter/setter for closing_wiki_markup attribute"""
  211. node = Tag(wraptext("table"), wraptext("\n"))
  212. self.assertIs(None, node.closing_wiki_markup)
  213. node.wiki_markup = "{|"
  214. self.assertEqual("{|", node.closing_wiki_markup)
  215. node.closing_wiki_markup = "|}"
  216. self.assertEqual("|}", node.closing_wiki_markup)
  217. self.assertEqual("{|\n|}", node)
  218. node.wiki_markup = "!!"
  219. self.assertEqual("|}", node.closing_wiki_markup)
  220. self.assertEqual("!!\n|}", node)
  221. node.wiki_markup = False
  222. self.assertFalse(node.closing_wiki_markup)
  223. self.assertEqual("<table>\n</table>", node)
  224. node2 = Tag(wraptext("table"), wraptext("\n"),
  225. attrs=[agen("id", "foo")], wiki_markup="{|",
  226. closing_wiki_markup="|}")
  227. self.assertEqual("|}", node2.closing_wiki_markup)
  228. self.assertEqual('{| id="foo"\n|}', node2)
  229. def test_has(self):
  230. """test Tag.has()"""
  231. node = Tag(wraptext("ref"), wraptext("cite"), [agen("name", "foo")])
  232. self.assertTrue(node.has("name"))
  233. self.assertTrue(node.has(" name "))
  234. self.assertTrue(node.has(wraptext("name")))
  235. self.assertFalse(node.has("Name"))
  236. self.assertFalse(node.has("foo"))
  237. attrs = [agen("id", "foo"), agenp("class", "bar", " ", "\n", "\n"),
  238. agen("foo", "bar"), agenpnv("foo", " ", " \n ", " \t")]
  239. node2 = Tag(wraptext("div"), attrs=attrs, self_closing=True)
  240. self.assertTrue(node2.has("id"))
  241. self.assertTrue(node2.has("class"))
  242. self.assertTrue(node2.has(attrs[1].pad_first + str(attrs[1].name) +
  243. attrs[1].pad_before_eq))
  244. self.assertTrue(node2.has(attrs[3]))
  245. self.assertTrue(node2.has(str(attrs[3])))
  246. self.assertFalse(node2.has("idclass"))
  247. self.assertFalse(node2.has("id class"))
  248. self.assertFalse(node2.has("id=foo"))
  249. def test_get(self):
  250. """test Tag.get()"""
  251. attrs = [agen("name", "foo")]
  252. node = Tag(wraptext("ref"), wraptext("cite"), attrs)
  253. self.assertIs(attrs[0], node.get("name"))
  254. self.assertIs(attrs[0], node.get(" name "))
  255. self.assertIs(attrs[0], node.get(wraptext("name")))
  256. self.assertRaises(ValueError, node.get, "Name")
  257. self.assertRaises(ValueError, node.get, "foo")
  258. attrs = [agen("id", "foo"), agenp("class", "bar", " ", "\n", "\n"),
  259. agen("foo", "bar"), agenpnv("foo", " ", " \n ", " \t")]
  260. node2 = Tag(wraptext("div"), attrs=attrs, self_closing=True)
  261. self.assertIs(attrs[0], node2.get("id"))
  262. self.assertIs(attrs[1], node2.get("class"))
  263. self.assertIs(attrs[1], node2.get(
  264. attrs[1].pad_first + str(attrs[1].name) + attrs[1].pad_before_eq))
  265. self.assertIs(attrs[3], node2.get(attrs[3]))
  266. self.assertIs(attrs[3], node2.get(str(attrs[3])))
  267. self.assertIs(attrs[3], node2.get(" foo"))
  268. self.assertRaises(ValueError, node2.get, "idclass")
  269. self.assertRaises(ValueError, node2.get, "id class")
  270. self.assertRaises(ValueError, node2.get, "id=foo")
  271. def test_add(self):
  272. """test Tag.add()"""
  273. node = Tag(wraptext("ref"), wraptext("cite"))
  274. node.add("name", "value")
  275. node.add("name", "value", quotes=None)
  276. node.add("name", "value", quotes="'")
  277. node.add("name")
  278. node.add(1, False)
  279. node.add("style", "{{foobar}}")
  280. node.add("name", "value", '"', "\n", " ", " ")
  281. attr1 = ' name="value"'
  282. attr2 = " name=value"
  283. attr3 = " name='value'"
  284. attr4 = " name"
  285. attr5 = ' 1="False"'
  286. attr6 = ' style="{{foobar}}"'
  287. attr7 = '\nname = "value"'
  288. self.assertEqual(attr1, node.attributes[0])
  289. self.assertEqual(attr2, node.attributes[1])
  290. self.assertEqual(attr3, node.attributes[2])
  291. self.assertEqual(attr4, node.attributes[3])
  292. self.assertEqual(attr5, node.attributes[4])
  293. self.assertEqual(attr6, node.attributes[5])
  294. self.assertEqual(attr7, node.attributes[6])
  295. self.assertEqual(attr7, node.get("name"))
  296. self.assertWikicodeEqual(wrap([Template(wraptext("foobar"))]),
  297. node.attributes[5].value)
  298. self.assertEqual("".join(("<ref", attr1, attr2, attr3, attr4, attr5,
  299. attr6, attr7, ">cite</ref>")), node)
  300. self.assertRaises(ValueError, node.add, "name", "foo", quotes="bar")
  301. self.assertRaises(ValueError, node.add, "name", "a bc d", quotes=None)
  302. def test_remove(self):
  303. """test Tag.remove()"""
  304. attrs = [agen("id", "foo"), agenp("class", "bar", " ", "\n", "\n"),
  305. agen("foo", "bar"), agenpnv("foo", " ", " \n ", " \t")]
  306. node = Tag(wraptext("div"), attrs=attrs, self_closing=True)
  307. node.remove("class")
  308. self.assertEqual('<div id="foo" foo="bar" foo \n />', node)
  309. node.remove("foo")
  310. self.assertEqual('<div id="foo"/>', node)
  311. self.assertRaises(ValueError, node.remove, "foo")
  312. node.remove("id")
  313. self.assertEqual('<div/>', node)
# Allow running this test module directly as a script; verbosity=2 makes
# unittest print each test's name and one-line docstring as it runs.
if __name__ == "__main__":
    unittest.main(verbosity=2)