A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

313 lines
14 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from __future__ import unicode_literals
  23. try:
  24. import unittest2 as unittest
  25. except ImportError:
  26. import unittest
  27. from mwparserfromhell.compat import str
  28. from mwparserfromhell.nodes import Tag, Template, Text
  29. from mwparserfromhell.nodes.extras import Attribute
  30. from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext
  31. agen = lambda name, value: Attribute(wraptext(name), wraptext(value))
  32. agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), False)
  33. agenp = lambda name, v, a, b, c: Attribute(wraptext(name), v, True, a, b, c)
  34. agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, True, a, b, c)
class TestTag(TreeEqualityTestCase):
    """Test cases for the Tag node.

    The methods below cover string rendering, child iteration, stripping,
    tree display, the getters/setters of the individual Tag properties, and
    the attribute helpers has(), get(), add() and remove().
    """
  37. def test_unicode(self):
  38. """test Tag.__unicode__()"""
  39. node1 = Tag(wraptext("ref"))
  40. node2 = Tag(wraptext("span"), wraptext("foo"),
  41. [agen("style", "color: red;")])
  42. node3 = Tag(wraptext("ref"),
  43. attrs=[agennq("name", "foo"),
  44. agenpnv("some_attr", " ", "", "")],
  45. self_closing=True)
  46. node4 = Tag(wraptext("br"), self_closing=True, padding=" ")
  47. node5 = Tag(wraptext("br"), self_closing=True, implicit=True)
  48. node6 = Tag(wraptext("br"), self_closing=True, invalid=True,
  49. implicit=True)
  50. node7 = Tag(wraptext("br"), self_closing=True, invalid=True,
  51. padding=" ")
  52. node8 = Tag(wraptext("hr"), wiki_markup="----", self_closing=True)
  53. node9 = Tag(wraptext("i"), wraptext("italics!"), wiki_markup="''")
  54. self.assertEqual("<ref></ref>", str(node1))
  55. self.assertEqual('<span style="color: red;">foo</span>', str(node2))
  56. self.assertEqual("<ref name=foo some_attr/>", str(node3))
  57. self.assertEqual("<br />", str(node4))
  58. self.assertEqual("<br>", str(node5))
  59. self.assertEqual("</br>", str(node6))
  60. self.assertEqual("</br />", str(node7))
  61. self.assertEqual("----", str(node8))
  62. self.assertEqual("''italics!''", str(node9))
  63. def test_children(self):
  64. """test Tag.__children__()"""
  65. # <ref>foobar</ref>
  66. node1 = Tag(wraptext("ref"), wraptext("foobar"))
  67. # '''bold text'''
  68. node2 = Tag(wraptext("b"), wraptext("bold text"), wiki_markup="'''")
  69. # <img id="foo" class="bar" />
  70. node3 = Tag(wraptext("img"),
  71. attrs=[Attribute(wraptext("id"), wraptext("foo")),
  72. Attribute(wraptext("class"), wraptext("bar"))],
  73. self_closing=True, padding=" ")
  74. gen1 = node1.__children__()
  75. gen2 = node2.__children__()
  76. gen3 = node3.__children__()
  77. self.assertEqual(node1.tag, next(gen1))
  78. self.assertEqual(node3.tag, next(gen3))
  79. self.assertEqual(node3.attributes[0].name, next(gen3))
  80. self.assertEqual(node3.attributes[0].value, next(gen3))
  81. self.assertEqual(node3.attributes[1].name, next(gen3))
  82. self.assertEqual(node3.attributes[1].value, next(gen3))
  83. self.assertEqual(node1.contents, next(gen1))
  84. self.assertEqual(node2.contents, next(gen2))
  85. self.assertEqual(node1.closing_tag, next(gen1))
  86. self.assertRaises(StopIteration, next, gen1)
  87. self.assertRaises(StopIteration, next, gen2)
  88. self.assertRaises(StopIteration, next, gen3)
  89. def test_strip(self):
  90. """test Tag.__strip__()"""
  91. node1 = Tag(wraptext("i"), wraptext("foobar"))
  92. node2 = Tag(wraptext("math"), wraptext("foobar"))
  93. node3 = Tag(wraptext("br"), self_closing=True)
  94. for a in (True, False):
  95. for b in (True, False):
  96. self.assertEqual("foobar", node1.__strip__(a, b))
  97. self.assertEqual(None, node2.__strip__(a, b))
  98. self.assertEqual(None, node3.__strip__(a, b))
    def test_showtree(self):
        """test Tag.__showtree__()

        Calls __showtree__() on three tags with recording callbacks and
        compares the recorded sequence against the expected one.
        """
        # Everything the callbacks receive is appended to this single list.
        output = []
        # Unique sentinels so the entries made by ``get`` and ``mark`` can be
        # told apart from the plain strings written via ``output.append``.
        getter, marker = object(), object()
        get = lambda code: output.append((getter, code))
        mark = lambda: output.append(marker)
        node1 = Tag(wraptext("ref"), wraptext("text"), [agen("name", "foo")])
        node2 = Tag(wraptext("br"), self_closing=True, padding=" ")
        node3 = Tag(wraptext("br"), self_closing=True, invalid=True,
                    implicit=True, padding=" ")
        node1.__showtree__(output.append, get, mark)
        node2.__showtree__(output.append, get, mark)
        node3.__showtree__(output.append, get, mark)
        # Expected recording for node1, node2 and node3, in that order. Note
        # that no padding shows up here even though node2 and node3 were
        # created with padding=" ".
        valid = [
            "<", (getter, node1.tag), (getter, node1.attributes[0].name),
            " = ", marker, (getter, node1.attributes[0].value), ">",
            (getter, node1.contents), "</", (getter, node1.closing_tag), ">",
            "<", (getter, node2.tag), "/>", "</", (getter, node3.tag), ">"]
        self.assertEqual(valid, output)
  118. def test_tag(self):
  119. """test getter/setter for the tag attribute"""
  120. tag = wraptext("ref")
  121. node = Tag(tag, wraptext("text"))
  122. self.assertIs(tag, node.tag)
  123. self.assertIs(tag, node.closing_tag)
  124. node.tag = "span"
  125. self.assertWikicodeEqual(wraptext("span"), node.tag)
  126. self.assertWikicodeEqual(wraptext("span"), node.closing_tag)
  127. self.assertEqual("<span>text</span>", node)
  128. def test_contents(self):
  129. """test getter/setter for the contents attribute"""
  130. contents = wraptext("text")
  131. node = Tag(wraptext("ref"), contents)
  132. self.assertIs(contents, node.contents)
  133. node.contents = "text and a {{template}}"
  134. parsed = wrap([Text("text and a "), Template(wraptext("template"))])
  135. self.assertWikicodeEqual(parsed, node.contents)
  136. self.assertEqual("<ref>text and a {{template}}</ref>", node)
  137. def test_attributes(self):
  138. """test getter for the attributes attribute"""
  139. attrs = [agen("name", "bar")]
  140. node1 = Tag(wraptext("ref"), wraptext("foo"))
  141. node2 = Tag(wraptext("ref"), wraptext("foo"), attrs)
  142. self.assertEqual([], node1.attributes)
  143. self.assertIs(attrs, node2.attributes)
  144. def test_wiki_markup(self):
  145. """test getter/setter for the wiki_markup attribute"""
  146. node = Tag(wraptext("i"), wraptext("italic text"))
  147. self.assertIs(None, node.wiki_markup)
  148. node.wiki_markup = "''"
  149. self.assertEqual("''", node.wiki_markup)
  150. self.assertEqual("''italic text''", node)
  151. node.wiki_markup = False
  152. self.assertFalse(node.wiki_markup)
  153. self.assertEqual("<i>italic text</i>", node)
  154. def test_self_closing(self):
  155. """test getter/setter for the self_closing attribute"""
  156. node = Tag(wraptext("ref"), wraptext("foobar"))
  157. self.assertFalse(node.self_closing)
  158. node.self_closing = True
  159. self.assertTrue(node.self_closing)
  160. self.assertEqual("<ref/>", node)
  161. node.self_closing = 0
  162. self.assertFalse(node.self_closing)
  163. self.assertEqual("<ref>foobar</ref>", node)
  164. def test_invalid(self):
  165. """test getter/setter for the invalid attribute"""
  166. node = Tag(wraptext("br"), self_closing=True, implicit=True)
  167. self.assertFalse(node.invalid)
  168. node.invalid = True
  169. self.assertTrue(node.invalid)
  170. self.assertEqual("</br>", node)
  171. node.invalid = 0
  172. self.assertFalse(node.invalid)
  173. self.assertEqual("<br>", node)
  174. def test_implicit(self):
  175. """test getter/setter for the implicit attribute"""
  176. node = Tag(wraptext("br"), self_closing=True)
  177. self.assertFalse(node.implicit)
  178. node.implicit = True
  179. self.assertTrue(node.implicit)
  180. self.assertEqual("<br>", node)
  181. node.implicit = 0
  182. self.assertFalse(node.implicit)
  183. self.assertEqual("<br/>", node)
  184. def test_padding(self):
  185. """test getter/setter for the padding attribute"""
  186. node = Tag(wraptext("ref"), wraptext("foobar"))
  187. self.assertEqual("", node.padding)
  188. node.padding = " "
  189. self.assertEqual(" ", node.padding)
  190. self.assertEqual("<ref >foobar</ref>", node)
  191. node.padding = None
  192. self.assertEqual("", node.padding)
  193. self.assertEqual("<ref>foobar</ref>", node)
  194. self.assertRaises(ValueError, setattr, node, "padding", True)
  195. def test_closing_tag(self):
  196. """test getter/setter for the closing_tag attribute"""
  197. tag = wraptext("ref")
  198. node = Tag(tag, wraptext("foobar"))
  199. self.assertIs(tag, node.closing_tag)
  200. node.closing_tag = "ref {{ignore me}}"
  201. parsed = wrap([Text("ref "), Template(wraptext("ignore me"))])
  202. self.assertWikicodeEqual(parsed, node.closing_tag)
  203. self.assertEqual("<ref>foobar</ref {{ignore me}}>", node)
  204. def test_has(self):
  205. """test Tag.has()"""
  206. node = Tag(wraptext("ref"), wraptext("cite"), [agen("name", "foo")])
  207. self.assertTrue(node.has("name"))
  208. self.assertTrue(node.has(" name "))
  209. self.assertTrue(node.has(wraptext("name")))
  210. self.assertFalse(node.has("Name"))
  211. self.assertFalse(node.has("foo"))
  212. attrs = [agen("id", "foo"), agenp("class", "bar", " ", "\n", "\n"),
  213. agen("foo", "bar"), agenpnv("foo", " ", " \n ", " \t")]
  214. node2 = Tag(wraptext("div"), attrs=attrs, self_closing=True)
  215. self.assertTrue(node2.has("id"))
  216. self.assertTrue(node2.has("class"))
  217. self.assertTrue(node2.has(attrs[1].pad_first + str(attrs[1].name) +
  218. attrs[1].pad_before_eq))
  219. self.assertTrue(node2.has(attrs[3]))
  220. self.assertTrue(node2.has(str(attrs[3])))
  221. self.assertFalse(node2.has("idclass"))
  222. self.assertFalse(node2.has("id class"))
  223. self.assertFalse(node2.has("id=foo"))
  224. def test_get(self):
  225. """test Tag.get()"""
  226. attrs = [agen("name", "foo")]
  227. node = Tag(wraptext("ref"), wraptext("cite"), attrs)
  228. self.assertIs(attrs[0], node.get("name"))
  229. self.assertIs(attrs[0], node.get(" name "))
  230. self.assertIs(attrs[0], node.get(wraptext("name")))
  231. self.assertRaises(ValueError, node.get, "Name")
  232. self.assertRaises(ValueError, node.get, "foo")
  233. attrs = [agen("id", "foo"), agenp("class", "bar", " ", "\n", "\n"),
  234. agen("foo", "bar"), agenpnv("foo", " ", " \n ", " \t")]
  235. node2 = Tag(wraptext("div"), attrs=attrs, self_closing=True)
  236. self.assertIs(attrs[0], node2.get("id"))
  237. self.assertIs(attrs[1], node2.get("class"))
  238. self.assertIs(attrs[1], node2.get(
  239. attrs[1].pad_first + str(attrs[1].name) + attrs[1].pad_before_eq))
  240. self.assertIs(attrs[3], node2.get(attrs[3]))
  241. self.assertIs(attrs[3], node2.get(str(attrs[3])))
  242. self.assertIs(attrs[3], node2.get(" foo"))
  243. self.assertRaises(ValueError, node2.get, "idclass")
  244. self.assertRaises(ValueError, node2.get, "id class")
  245. self.assertRaises(ValueError, node2.get, "id=foo")
  246. def test_add(self):
  247. """test Tag.add()"""
  248. node = Tag(wraptext("ref"), wraptext("cite"))
  249. node.add("name", "value")
  250. node.add("name", "value", quoted=False)
  251. node.add("name")
  252. node.add(1, False)
  253. node.add("style", "{{foobar}}")
  254. node.add("name", "value", True, "\n", " ", " ")
  255. attr1 = ' name="value"'
  256. attr2 = " name=value"
  257. attr3 = " name"
  258. attr4 = ' 1="False"'
  259. attr5 = ' style="{{foobar}}"'
  260. attr6 = '\nname = "value"'
  261. self.assertEqual(attr1, node.attributes[0])
  262. self.assertEqual(attr2, node.attributes[1])
  263. self.assertEqual(attr3, node.attributes[2])
  264. self.assertEqual(attr4, node.attributes[3])
  265. self.assertEqual(attr5, node.attributes[4])
  266. self.assertEqual(attr6, node.attributes[5])
  267. self.assertEqual(attr6, node.get("name"))
  268. self.assertWikicodeEqual(wrap([Template(wraptext("foobar"))]),
  269. node.attributes[4].value)
  270. self.assertEqual("".join(("<ref", attr1, attr2, attr3, attr4, attr5,
  271. attr6, ">cite</ref>")), node)
  272. def test_remove(self):
  273. """test Tag.remove()"""
  274. attrs = [agen("id", "foo"), agenp("class", "bar", " ", "\n", "\n"),
  275. agen("foo", "bar"), agenpnv("foo", " ", " \n ", " \t")]
  276. node = Tag(wraptext("div"), attrs=attrs, self_closing=True)
  277. node.remove("class")
  278. self.assertEqual('<div id="foo" foo="bar" foo \n />', node)
  279. node.remove("foo")
  280. self.assertEqual('<div id="foo"/>', node)
  281. self.assertRaises(ValueError, node.remove, "foo")
  282. node.remove("id")
  283. self.assertEqual('<div/>', node)
# Run the tests with verbose output when this file is executed as a script.
if __name__ == "__main__":
    unittest.main(verbosity=2)