A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

327 regels
14 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from __future__ import unicode_literals
  23. import re
  24. from types import GeneratorType
  25. import unittest
  26. from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity,
  27. Node, Tag, Template, Text, Wikilink)
  28. from mwparserfromhell.smart_list import SmartList
  29. from mwparserfromhell.wikicode import Wikicode
  30. from mwparserfromhell import parse
  31. from mwparserfromhell.compat import py3k, str
  32. from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext
  33. class TestWikicode(TreeEqualityTestCase):
  34. """Tests for the Wikicode class, which manages a list of nodes."""
  35. def test_unicode(self):
  36. """test Wikicode.__unicode__()"""
  37. code1 = parse("foobar")
  38. code2 = parse("Have a {{template}} and a [[page|link]]")
  39. self.assertEqual("foobar", str(code1))
  40. self.assertEqual("Have a {{template}} and a [[page|link]]", str(code2))
  41. def test_nodes(self):
  42. """test getter/setter for the nodes attribute"""
  43. code = parse("Have a {{template}}")
  44. self.assertEqual(["Have a ", "{{template}}"], code.nodes)
  45. L1 = SmartList([Text("foobar"), Template(wraptext("abc"))])
  46. L2 = [Text("barfoo"), Template(wraptext("cba"))]
  47. L3 = "abc{{def}}"
  48. code.nodes = L1
  49. self.assertIs(L1, code.nodes)
  50. code.nodes = L2
  51. self.assertIs(L2, code.nodes)
  52. code.nodes = L3
  53. self.assertEqual(["abc", "{{def}}"], code.nodes)
  54. self.assertRaises(ValueError, setattr, code, "nodes", object)
  55. def test_get(self):
  56. """test Wikicode.get()"""
  57. code = parse("Have a {{template}} and a [[page|link]]")
  58. self.assertIs(code.nodes[0], code.get(0))
  59. self.assertIs(code.nodes[2], code.get(2))
  60. self.assertRaises(IndexError, code.get, 4)
  61. def test_set(self):
  62. """test Wikicode.set()"""
  63. code = parse("Have a {{template}} and a [[page|link]]")
  64. code.set(1, "{{{argument}}}")
  65. self.assertEqual("Have a {{{argument}}} and a [[page|link]]", code)
  66. self.assertIsInstance(code.get(1), Argument)
  67. code.set(2, None)
  68. self.assertEqual("Have a {{{argument}}}[[page|link]]", code)
  69. code.set(-3, "This is an ")
  70. self.assertEqual("This is an {{{argument}}}[[page|link]]", code)
  71. self.assertRaises(ValueError, code.set, 1, "foo {{bar}}")
  72. self.assertRaises(IndexError, code.set, 3, "{{baz}}")
  73. self.assertRaises(IndexError, code.set, -4, "{{baz}}")
  74. def test_index(self):
  75. """test Wikicode.index()"""
  76. code = parse("Have a {{template}} and a [[page|link]]")
  77. self.assertEqual(0, code.index("Have a "))
  78. self.assertEqual(3, code.index("[[page|link]]"))
  79. self.assertEqual(1, code.index(code.get(1)))
  80. self.assertRaises(ValueError, code.index, "foo")
  81. code = parse("{{foo}}{{bar|{{baz}}}}")
  82. self.assertEqual(1, code.index("{{bar|{{baz}}}}"))
  83. self.assertEqual(1, code.index("{{baz}}", recursive=True))
  84. self.assertEqual(1, code.index(code.get(1).get(1).value,
  85. recursive=True))
  86. self.assertRaises(ValueError, code.index, "{{baz}}", recursive=False)
  87. self.assertRaises(ValueError, code.index,
  88. code.get(1).get(1).value, recursive=False)
  89. def test_insert(self):
  90. """test Wikicode.insert()"""
  91. code = parse("Have a {{template}} and a [[page|link]]")
  92. code.insert(1, "{{{argument}}}")
  93. self.assertEqual(
  94. "Have a {{{argument}}}{{template}} and a [[page|link]]", code)
  95. self.assertIsInstance(code.get(1), Argument)
  96. code.insert(2, None)
  97. self.assertEqual(
  98. "Have a {{{argument}}}{{template}} and a [[page|link]]", code)
  99. code.insert(-3, Text("foo"))
  100. self.assertEqual(
  101. "Have a {{{argument}}}foo{{template}} and a [[page|link]]", code)
  102. code2 = parse("{{foo}}{{bar}}{{baz}}")
  103. code2.insert(1, "abc{{def}}ghi[[jk]]")
  104. self.assertEqual("{{foo}}abc{{def}}ghi[[jk]]{{bar}}{{baz}}", code2)
  105. self.assertEqual(["{{foo}}", "abc", "{{def}}", "ghi", "[[jk]]",
  106. "{{bar}}", "{{baz}}"], code2.nodes)
  107. code3 = parse("{{foo}}bar")
  108. code3.insert(1000, "[[baz]]")
  109. code3.insert(-1000, "derp")
  110. self.assertEqual("derp{{foo}}bar[[baz]]", code3)
  111. def test_insert_before(self):
  112. """test Wikicode.insert_before()"""
  113. code = parse("{{a}}{{b}}{{c}}{{d}}")
  114. code.insert_before("{{b}}", "x", recursive=True)
  115. code.insert_before("{{d}}", "[[y]]", recursive=False)
  116. self.assertEqual("{{a}}x{{b}}{{c}}[[y]]{{d}}", code)
  117. code.insert_before(code.get(2), "z")
  118. self.assertEqual("{{a}}xz{{b}}{{c}}[[y]]{{d}}", code)
  119. self.assertRaises(ValueError, code.insert_before, "{{r}}", "n",
  120. recursive=True)
  121. self.assertRaises(ValueError, code.insert_before, "{{r}}", "n",
  122. recursive=False)
  123. code2 = parse("{{a|{{b}}|{{c|d={{f}}}}}}")
  124. code2.insert_before(code2.get(0).params[0].value.get(0), "x",
  125. recursive=True)
  126. code2.insert_before("{{f}}", "y", recursive=True)
  127. self.assertEqual("{{a|x{{b}}|{{c|d=y{{f}}}}}}", code2)
  128. self.assertRaises(ValueError, code2.insert_before, "{{f}}", "y",
  129. recursive=False)
  130. def test_insert_after(self):
  131. """test Wikicode.insert_after()"""
  132. code = parse("{{a}}{{b}}{{c}}{{d}}")
  133. code.insert_after("{{b}}", "x", recursive=True)
  134. code.insert_after("{{d}}", "[[y]]", recursive=False)
  135. self.assertEqual("{{a}}{{b}}x{{c}}{{d}}[[y]]", code)
  136. code.insert_after(code.get(2), "z")
  137. self.assertEqual("{{a}}{{b}}xz{{c}}{{d}}[[y]]", code)
  138. self.assertRaises(ValueError, code.insert_after, "{{r}}", "n",
  139. recursive=True)
  140. self.assertRaises(ValueError, code.insert_after, "{{r}}", "n",
  141. recursive=False)
  142. code2 = parse("{{a|{{b}}|{{c|d={{f}}}}}}")
  143. code2.insert_after(code2.get(0).params[0].value.get(0), "x",
  144. recursive=True)
  145. code2.insert_after("{{f}}", "y", recursive=True)
  146. self.assertEqual("{{a|{{b}}x|{{c|d={{f}}y}}}}", code2)
  147. self.assertRaises(ValueError, code2.insert_after, "{{f}}", "y",
  148. recursive=False)
  149. def test_replace(self):
  150. """test Wikicode.replace()"""
  151. code = parse("{{a}}{{b}}{{c}}{{d}}")
  152. code.replace("{{b}}", "x", recursive=True)
  153. code.replace("{{d}}", "[[y]]", recursive=False)
  154. self.assertEqual("{{a}}x{{c}}[[y]]", code)
  155. code.replace(code.get(1), "z")
  156. self.assertEqual("{{a}}z{{c}}[[y]]", code)
  157. self.assertRaises(ValueError, code.replace, "{{r}}", "n",
  158. recursive=True)
  159. self.assertRaises(ValueError, code.replace, "{{r}}", "n",
  160. recursive=False)
  161. code2 = parse("{{a|{{b}}|{{c|d={{f}}}}}}")
  162. code2.replace(code2.get(0).params[0].value.get(0), "x", recursive=True)
  163. code2.replace("{{f}}", "y", recursive=True)
  164. self.assertEqual("{{a|x|{{c|d=y}}}}", code2)
  165. self.assertRaises(ValueError, code2.replace, "y", "z", recursive=False)
  166. def test_append(self):
  167. """test Wikicode.append()"""
  168. code = parse("Have a {{template}}")
  169. code.append("{{{argument}}}")
  170. self.assertEqual("Have a {{template}}{{{argument}}}", code)
  171. self.assertIsInstance(code.get(2), Argument)
  172. code.append(None)
  173. self.assertEqual("Have a {{template}}{{{argument}}}", code)
  174. code.append(Text(" foo"))
  175. self.assertEqual("Have a {{template}}{{{argument}}} foo", code)
  176. self.assertRaises(ValueError, code.append, slice(0, 1))
  177. def test_remove(self):
  178. """test Wikicode.remove()"""
  179. code = parse("{{a}}{{b}}{{c}}{{d}}")
  180. code.remove("{{b}}", recursive=True)
  181. code.remove(code.get(1), recursive=True)
  182. self.assertEqual("{{a}}{{d}}", code)
  183. self.assertRaises(ValueError, code.remove, "{{r}}", recursive=True)
  184. self.assertRaises(ValueError, code.remove, "{{r}}", recursive=False)
  185. code2 = parse("{{a|{{b}}|{{c|d={{f}}{{h}}}}}}")
  186. code2.remove(code2.get(0).params[0].value.get(0), recursive=True)
  187. code2.remove("{{f}}", recursive=True)
  188. self.assertEqual("{{a||{{c|d={{h}}}}}}", code2)
  189. self.assertRaises(ValueError, code2.remove, "{{h}}", recursive=False)
  190. def test_filter_family(self):
  191. """test the Wikicode.i?filter() family of functions"""
  192. def genlist(gen):
  193. self.assertIsInstance(gen, GeneratorType)
  194. return list(gen)
  195. ifilter = lambda code: (lambda **kw: genlist(code.ifilter(**kw)))
  196. code = parse("a{{b}}c[[d]]{{{e}}}{{f}}[[g]]")
  197. for func in (code.filter, ifilter(code)):
  198. self.assertEqual(["a", "{{b}}", "c", "[[d]]", "{{{e}}}", "{{f}}",
  199. "[[g]]"], func())
  200. self.assertEqual(["{{{e}}}"], func(forcetype=Argument))
  201. self.assertIs(code.get(4), func(forcetype=Argument)[0])
  202. self.assertEqual(["a", "c"], func(forcetype=Text))
  203. self.assertEqual([], func(forcetype=Heading))
  204. self.assertRaises(TypeError, func, forcetype=True)
  205. funcs = [
  206. lambda name, **kw: getattr(code, "filter_" + name)(**kw),
  207. lambda name, **kw: genlist(getattr(code, "ifilter_" + name)(**kw))
  208. ]
  209. for get_filter in funcs:
  210. self.assertEqual(["{{{e}}}"], get_filter("arguments"))
  211. self.assertIs(code.get(4), get_filter("arguments")[0])
  212. self.assertEqual([], get_filter("comments"))
  213. self.assertEqual([], get_filter("headings"))
  214. self.assertEqual([], get_filter("html_entities"))
  215. self.assertEqual([], get_filter("tags"))
  216. self.assertEqual(["{{b}}", "{{f}}"], get_filter("templates"))
  217. self.assertEqual(["a", "c"], get_filter("text"))
  218. self.assertEqual(["[[d]]", "[[g]]"], get_filter("wikilinks"))
  219. code2 = parse("{{a|{{b}}|{{c|d={{f}}{{h}}}}}}")
  220. for func in (code2.filter, ifilter(code2)):
  221. self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"],
  222. func(recursive=False, forcetype=Template))
  223. self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}",
  224. "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"],
  225. func(recursive=True, forcetype=Template))
  226. code3 = parse("{{foobar}}{{FOO}}{{baz}}{{bz}}")
  227. for func in (code3.filter, ifilter(code3)):
  228. self.assertEqual(["{{foobar}}", "{{FOO}}"], func(matches=r"foo"))
  229. self.assertEqual(["{{foobar}}", "{{FOO}}"],
  230. func(matches=r"^{{foo.*?}}"))
  231. self.assertEqual(["{{foobar}}"],
  232. func(matches=r"^{{foo.*?}}", flags=re.UNICODE))
  233. self.assertEqual(["{{baz}}", "{{bz}}"], func(matches=r"^{{b.*?z"))
  234. self.assertEqual(["{{baz}}"], func(matches=r"^{{b.+?z}}"))
  235. self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"],
  236. code2.filter_templates(recursive=False))
  237. self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}",
  238. "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"],
  239. code2.filter_templates(recursive=True))
  240. self.assertEqual(["{{baz}}", "{{bz}}"],
  241. code3.filter_templates(matches=r"^{{b.*?z"))
  242. self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z"))
  243. self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z", flags=0))
  244. self.assertRaises(TypeError, code.filter_templates, 100)
  245. self.assertRaises(TypeError, code.filter_templates, a=42)
  246. self.assertRaises(TypeError, code.filter_templates, forcetype=Template)
  247. def test_get_sections(self):
  248. """test Wikicode.get_sections()"""
  249. page1 = ""
  250. page2 = "==Heading=="
  251. page3 = "===Heading===\nFoo bar baz\n====Gnidaeh====\n"
  252. page4 = """
  253. This is a lead.
  254. == Section I ==
  255. Section I body. {{and a|template}}
  256. === Section I.A ===
  257. Section I.A [[body]].
  258. === Section I.B ===
  259. ==== Section I.B.1 ====
  260. Section I.B.1 body.
  261. &bull;Some content.
  262. == Section II ==
  263. Section II body.
  264. == Section III ==
  265. === Section III.A ===
  266. Text.
  267. ===== Section III.A.1.a =====
  268. More text.
  269. ==== Section III.A.2 ====
  270. Even more text.
  271. ======= section III.A.2.a.i.1 =======
  272. An invalid section!"""
  273. self.assertEqual([], parse(page1).get_sections())
  274. self.assertEqual(["", "==Heading=="], parse(page2).get_sections())
  275. self.assertEqual(["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n", "====Gnidaeh====\n"], parse(page3).get_sections())
  276. def test_strip_code(self):
  277. """test Wikicode.strip_code()"""
  278. pass
  279. def test_get_tree(self):
  280. """test Wikicode.get_tree()"""
  281. # Since individual nodes have test cases for their __showtree___
  282. # methods, and the docstring covers all possibilities, this doesn't
  283. # need to test anything other than it:
  284. code = parse("Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}")
  285. expected = "Lorem ipsum \n{{\n\t foo\n\t| 1\n\t= bar\n\t| 2\n\t= " + \
  286. "{{\n\t\t\tbaz\n\t }}\n\t| spam\n\t= eggs\n}}"
  287. self.assertEqual(expected.expandtabs(4), code.get_tree())
  288. if __name__ == "__main__":
  289. unittest.main(verbosity=2)