A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
Você não pode selecionar mais de 25 tópicos. Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.
 
 
 
 

173 linhas
7.2 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from __future__ import unicode_literals
  23. import unittest
  24. from mwparserfromhell.compat import str
  25. from mwparserfromhell.nodes import HTMLEntity
  26. from mwparserfromhell.smart_list import SmartList
  27. from mwparserfromhell.wikicode import Wikicode
  28. from ._test_tree_equality import TreeEqualityTestCase
  def wrap(L):
      """Put *L* into a SmartList and return a Wikicode object built on it."""
      return Wikicode(SmartList(L))
  class TestHTMLEntity(TreeEqualityTestCase):
      """Unit tests covering the behaviour of HTMLEntity nodes.

      The tests build named, decimal and hexadecimal entities and check their
      text form, stripping, tree display and normalization, as well as the
      validation done when ``value``, ``named``, ``hexadecimal`` and
      ``hex_char`` are assigned.
      """

      def test_unicode(self):
          """test HTMLEntity.__unicode__()

          Converting an entity to text must give back its wikicode form,
          including the requested hexadecimal marker character.
          """
          named = HTMLEntity("nbsp", named=True, hexadecimal=False)
          decimal = HTMLEntity("107", named=False, hexadecimal=False)
          lower_hex = HTMLEntity("6b", named=False, hexadecimal=True)
          upper_hex = HTMLEntity("6C", named=False, hexadecimal=True,
                                 hex_char="X")
          cases = (
              ("&nbsp;", named),
              ("&#107;", decimal),
              ("&#x6b;", lower_hex),
              ("&#X6C;", upper_hex),
          )
          for expected, entity in cases:
              self.assertEqual(expected, str(entity))

      def test_strip(self):
          """test HTMLEntity.__strip__()

          With a true first argument the entity is replaced by the character
          it stands for; with a false one its text is returned unchanged.
          The second argument makes no difference to either result.
          """
          named = HTMLEntity("nbsp", named=True, hexadecimal=False)
          decimal = HTMLEntity("107", named=False, hexadecimal=False)
          hexa = HTMLEntity("e9", named=False, hexadecimal=True)
          # (entity, result when first argument is true, result when false)
          cases = (
              (named, "\xa0", "&nbsp;"),
              (decimal, "k", "&#107;"),
              (hexa, "é", "&#xe9;"),
          )
          for entity, character, text in cases:
              self.assertEqual(character, entity.__strip__(True, True))
              self.assertEqual(character, entity.__strip__(True, False))
              self.assertEqual(text, entity.__strip__(False, True))
              self.assertEqual(text, entity.__strip__(False, False))

      def test_showtree(self):
          """test HTMLEntity.__showtree__()

          Each entity must pass exactly its own text form to the write
          callback it is given.
          """
          written = []
          entities = [
              HTMLEntity("nbsp", named=True, hexadecimal=False),
              HTMLEntity("107", named=False, hexadecimal=False),
              HTMLEntity("e9", named=False, hexadecimal=True),
          ]
          for entity in entities:
              entity.__showtree__(written.append, None, None)
          self.assertEqual(["&nbsp;", "&#107;", "&#xe9;"], written)

      def test_value(self):
          """test HTMLEntity.value()

          Reading ``value`` returns what the entity was built with; assigning
          it updates ``named`` and ``hexadecimal`` to fit the new value and
          raises ValueError for values that are rejected.
          """
          first = HTMLEntity("nbsp")
          second = HTMLEntity("107")
          third = HTMLEntity("e9")
          self.assertEqual("nbsp", first.value)
          self.assertEqual("107", second.value)
          self.assertEqual("e9", third.value)

          first.value = "ffa4"
          second.value = 72
          third.value = "Sigma"

          # A hex-only string turns the entity into a hexadecimal one.
          self.assertEqual("ffa4", first.value)
          self.assertFalse(first.named)
          self.assertTrue(first.hexadecimal)
          # An integer is stored as its decimal string.
          self.assertEqual("72", second.value)
          self.assertFalse(second.named)
          self.assertFalse(second.hexadecimal)
          # An entity name turns the entity into a named one.
          self.assertEqual("Sigma", third.value)
          self.assertTrue(third.named)
          self.assertFalse(third.hexadecimal)

          # These assignments must all be accepted without error.
          first.value = "10FFFF"
          second.value = 110000
          second.value = 1114111

          for rejected in ("", "foobar", True, -1):
              self.assertRaises(ValueError, setattr, third, "value", rejected)
          # NOTE(review): `first` is hexadecimal here, so these are presumably
          # rejected as out of range when read as hex -- confirm in the setter.
          for rejected in (110000, "1114112"):
              self.assertRaises(ValueError, setattr, first, "value", rejected)

      def test_named(self):
          """test HTMLEntity.named()

          ``named`` is detected from the value, accepts truthy/falsy
          assignments that agree with it, and raises ValueError otherwise.
          """
          first = HTMLEntity("nbsp")
          second = HTMLEntity("107")
          third = HTMLEntity("e9")
          trio = (first, second, third)
          self.assertEqual([True, False, False],
                           [bool(entity.named) for entity in trio])

          first.named = 1
          second.named = 0
          third.named = 0
          self.assertEqual([True, False, False],
                           [bool(entity.named) for entity in trio])

          self.assertRaises(ValueError, setattr, first, "named", False)
          for entity in (second, third):
              self.assertRaises(ValueError, setattr, entity, "named", True)

      def test_hexadecimal(self):
          """test HTMLEntity.hexadecimal()

          ``hexadecimal`` is detected from the value, can be toggled on the
          numeric entities, and raises ValueError when enabled on the named
          one.
          """
          first = HTMLEntity("nbsp")
          second = HTMLEntity("107")
          third = HTMLEntity("e9")
          trio = (first, second, third)
          self.assertEqual([False, False, True],
                           [bool(entity.hexadecimal) for entity in trio])

          first.hexadecimal = False
          second.hexadecimal = True
          third.hexadecimal = False
          self.assertEqual([False, True, False],
                           [bool(entity.hexadecimal) for entity in trio])

          self.assertRaises(ValueError, setattr, first, "hexadecimal", True)

      def test_hex_char(self):
          """test HTMLEntity.hex_char()

          ``hex_char`` is "x" unless given at construction, can be switched
          between "x" and "X", and raises ValueError for the other values
          tried here.
          """
          first = HTMLEntity("e9")
          second = HTMLEntity("e9", hex_char="X")
          self.assertEqual("x", first.hex_char)
          self.assertEqual("X", second.hex_char)

          first.hex_char = "X"
          second.hex_char = "x"
          self.assertEqual("X", first.hex_char)
          self.assertEqual("x", second.hex_char)

          for rejected in (123, "foobar", True):
              self.assertRaises(
                  ValueError, setattr, first, "hex_char", rejected)

      def test_normalize(self):
          """test HTMLEntity.normalize()

          ``normalize()`` must return the character each entity stands for,
          including one outside the Basic Multilingual Plane.
          """
          entities = [
              HTMLEntity(value) for value in ("nbsp", "107", "e9", "1f648")
          ]
          characters = ["\xa0", "k", "é", "\U0001F648"]
          for character, entity in zip(characters, entities):
              self.assertEqual(character, entity.normalize())
  # Run the tests with per-test verbose output when executed as a script.
  if __name__ == "__main__":
      unittest.main(verbosity=2)