A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

171 lines
7.2 KiB

  1. #
  2. # Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
  3. #
  4. # Permission is hereby granted, free of charge, to any person obtaining a copy
  5. # of this software and associated documentation files (the "Software"), to deal
  6. # in the Software without restriction, including without limitation the rights
  7. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8. # copies of the Software, and to permit persons to whom the Software is
  9. # furnished to do so, subject to the following conditions:
  10. #
  11. # The above copyright notice and this permission notice shall be included in
  12. # all copies or substantial portions of the Software.
  13. #
  14. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20. # SOFTWARE.
  21. import unittest
  22. from mwparserfromhell.nodes import HTMLEntity
  23. from ._test_tree_equality import TreeEqualityTestCase
  class TestHTMLEntity(TreeEqualityTestCase):
      """Unit tests covering the HTMLEntity node."""

      def test_str(self):
          """str() of an entity gives its named, decimal or hex wikicode form."""
          cases = [
              ("&nbsp;", HTMLEntity("nbsp", named=True, hexadecimal=False)),
              ("&#107;", HTMLEntity("107", named=False, hexadecimal=False)),
              ("&#x6b;", HTMLEntity("6b", named=False, hexadecimal=True)),
              (
                  "&#X6C;",
                  HTMLEntity("6C", named=False, hexadecimal=True, hex_char="X"),
              ),
          ]
          for expected, entity in cases:
              self.assertEqual(expected, str(entity))

      def test_children(self):
          """__children__() of an entity produces an already-exhausted iterator."""
          entity = HTMLEntity("nbsp", named=True, hexadecimal=False)
          children = entity.__children__()
          with self.assertRaises(StopIteration):
              next(children)

      def test_strip(self):
          """__strip__() gives the character when normalizing, else the markup."""
          named = HTMLEntity("nbsp", named=True, hexadecimal=False)
          decimal = HTMLEntity("107", named=False, hexadecimal=False)
          hexadecimal = HTMLEntity("e9", named=False, hexadecimal=True)
          # (entity, result with normalize=True, result with normalize=False)
          expectations = [
              (named, "\xa0", "&nbsp;"),
              (decimal, "k", "&#107;"),
              (hexadecimal, "é", "&#xe9;"),
          ]
          for entity, normalized, raw in expectations:
              self.assertEqual(normalized, entity.__strip__(normalize=True))
              self.assertEqual(raw, entity.__strip__(normalize=False))

      def test_showtree(self):
          """__showtree__() writes one line per entity through the callback."""
          entities = [
              HTMLEntity("nbsp", named=True, hexadecimal=False),
              HTMLEntity("107", named=False, hexadecimal=False),
              HTMLEntity("e9", named=False, hexadecimal=True),
          ]
          written = []
          for entity in entities:
              entity.__showtree__(written.append, None, None)
          self.assertEqual(["&nbsp;", "&#107;", "&#xe9;"], written)

      def test_value(self):
          """Reading and assigning the value attribute, including rejections."""
          first = HTMLEntity("nbsp")
          second = HTMLEntity("107")
          third = HTMLEntity("e9")
          for expected, entity in (("nbsp", first), ("107", second), ("e9", third)):
              self.assertEqual(expected, entity.value)

          first.value = "ffa4"
          second.value = 72
          third.value = "Sigma"

          # Assigning a value also re-derives the named/hexadecimal flags.
          self.assertEqual("ffa4", first.value)
          self.assertFalse(first.named)
          self.assertTrue(first.hexadecimal)

          # An int is read back as its string form.
          self.assertEqual("72", second.value)
          self.assertFalse(second.named)
          self.assertFalse(second.hexadecimal)

          self.assertEqual("Sigma", third.value)
          self.assertTrue(third.named)
          self.assertFalse(third.hexadecimal)

          # These assignments only need to succeed without raising.
          first.value = "10FFFF"
          second.value = 110000
          second.value = 1114111

          for rejected in ("", "foobar", True, -1):
              with self.assertRaises(ValueError):
                  third.value = rejected
          # NOTE(review): 110000 was accepted on ``second`` above but must be
          # rejected on ``first``; presumably because ``first`` is in
          # hexadecimal mode, where the same digits are out of range.
          for rejected in (110000, "1114112", "12FFFF"):
              with self.assertRaises(ValueError):
                  first.value = rejected

      def test_named(self):
          """Reading and assigning the named attribute."""
          first = HTMLEntity("nbsp")
          second = HTMLEntity("107")
          third = HTMLEntity("e9")
          self.assertTrue(first.named)
          self.assertFalse(second.named)
          self.assertFalse(third.named)

          # Truthy/falsy ints that match the current state are accepted.
          first.named = 1
          second.named = 0
          third.named = 0
          self.assertTrue(first.named)
          self.assertFalse(second.named)
          self.assertFalse(third.named)

          # Flipping the flag on any of these three nodes is refused.
          for entity, flag in ((first, False), (second, True), (third, True)):
              with self.assertRaises(ValueError):
                  entity.named = flag

      def test_hexadecimal(self):
          """Reading and assigning the hexadecimal attribute."""
          first = HTMLEntity("nbsp")
          second = HTMLEntity("107")
          third = HTMLEntity("e9")
          self.assertFalse(first.hexadecimal)
          self.assertFalse(second.hexadecimal)
          self.assertTrue(third.hexadecimal)

          first.hexadecimal = False
          second.hexadecimal = True
          third.hexadecimal = False
          self.assertFalse(first.hexadecimal)
          self.assertTrue(second.hexadecimal)
          self.assertFalse(third.hexadecimal)

          # The named entity cannot be switched to hexadecimal.
          with self.assertRaises(ValueError):
              first.hexadecimal = True

      def test_hex_char(self):
          """Reading and assigning the hex_char attribute."""
          plain = HTMLEntity("e9")
          custom = HTMLEntity("e9", hex_char="X")
          self.assertEqual("x", plain.hex_char)
          self.assertEqual("X", custom.hex_char)

          plain.hex_char = "X"
          custom.hex_char = "x"
          self.assertEqual("X", plain.hex_char)
          self.assertEqual("x", custom.hex_char)

          for invalid in (123, "foobar", True):
              with self.assertRaises(ValueError):
                  plain.hex_char = invalid

      def test_normalize(self):
          """normalize() gives the entity's character, or raises ValueError."""
          convertible = [
              ("\xa0", HTMLEntity("nbsp")),
              ("k", HTMLEntity("107")),
              ("é", HTMLEntity("e9")),
              ("\U0001F648", HTMLEntity("1f648")),
          ]
          unconvertible = [
              HTMLEntity("-2"),
              HTMLEntity("110000", named=False, hexadecimal=True),
          ]
          for expected, entity in convertible:
              self.assertEqual(expected, entity.normalize())
          for entity in unconvertible:
              with self.assertRaises(ValueError):
                  entity.normalize()
  # Allow running this test module directly; verbosity=2 lists each test by name.
  if __name__ == "__main__":
      unittest.main(verbosity=2)