A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

414 lines
15 KiB

  1. #
  2. # Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
  3. #
  4. # Permission is hereby granted, free of charge, to any person obtaining a copy
  5. # of this software and associated documentation files (the "Software"), to deal
  6. # in the Software without restriction, including without limitation the rights
  7. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8. # copies of the Software, and to permit persons to whom the Software is
  9. # furnished to do so, subject to the following conditions:
  10. #
  11. # The above copyright notice and this permission notice shall be included in
  12. # all copies or substantial portions of the Software.
  13. #
  14. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20. # SOFTWARE.
  21. import pytest
  22. from sys import getdefaultencoding
  23. from types import GeneratorType
  24. from mwparserfromhell.string_mixin import StringMixIn
  25. class _FakeString(StringMixIn):
  26. def __init__(self, data):
  27. self._data = data
  28. def __unicode__(self):
  29. return self._data
  30. class TestStringMixIn:
  31. """Test cases for the StringMixIn class."""
  32. @pytest.mark.parametrize('method', [
  33. "capitalize", "casefold", "center", "count", "encode", "endswith",
  34. "expandtabs", "find", "format", "format_map", "index", "isalnum",
  35. "isalpha", "isdecimal", "isdigit", "isidentifier", "islower",
  36. "isnumeric", "isprintable", "isspace", "istitle", "isupper",
  37. "join", "ljust", "lower", "lstrip", "maketrans", "partition",
  38. "replace", "rfind", "rindex", "rjust", "rpartition", "rsplit",
  39. "rstrip", "split", "splitlines", "startswith", "strip", "swapcase",
  40. "title", "translate", "upper", "zfill"
  41. ])
  42. def test_docs(self, method):
  43. """make sure the various methods of StringMixIn have docstrings"""
  44. expected = getattr("foo", method).__doc__
  45. actual = getattr(_FakeString("foo"), method).__doc__
  46. assert expected == actual
  47. def test_types(self):
  48. """make sure StringMixIns convert to different types correctly"""
  49. fstr = _FakeString("fake string")
  50. assert str(fstr) == "fake string"
  51. assert bytes(fstr) == b"fake string"
  52. assert repr(fstr) == "'fake string'"
  53. assert isinstance(str(fstr), str)
  54. assert isinstance(bytes(fstr), bytes)
  55. assert isinstance(repr(fstr), str)
  56. def test_comparisons(self):
  57. """make sure comparison operators work"""
  58. str1 = _FakeString("this is a fake string")
  59. str2 = _FakeString("this is a fake string")
  60. str3 = _FakeString("fake string, this is")
  61. str4 = "this is a fake string"
  62. str5 = "fake string, this is"
  63. assert str1 <= str2
  64. assert str1 >= str2
  65. assert str1 == str2
  66. assert str1 == str2
  67. assert str1 >= str2
  68. assert str1 <= str2
  69. assert str1 > str3
  70. assert str1 >= str3
  71. assert str1 != str3
  72. assert str1 != str3
  73. assert str1 >= str3
  74. assert str1 > str3
  75. assert str1 <= str4
  76. assert str1 >= str4
  77. assert str1 == str4
  78. assert str1 == str4
  79. assert str1 >= str4
  80. assert str1 <= str4
  81. assert str5 <= str1
  82. assert str5 < str1
  83. assert str5 != str1
  84. assert str5 != str1
  85. assert str5 < str1
  86. assert str5 <= str1
  87. def test_other_magics(self):
  88. """test other magically implemented features, like len() and iter()"""
  89. str1 = _FakeString("fake string")
  90. str2 = _FakeString("")
  91. expected = ["f", "a", "k", "e", " ", "s", "t", "r", "i", "n", "g"]
  92. assert bool(str1) is True
  93. assert bool(str2) is False
  94. assert 11 == len(str1)
  95. assert 0 == len(str2)
  96. out = []
  97. for ch in str1:
  98. out.append(ch)
  99. assert expected == out
  100. out = []
  101. for ch in str2:
  102. out.append(ch)
  103. assert [] == out
  104. gen1 = iter(str1)
  105. gen2 = iter(str2)
  106. assert isinstance(gen1, GeneratorType)
  107. assert isinstance(gen2, GeneratorType)
  108. out = []
  109. for i in range(len(str1)):
  110. out.append(next(gen1))
  111. with pytest.raises(StopIteration):
  112. next(gen1)
  113. assert expected == out
  114. with pytest.raises(StopIteration):
  115. next(gen2)
  116. assert "gnirts ekaf" == "".join(list(reversed(str1)))
  117. assert [] == list(reversed(str2))
  118. assert "f" == str1[0]
  119. assert " " == str1[4]
  120. assert "g" == str1[10]
  121. assert "n" == str1[-2]
  122. with pytest.raises(IndexError):
  123. str1[11]
  124. with pytest.raises(IndexError):
  125. str2[0]
  126. assert "k" in str1
  127. assert "fake" in str1
  128. assert "str" in str1
  129. assert "" in str1
  130. assert "" in str2
  131. assert "real" not in str1
  132. assert "s" not in str2
  133. def test_other_methods(self):
  134. """test the remaining non-magic methods of StringMixIn"""
  135. str1 = _FakeString("fake string")
  136. assert "Fake string" == str1.capitalize()
  137. assert " fake string " == str1.center(15)
  138. assert " fake string " == str1.center(16)
  139. assert "qqfake stringqq" == str1.center(15, "q")
  140. assert 1 == str1.count("e")
  141. assert 0 == str1.count("z")
  142. assert 1 == str1.count("r", 7)
  143. assert 0 == str1.count("r", 8)
  144. assert 1 == str1.count("r", 5, 9)
  145. assert 0 == str1.count("r", 5, 7)
  146. str3 = _FakeString("𐌲𐌿𐍄")
  147. actual = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84"
  148. assert b"fake string" == str1.encode()
  149. assert actual == str3.encode("utf-8")
  150. assert actual == str3.encode(encoding="utf-8")
  151. if getdefaultencoding() == "ascii":
  152. with pytest.raises(UnicodeEncodeError):
  153. str3.encode()
  154. elif getdefaultencoding() == "utf-8":
  155. assert actual == str3.encode()
  156. with pytest.raises(UnicodeEncodeError):
  157. str3.encode("ascii")
  158. with pytest.raises(UnicodeEncodeError):
  159. str3.encode("ascii", "strict")
  160. if getdefaultencoding() == "ascii":
  161. with pytest.raises(UnicodeEncodeError):
  162. str3.encode("ascii", errors="strict")
  163. elif getdefaultencoding() == "utf-8":
  164. assert actual == str3.encode(errors="strict")
  165. assert b"" == str3.encode("ascii", "ignore")
  166. if getdefaultencoding() == "ascii":
  167. assert b"" == str3.encode(errors="ignore")
  168. elif getdefaultencoding() == "utf-8":
  169. assert actual == str3.encode(errors="ignore")
  170. assert str1.endswith("ing") is True
  171. assert str1.endswith("ingh") is False
  172. str4 = _FakeString("\tfoobar")
  173. assert "fake string" == str1
  174. assert " foobar" == str4.expandtabs()
  175. assert " foobar" == str4.expandtabs(4)
  176. assert 3 == str1.find("e")
  177. assert -1 == str1.find("z")
  178. assert 7 == str1.find("r", 7)
  179. assert -1 == str1.find("r", 8)
  180. assert 7 == str1.find("r", 5, 9)
  181. assert -1 == str1.find("r", 5, 7)
  182. str5 = _FakeString("foo{0}baz")
  183. str6 = _FakeString("foo{abc}baz")
  184. str7 = _FakeString("foo{0}{abc}buzz")
  185. str8 = _FakeString("{0}{1}")
  186. assert "fake string" == str1.format()
  187. assert "foobarbaz" == str5.format("bar")
  188. assert "foobarbaz" == str6.format(abc="bar")
  189. assert "foobarbazbuzz" == str7.format("bar", abc="baz")
  190. with pytest.raises(IndexError):
  191. str8.format("abc")
  192. assert "fake string" == str1.format_map({})
  193. assert "foobarbaz" == str6.format_map({"abc": "bar"})
  194. with pytest.raises(ValueError):
  195. str5.format_map({0: "abc"})
  196. assert 3 == str1.index("e")
  197. with pytest.raises(ValueError):
  198. str1.index("z")
  199. assert 7 == str1.index("r", 7)
  200. with pytest.raises(ValueError):
  201. str1.index("r", 8)
  202. assert 7 == str1.index("r", 5, 9)
  203. with pytest.raises(ValueError):
  204. str1.index("r", 5, 7)
  205. str9 = _FakeString("foobar")
  206. str10 = _FakeString("foobar123")
  207. str11 = _FakeString("foo bar")
  208. assert str9.isalnum() is True
  209. assert str10.isalnum() is True
  210. assert str11.isalnum() is False
  211. assert str9.isalpha() is True
  212. assert str10.isalpha() is False
  213. assert str11.isalpha() is False
  214. str12 = _FakeString("123")
  215. str13 = _FakeString("\u2155")
  216. str14 = _FakeString("\u00B2")
  217. assert str9.isdecimal() is False
  218. assert str12.isdecimal() is True
  219. assert str13.isdecimal() is False
  220. assert str14.isdecimal() is False
  221. assert str9.isdigit() is False
  222. assert str12.isdigit() is True
  223. assert str13.isdigit() is False
  224. assert str14.isdigit() is True
  225. assert str9.isidentifier() is True
  226. assert str10.isidentifier() is True
  227. assert str11.isidentifier() is False
  228. assert str12.isidentifier() is False
  229. str15 = _FakeString("")
  230. str16 = _FakeString("FooBar")
  231. assert str9.islower() is True
  232. assert str15.islower() is False
  233. assert str16.islower() is False
  234. assert str9.isnumeric() is False
  235. assert str12.isnumeric() is True
  236. assert str13.isnumeric() is True
  237. assert str14.isnumeric() is True
  238. str16B = _FakeString("\x01\x02")
  239. assert str9.isprintable() is True
  240. assert str13.isprintable() is True
  241. assert str14.isprintable() is True
  242. assert str15.isprintable() is True
  243. assert str16B.isprintable() is False
  244. str17 = _FakeString(" ")
  245. str18 = _FakeString("\t \t \r\n")
  246. assert str1.isspace() is False
  247. assert str9.isspace() is False
  248. assert str17.isspace() is True
  249. assert str18.isspace() is True
  250. str19 = _FakeString("This Sentence Looks Like A Title")
  251. str20 = _FakeString("This sentence doesn't LookLikeATitle")
  252. assert str15.istitle() is False
  253. assert str19.istitle() is True
  254. assert str20.istitle() is False
  255. str21 = _FakeString("FOOBAR")
  256. assert str9.isupper() is False
  257. assert str15.isupper() is False
  258. assert str21.isupper() is True
  259. assert "foobar" == str15.join(["foo", "bar"])
  260. assert "foo123bar123baz" == str12.join(("foo", "bar", "baz"))
  261. assert "fake string " == str1.ljust(15)
  262. assert "fake string " == str1.ljust(16)
  263. assert "fake stringqqqq" == str1.ljust(15, "q")
  264. str22 = _FakeString("ß")
  265. assert "" == str15.lower()
  266. assert "foobar" == str16.lower()
  267. assert "ß" == str22.lower()
  268. assert "" == str15.casefold()
  269. assert "foobar" == str16.casefold()
  270. assert "ss" == str22.casefold()
  271. str23 = _FakeString(" fake string ")
  272. assert "fake string" == str1.lstrip()
  273. assert "fake string " == str23.lstrip()
  274. assert "ke string" == str1.lstrip("abcdef")
  275. assert ("fa", "ke", " string") == str1.partition("ke")
  276. assert ("fake string", "", "") == str1.partition("asdf")
  277. str24 = _FakeString("boo foo moo")
  278. assert "real string" == str1.replace("fake", "real")
  279. assert "bu fu moo" == str24.replace("oo", "u", 2)
  280. assert 3 == str1.rfind("e")
  281. assert -1 == str1.rfind("z")
  282. assert 7 == str1.rfind("r", 7)
  283. assert -1 == str1.rfind("r", 8)
  284. assert 7 == str1.rfind("r", 5, 9)
  285. assert -1 == str1.rfind("r", 5, 7)
  286. assert 3 == str1.rindex("e")
  287. with pytest.raises(ValueError):
  288. str1.rindex("z")
  289. assert 7 == str1.rindex("r", 7)
  290. with pytest.raises(ValueError):
  291. str1.rindex("r", 8)
  292. assert 7 == str1.rindex("r", 5, 9)
  293. with pytest.raises(ValueError):
  294. str1.rindex("r", 5, 7)
  295. assert " fake string" == str1.rjust(15)
  296. assert " fake string" == str1.rjust(16)
  297. assert "qqqqfake string" == str1.rjust(15, "q")
  298. assert ("fa", "ke", " string") == str1.rpartition("ke")
  299. assert ("", "", "fake string") == str1.rpartition("asdf")
  300. str25 = _FakeString(" this is a sentence with whitespace ")
  301. actual = ["this", "is", "a", "sentence", "with", "whitespace"]
  302. assert actual == str25.rsplit()
  303. assert actual == str25.rsplit(None)
  304. actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with",
  305. "", "whitespace", ""]
  306. assert actual == str25.rsplit(" ")
  307. actual = [" this is a", "sentence", "with", "whitespace"]
  308. assert actual == str25.rsplit(None, 3)
  309. actual = [" this is a sentence with", "", "whitespace", ""]
  310. assert actual == str25.rsplit(" ", 3)
  311. actual = [" this is a", "sentence", "with", "whitespace"]
  312. assert actual == str25.rsplit(maxsplit=3)
  313. assert "fake string" == str1.rstrip()
  314. assert " fake string" == str23.rstrip()
  315. assert "fake stri" == str1.rstrip("ngr")
  316. actual = ["this", "is", "a", "sentence", "with", "whitespace"]
  317. assert actual == str25.split()
  318. assert actual == str25.split(None)
  319. actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with",
  320. "", "whitespace", ""]
  321. assert actual == str25.split(" ")
  322. actual = ["this", "is", "a", "sentence with whitespace "]
  323. assert actual == str25.split(None, 3)
  324. actual = ["", "", "", "this is a sentence with whitespace "]
  325. assert actual == str25.split(" ", 3)
  326. actual = ["this", "is", "a", "sentence with whitespace "]
  327. assert actual == str25.split(maxsplit=3)
  328. str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere")
  329. assert ["lines", "of", "text", "are", "presented", "here"] \
  330. == str26.splitlines()
  331. assert ["lines\n", "of\n", "text\r\n", "are\r\n", "presented\n", "here"] \
  332. == str26.splitlines(True)
  333. assert str1.startswith("fake") is True
  334. assert str1.startswith("faker") is False
  335. assert "fake string" == str1.strip()
  336. assert "fake string" == str23.strip()
  337. assert "ke stri" == str1.strip("abcdefngr")
  338. assert "fOObAR" == str16.swapcase()
  339. assert "Fake String" == str1.title()
  340. table1 = StringMixIn.maketrans({97: "1", 101: "2", 105: "3",
  341. 111: "4", 117: "5"})
  342. table2 = StringMixIn.maketrans("aeiou", "12345")
  343. table3 = StringMixIn.maketrans("aeiou", "12345", "rts")
  344. assert "f1k2 str3ng" == str1.translate(table1)
  345. assert "f1k2 str3ng" == str1.translate(table2)
  346. assert "f1k2 3ng" == str1.translate(table3)
  347. assert "" == str15.upper()
  348. assert "FOOBAR" == str16.upper()
  349. assert "123" == str12.zfill(3)
  350. assert "000123" == str12.zfill(6)