A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/

# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from __future__ import print_function, unicode_literals
from os import listdir, path

from mwparserfromhell.compat import py3k
from mwparserfromhell.parser import tokens


class _TestParseError(Exception):
    """Raised internally when a test could not be parsed."""
    pass


class TokenizerTestCase(object):
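    """A base test case for tokenizers, whose tests are loaded dynamically.

    Tests are read from the .test files in the "tokenizer" directory and
    installed as individual methods on this class. A concrete subclass is
    expected to also inherit from unittest.TestCase and to provide a
    ``tokenizer`` attribute giving the tokenizer class under test.
    """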

    @classmethod
    def _build_test_method(cls, funcname, data):
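        """Create and return a function to be used as a test case method.

        *data* is a dict holding the *input* text to tokenize, the expected
        token list as *output*, and an optional *label* that becomes the
        method's docstring.
        """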
        def inner(self):
            actual = self.tokenizer().tokenize(data["input"])
            self.assertEqual(actual, data["output"])
        if not py3k:
            inner.__name__ = funcname.encode("utf8")
        inner.__doc__ = data["label"]
        return inner

    @classmethod
    def _load_tests(cls, filename, text):
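        """Load all tests in *text* from the file *filename*."""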
        counter = 1
        tests = text.split("\n---\n")
        for test in tests:
            data = {"name": "", "label": "", "input": "", "output": []}
            try:
                for line in test.strip().splitlines():
                    if line.startswith("name:"):
                        data["name"] = line[len("name:"):].strip()
                    elif line.startswith("label:"):
                        data["label"] = line[len("label:"):].strip()
                    elif line.startswith("input:"):
                        raw = line[len("input:"):].strip()
                        if raw[0] == '"' and raw[-1] == '"':
                            raw = raw[1:-1]
                        raw = raw.encode("raw_unicode_escape")
                        data["input"] = raw.decode("unicode_escape")
                    elif line.startswith("output:"):
                        raw = line[len("output:"):].strip()
                        try:
                            data["output"] = eval(raw, vars(tokens))
                        except Exception as err:
                            raise _TestParseError(err)
            except _TestParseError as err:
                if data["name"]:
                    error = "Could not parse test '{0}' in '{1}':\n\t{2}"
                    print(error.format(data["name"], filename, err))
                else:
                    error = "Could not parse a test in '{0}':\n\t{1}"
                    print(error.format(filename, err))
                continue
            if not data["name"]:
                error = "A test in '{0}' was ignored because it lacked a name"
                print(error.format(filename))
                continue
            if not data["input"] or not data["output"]:
                error = "Test '{0}' in '{1}' was ignored because it lacked an input or an output"
                print(error.format(data["name"], filename))
                continue
            fname = "test_{0}{1}_{2}".format(filename, counter, data["name"])
            meth = cls._build_test_method(fname, data)
            setattr(cls, fname, meth)
            counter += 1
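
    # For illustration, the .test format that _load_tests expects: entries
    # are separated by "---" lines, and each entry supplies name/label/
    # input/output fields. A hypothetical entry (the values below are
    # made up for this sketch) might look like:
    #
    #     name: basic_template
    #     label: a simple template parse
    #     input: "{{foo}}"
    #     output: [TemplateOpen(), Text(text="foo"), TemplateClose()]
    #
    # The output field is eval'd in the namespace of
    # mwparserfromhell.parser.tokens, so it must name real token classes.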

    @classmethod
    def build(cls):
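        """Load and install all tests from the "tokenizer" directory."""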
        directory = path.join(path.dirname(__file__), "tokenizer")
        extension = ".test"
        for filename in listdir(directory):
            if not filename.endswith(extension):
                continue
            with open(path.join(directory, filename), "r") as fp:
                text = fp.read()
                if not py3k:
                    text = text.decode("utf8")
            cls._load_tests(filename[:-len(extension)], text)


TokenizerTestCase.build()
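
# A minimal usage sketch, assuming the standard unittest module: a concrete
# test class mixes TokenizerTestCase with unittest.TestCase and points
# ``tokenizer`` at the tokenizer class to exercise. The subclass name and
# setUpClass wiring below are illustrative, not part of this file.
#
#     import unittest
#
#     class TestPyTokenizer(TokenizerTestCase, unittest.TestCase):
#         @classmethod
#         def setUpClass(cls):
#             from mwparserfromhell.parser.tokenizer import Tokenizer
#             cls.tokenizer = Tokenizer
#
#     if __name__ == "__main__":
#         unittest.main()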