A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 
 
 

93 行
4.0 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from __future__ import print_function, unicode_literals
  23. from os import listdir, path
  24. from mwparserfromhell.parser import tokens
  25. class _TestParseError(Exception):
  26. """Raised internally when a test could not be parsed."""
  27. pass
  28. class TokenizerTestCase(object):
  29. @classmethod
  30. def _build_test_method(cls, funcname, data):
  31. def inner(self):
  32. actual = self.tokenizer().tokenize(data["input"])
  33. self.assertEqual(actual, data["output"])
  34. inner.__name__ = funcname.encode("utf8")
  35. inner.__doc__ = data["label"]
  36. return inner
  37. @classmethod
  38. def _load_tests(cls, filename, text):
  39. tests = text.split("\n---\n")
  40. for test in tests:
  41. data = {"name": "", "label": "", "input": "", "output": []}
  42. try:
  43. for line in test.strip().splitlines():
  44. if line.startswith("name:"):
  45. data["name"] = line[len("name:"):].strip()
  46. elif line.startswith("label:"):
  47. data["label"] = line[len("label:"):].strip()
  48. elif line.startswith("input:"):
  49. raw = line[len("input:"):].strip()
  50. if raw[0] == '"' and raw[-1] == '"':
  51. raw = raw[1:-1]
  52. data["input"] = raw.decode("unicode_escape")
  53. elif line.startswith("output:"):
  54. raw = line[len("output:"):].strip()
  55. data["output"] = eval(raw, vars(tokens))
  56. except _TestParseError:
  57. if data["name"]:
  58. error = "Could not parse test {0} in {1}"
  59. print(error.format(data["name"], filename))
  60. else:
  61. print("Could not parse a test in {0}".format(filename))
  62. continue
  63. if not data["name"]:
  64. error = "A test in {0} was ignored because it lacked a name"
  65. print(error.format(filename))
  66. continue
  67. if not data["input"] or not data["output"]:
  68. error = "Test {0} in {1} was ignored because it lacked an input or an output"
  69. print(error.format(data["name"], filename))
  70. continue
  71. funcname = "test_" + filename + "_" + data["name"]
  72. meth = cls._build_test_method(funcname, data)
  73. setattr(cls, funcname, meth)
  74. @classmethod
  75. def build(cls):
  76. directory = path.join(path.dirname(__file__), "tokenizer")
  77. extension = ".test"
  78. for filename in listdir(directory):
  79. if not filename.endswith(extension):
  80. continue
  81. with open(path.join(directory, filename), "r") as fp:
  82. text = fp.read().decode("utf8")
  83. cls._load_tests(filename[:0-len(extension)], text)
# Attach the generated test_* methods to TokenizerTestCase as soon as this
# module is imported.
TokenizerTestCase.build()