A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/

# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import print_function, unicode_literals
from os import listdir, path

from mwparserfromhell.compat import py3k
from mwparserfromhell.parser import tokens

class _TestParseError(Exception):
    """Raised internally when a test could not be parsed."""
    pass

class TokenizerTestCase(object):
    """Provides test methods generated from the .test files on disk."""

    @classmethod
    def _build_test_method(cls, funcname, data):
        """Return a method that tokenizes a test's input and checks the output."""
        def inner(self):
            actual = self.tokenizer().tokenize(data["input"])
            self.assertEqual(actual, data["output"])
        if not py3k:
            inner.__name__ = funcname.encode("utf8")
        inner.__doc__ = data["label"]
        return inner
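
    # Illustration only, assuming the TemplateOpen/Text/TemplateClose token
    # names from mwparserfromhell.parser.tokens: a generated method compares
    # token lists directly, so a test for "{{foo}}" amounts to roughly
    #
    #     actual = self.tokenizer().tokenize("{{foo}}")
    #     self.assertEqual(actual, [tokens.TemplateOpen(),
    #                               tokens.Text(text="foo"),
    #                               tokens.TemplateClose()])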

    @classmethod
    def _load_tests(cls, filename, text):
        """Parse the test data in *text* and attach one test method per test."""
        counter = 1
        tests = text.split("\n---\n")
        for test in tests:
            data = {"name": "", "label": "", "input": "", "output": []}
            try:
                for line in test.strip().splitlines():
                    if line.startswith("name:"):
                        data["name"] = line[len("name:"):].strip()
                    elif line.startswith("label:"):
                        data["label"] = line[len("label:"):].strip()
                    elif line.startswith("input:"):
                        raw = line[len("input:"):].strip()
                        if raw[0] == '"' and raw[-1] == '"':
                            raw = raw[1:-1]
                        data["input"] = raw.encode("raw_unicode_escape").decode("unicode_escape")
                    elif line.startswith("output:"):
                        raw = line[len("output:"):].strip()
                        try:
                            data["output"] = eval(raw, vars(tokens))
                        except Exception:
                            # An unevaluable token list means the test is malformed.
                            raise _TestParseError()
            except _TestParseError:
                if data["name"]:
                    error = "Could not parse test {0} in {1}"
                    print(error.format(data["name"], filename))
                else:
                    print("Could not parse a test in {0}".format(filename))
                continue
            if not data["name"]:
                error = "A test in {0} was ignored because it lacked a name"
                print(error.format(filename))
                continue
            if not data["input"] or not data["output"]:
                error = "Test {0} in {1} was ignored because it lacked an input or an output"
                print(error.format(data["name"], filename))
                continue
            fname = "test_{0}{1}_{2}".format(filename, counter, data["name"])
            meth = cls._build_test_method(fname, data)
            setattr(cls, fname, meth)
            counter += 1
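
    # A hypothetical .test file entry, in the shape _load_tests above expects:
    # entries are separated by a line reading "---", and the output field is
    # evaluated against the names in mwparserfromhell.parser.tokens:
    #
    #     name: basic
    #     label: sanity check for a simple text string
    #     input: "foo bar baz"
    #     output: [Text(text="foo bar baz")]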

    @classmethod
    def build(cls):
        """Load every .test file in the tokenizer/ directory and build tests."""
        directory = path.join(path.dirname(__file__), "tokenizer")
        extension = ".test"
        for filename in listdir(directory):
            if not filename.endswith(extension):
                continue
            with open(path.join(directory, filename), "r") as fp:
                text = fp.read()
                if not py3k:
                    text = text.decode("utf8")
            cls._load_tests(filename[:-len(extension)], text)

TokenizerTestCase.build()
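
# A minimal usage sketch, assuming the pure-Python tokenizer is importable as
# mwparserfromhell.parser.tokenizer.Tokenizer: mixing TokenizerTestCase into a
# unittest.TestCase picks up every generated test_* method, and the
# self.tokenizer() call inside each test instantiates the class set here.
import unittest

from mwparserfromhell.parser.tokenizer import Tokenizer

class TestPyTokenizer(TokenizerTestCase, unittest.TestCase):
    """Run the generated tokenizer tests against the Python tokenizer."""

    @classmethod
    def setUpClass(cls):
        cls.tokenizer = Tokenizer

if __name__ == "__main__":
    unittest.main(verbosity=2)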