A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

94 regels
3.4 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. import re
  23. from . import tokens
  24. from .build_stack import BuildStack
  25. from ..nodes import Template, Text
  26. from ..nodes.extras import Parameter
  27. from ..smart_list import SmartList
  28. from ..wikicode import Wikicode
  29. __all__ = ["Builder"]
  30. class Builder(object):
  31. def __init__(self):
  32. self._tokens = []
  33. self._stack = BuildStack()
  34. def _pop(self):
  35. return Wikicode(SmartList(stack.pop()))
  36. def _handle_parameter(self, key):
  37. showkey = False
  38. self._stack.push()
  39. while self._tokens:
  40. token = self._tokens.pop(0)
  41. if isinstance(token, tokens.TEMPLATE_PARAM_EQUALS):
  42. key = self._pop()
  43. showkey = True
  44. self._stack.push()
  45. elif isinstance(token, (tokens.TEMPLATE_PARAM_SEPARATOR,
  46. tokens.TEMPLATE_CLOSE)):
  47. self._tokens.insert(0, token)
  48. value = self._pop()
  49. return Parameter(key, value, showkey)
  50. else:
  51. self._stack.write(self._handle_token())
  52. def _handle_template(self):
  53. params = []
  54. int_keys = set()
  55. int_key_range = {1}
  56. self._stack.push()
  57. while self._tokens:
  58. token = self._tokens.pop(0)
  59. if isinstance(token, tokens.TEMPLATE_PARAM_SEPARATOR):
  60. if not params:
  61. name = self._pop()
  62. param = self._handle_parameter(min(int_key_range - int_keys))
  63. if re.match(r"[1-9][0-9]*$", param.key.strip()):
  64. int_keys.add(int(param.key))
  65. int_key_range.add(len(int_keys) + 1)
  66. params.append(param)
  67. elif isinstance(token, tokens.TEMPLATE_CLOSE):
  68. if not params:
  69. name = self._pop()
  70. return Template(name, params)
  71. else:
  72. self._stack.write(self._handle_token())
  73. def _handle_token(self):
  74. token = self._tokens.pop(0)
  75. if isinstance(token, tokens.TEXT):
  76. return Text(token.text)
  77. elif isinstance(token, tokens.TEMPLATE_OPEN):
  78. return self._handle_template()
  79. def build(self, tokens):
  80. self._tokens = tokens
  81. self._stack.push()
  82. while self._tokens:
  83. self._stack.write(self._handle_token())
  84. return self._pop()