A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
Vous ne pouvez pas sélectionner plus de 25 sujets. Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.
 
 
 
 

76 lignes
2.5 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from . import tokens
# Names exported by a star-import of this module: only the Tokenizer class.
__all__ = ["Tokenizer"]
  24. class Tokenizer(object):
  25. START = object()
  26. END = object()
  27. def __init__(self):
  28. self._text = None
  29. self._head = 0
  30. self._stacks = []
  31. self._modifiers = []
  32. def _push(self):
  33. self._stacks.append([])
  34. def _pop(self):
  35. return self._stacks.pop()
  36. def _write(self, token, stack=None):
  37. if stack is None:
  38. stack = self._stacks[-1]
  39. if not stack:
  40. stack.append(token)
  41. return
  42. last = stack[-1]
  43. if isinstance(token, tokens.Text) and isinstance(last, tokens.Text):
  44. last.text += token.text
  45. else:
  46. stack.append(token)
  47. def _read(self, delta=0, wrap=False):
  48. index = self._head + delta
  49. if index < 0 and (not wrap or abs(index) > len(self._text)):
  50. return self.START
  51. if index >= len(self._text):
  52. return self.END
  53. return self._text[index]
  54. def _parse_until(self, stop):
  55. self._push()
  56. while True:
  57. if self._read() in (stop, self.END):
  58. return self._pop()
  59. else:
  60. self._write(tokens.Text(text=self._read()))
  61. self._head += 1
  62. def tokenize(self, text):
  63. self._text = list(text)
  64. return self._parse_until(stop=self.END)