diff --git a/mwparserfromhell/nodes/comment.py b/mwparserfromhell/nodes/comment.py index dad0214..ff77b18 100644 --- a/mwparserfromhell/nodes/comment.py +++ b/mwparserfromhell/nodes/comment.py @@ -41,6 +41,6 @@ class Comment(Node): """The hidden text contained between ``<!--`` and ``-->``.""" return self._contents - @value.setter + @contents.setter def contents(self, value): self._contents = str(value) diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index e597507..e03d94f 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -126,7 +126,7 @@ class Builder(object): self._write(self._handle_token(token)) def _handle_entity(self): - """Handle a case where a HTML entity is at the head of the tokens.""" + """Handle a case where an HTML entity is at the head of the tokens.""" token = self._tokens.pop() if isinstance(token, tokens.HTMLEntityNumeric): token = self._tokens.pop() diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index 5969239..e1e96e1 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -35,49 +35,53 @@ will cover ``BAR == 0b10`` and ``BAZ == 0b01``). 
Local (stack-specific) contexts: -* :py:const:`TEMPLATE` (``0b00000000111``) +* :py:const:`TEMPLATE` - * :py:const:`TEMPLATE_NAME` (``0b00000000001``) - * :py:const:`TEMPLATE_PARAM_KEY` (``0b00000000010``) - * :py:const:`TEMPLATE_PARAM_VALUE` (``0b00000000100``) + * :py:const:`TEMPLATE_NAME` + * :py:const:`TEMPLATE_PARAM_KEY` + * :py:const:`TEMPLATE_PARAM_VALUE` -* :py:const:`ARGUMENT` (``0b00000011000``) +* :py:const:`ARGUMENT` - * :py:const:`ARGUMENT_NAME` (``0b00000001000``) - * :py:const:`ARGUMENT_DEFAULT` (``0b00000010000``) + * :py:const:`ARGUMENT_NAME` + * :py:const:`ARGUMENT_DEFAULT` -* :py:const:`HEADING` (``0b111111000``) +* :py:const:`HEADING` - * :py:const:`HEADING_LEVEL_1` (``0b00000100000``) - * :py:const:`HEADING_LEVEL_2` (``0b00001000000``) - * :py:const:`HEADING_LEVEL_3` (``0b00010000000``) - * :py:const:`HEADING_LEVEL_4` (``0b00100000000``) - * :py:const:`HEADING_LEVEL_5` (``0b01000000000``) - * :py:const:`HEADING_LEVEL_6` (``0b10000000000``) + * :py:const:`HEADING_LEVEL_1` + * :py:const:`HEADING_LEVEL_2` + * :py:const:`HEADING_LEVEL_3` + * :py:const:`HEADING_LEVEL_4` + * :py:const:`HEADING_LEVEL_5` + * :py:const:`HEADING_LEVEL_6` + +* :py:const:`COMMENT` Global contexts: -* :py:const:`GL_HEADING` (``0b1``) +* :py:const:`GL_HEADING` """ # Local contexts: -TEMPLATE = 0b00000000111 -TEMPLATE_NAME = 0b00000000001 -TEMPLATE_PARAM_KEY = 0b00000000010 -TEMPLATE_PARAM_VALUE = 0b00000000100 - -ARGUMENT = 0b00000011000 -ARGUMENT_NAME = 0b00000001000 -ARGUMENT_DEFAULT = 0b00000010000 - -HEADING = 0b11111100000 -HEADING_LEVEL_1 = 0b00000100000 -HEADING_LEVEL_2 = 0b00001000000 -HEADING_LEVEL_3 = 0b00010000000 -HEADING_LEVEL_4 = 0b00100000000 -HEADING_LEVEL_5 = 0b01000000000 -HEADING_LEVEL_6 = 0b10000000000 +TEMPLATE = 0b000000000111 +TEMPLATE_NAME = 0b000000000001 +TEMPLATE_PARAM_KEY = 0b000000000010 +TEMPLATE_PARAM_VALUE = 0b000000000100 + +ARGUMENT = 0b000000011000 +ARGUMENT_NAME = 0b000000001000 +ARGUMENT_DEFAULT = 0b000000010000 + +HEADING = 
0b011111100000 +HEADING_LEVEL_1 = 0b000000100000 +HEADING_LEVEL_2 = 0b000001000000 +HEADING_LEVEL_3 = 0b000010000000 +HEADING_LEVEL_4 = 0b000100000000 +HEADING_LEVEL_5 = 0b001000000000 +HEADING_LEVEL_6 = 0b010000000000 + +COMMENT = 0b100000000000 # Global contexts: diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 9e6ae11..e51a081 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -41,8 +41,8 @@ class Tokenizer(object): START = object() END = object() MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", - "/", "-", "\n", END] - regex = re.compile(r"([{}\[\]<>|=&#*;:/\-\n])", flags=re.IGNORECASE) + "/", "-", "!", "\n", END] + regex = re.compile(r"([{}\[\]<>|=&#*;:/\-!\n])", flags=re.IGNORECASE) def __init__(self): self._text = None @@ -327,7 +327,7 @@ class Tokenizer(object): return self._pop(), after_level def _really_parse_entity(self): - """Actually parse a HTML entity and ensure that it is valid.""" + """Actually parse an HTML entity and ensure that it is valid.""" self._write(tokens.HTMLEntityStart()) self._head += 1 @@ -369,7 +369,7 @@ class Tokenizer(object): self._write(tokens.HTMLEntityEnd()) def _parse_entity(self): - """Parse a HTML entity at the head of the wikicode string.""" + """Parse an HTML entity at the head of the wikicode string.""" reset = self._head self._push() try: @@ -380,6 +380,21 @@ class Tokenizer(object): else: self._write_all(self._pop()) + def _parse_comment(self): + """Parse an HTML comment at the head of the wikicode string.""" + self._head += 4 + reset = self._head - 1 + try: + comment = self._parse(contexts.COMMENT) + except BadRoute: + self._head = reset + self._write_text("