From 26d30f3d1a8c0caca854f7040d07555c6f794b0f Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Sun, 23 Dec 2012 19:18:09 -0500
Subject: [PATCH] Seems to be working for quoted attributes now.

---
 mwparserfromhell/parser/tokenizer.py | 40 ++++++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py
index d3cb40f..920d1cf 100644
--- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py
@@ -461,7 +461,7 @@ class Tokenizer(object):
         return padding
 
     def _actually_handle_chunk(self, chunks, is_new):
-        if is_new:
+        if is_new and not self._context & contexts.TAG_OPEN_ATTR_BODY_QUOTED:
             padding = 0
             while chunks:
                 if chunks[0] == "":
@@ -472,6 +472,15 @@ class Tokenizer(object):
             self._write(tokens.TagAttrStart(padding=" " * padding))
         if chunks:
             chunk = chunks.pop(0)
+            if self._context & contexts.TAG_OPEN_ATTR_BODY:
+                self._context ^= contexts.TAG_OPEN_ATTR_BODY
+                self._context |= contexts.TAG_OPEN_ATTR_NAME
+            if self._context & contexts.TAG_OPEN_ATTR_BODY_QUOTED:
+                if re.search(r'[^\\]"', chunk[:-1]):
+                    self._fail_route()
+                if re.search(r'[^\\]"$', chunk):
+                    self._write_text(chunk[:-1])
+                    return self._pop()  # Back to _handle_tag_attribute_body()
             self._write_text(chunk)
 
     def _handle_tag_chunk(self, text):
@@ -490,26 +499,35 @@ class Tokenizer(object):
             self._actually_handle_chunk(chunks, True)
             is_new = False
             while chunks:
-                self._actually_handle_chunk(chunks, is_new)
+                should_exit = self._actually_handle_chunk(chunks, is_new)
+                if should_exit:
+                    return should_exit
                 is_new = True
 
     def _handle_tag_attribute_body(self):
         self._context ^= contexts.TAG_OPEN_ATTR_NAME
         self._context |= contexts.TAG_OPEN_ATTR_BODY
-        self._write(TagAttrEquals())
+        self._write(tokens.TagAttrEquals())
         next = self._read(1)
         if next not in self.MARKERS and next.startswith('"'):
             if re.search(r'[^\\]"$', next[1:]):
                 if not re.search(r'[^\\]"', next[1:-1]):
-                    self._write(TagAttrQuote())
+                    self._write(tokens.TagAttrQuote())
                     self._write_text(next[1:-1])
                     self._head += 1
             else:
                 if not re.search(r'[^\\]"', next[1:]):
-                    self._push(contexts.TAG_OPEN_ATTR_BODY_QUOTED)
-                    self._write(TagAttrQuote())
-                    self._write_text(next[1:])
                     self._head += 1
+                    reset = self._head
+                    try:
+                        attr = self._parse(contexts.TAG_OPEN_ATTR_BODY_QUOTED)
+                    except BadRoute:
+                        self._head = reset
+                        self._write_text(next)
+                    else:
+                        self._write(tokens.TagAttrQuote())
+                        self._write_text(next[1:])
+                        self._write_all(attr)
 
     def _handle_tag_close_open(self):
         padding = self._actually_close_tag_opening()
@@ -543,7 +561,9 @@ class Tokenizer(object):
             this = self._read()
             if this not in self.MARKERS:
                 if self._context & contexts.TAG_OPEN:
-                    self._handle_tag_chunk(this)
+                    should_exit = self._handle_tag_chunk(this)
+                    if should_exit:
+                        return should_exit
                 else:
                     self._write_text(this)
                 self._head += 1
@@ -593,6 +613,8 @@ class Tokenizer(object):
             elif this == "=" and not self._global & contexts.GL_HEADING:
                 if self._read(-1) in ("\n", self.START):
                     self._parse_heading()
+                elif self._context & contexts.TAG_OPEN_ATTR_NAME:
+                    self._handle_tag_attribute_body()
                 else:
                     self._write_text("=")
             elif this == "=" and self._context & contexts.HEADING:
@@ -618,7 +640,7 @@ class Tokenizer(object):
                 self._handle_tag_close_open()
            elif this == "/" and next == ">":
                 return self._handle_tag_selfclose()
-            elif this == "=":
+            elif this == "=" and self._context & contexts.TAG_OPEN_ATTR_NAME:
                 self._handle_tag_attribute_body()
             elif this == "<" and next == "/" and (
                     self._context & contexts.TAG_BODY):
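
Note (illustration only, not part of the patch): the sketch below shows the
kind of input this change targets. Tokenizer and the token classes are real
names from this repository, but tag parsing is still in flux at this commit,
so the token stream shown is an assumption based on the code above, not the
output of an actual run.

    # Hypothetical usage: tokenize a tag whose attribute value is quoted.
    from mwparserfromhell.parser.tokenizer import Tokenizer

    for token in Tokenizer().tokenize('<ref name="foo">text</ref>'):
        print(token)

    # Assumed tokens for the attribute portion, following the diff:
    #   TagAttrStart(padding=" ")  - whitespace before the attribute name
    #   Text(text="name")          - the attribute name
    #   TagAttrEquals()            - written by _handle_tag_attribute_body()
    #   TagAttrQuote()             - single-chunk value: the quote pair is
    #                                verified with the [^\\]" regexes and the
    #                                quotes are stripped
    #   Text(text="foo")           - the attribute value
    #
    # A value containing spaces (e.g. "foo bar") spans several chunks, so it
    # instead recurses via self._parse(contexts.TAG_OPEN_ATTR_BODY_QUOTED)
    # and falls back to writing the raw text if that route raises BadRoute.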