From 0be18bc5b8db231a33e083f298ac170743af7fe9 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Sat, 29 Sep 2012 12:40:59 -0400
Subject: [PATCH] Fix Py_UNICODE comparisons.

---
 mwparserfromhell/parser/tokenizer.c | 58 ++++++++++++++++++-------------------
 mwparserfromhell/parser/tokenizer.h | 10 +++----
 2 files changed, 33 insertions(+), 35 deletions(-)

diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c
index 7ba7472..9f7e37d 100644
--- a/mwparserfromhell/parser/tokenizer.c
+++ b/mwparserfromhell/parser/tokenizer.c
@@ -373,7 +373,7 @@ Tokenizer_parse_template_or_argument(Tokenizer* self)
     self->head += 2;
     unsigned int braces = 2, i;
 
-    while (Tokenizer_READ(self, 0) == PU "{") {
+    while (*Tokenizer_READ(self, 0) == *"{") {
         self->head++;
         braces++;
     }
@@ -965,7 +965,7 @@ Tokenizer_parse_heading(Tokenizer* self)
     PyObject* text;
     int i;
 
-    while (Tokenizer_READ(self, 0) == PU "=") {
+    while (*Tokenizer_READ(self, 0) == *"=") {
         best++;
         self->head++;
     }
@@ -1088,7 +1088,7 @@ Tokenizer_handle_heading_end(Tokenizer* self)
     PyObject* text;
     int i;
 
-    while (Tokenizer_READ(self, 0) == PU "=") {
+    while (*Tokenizer_READ(self, 0) == *"=") {
         best++;
         self->head++;
     }
@@ -1268,7 +1268,7 @@ static PyObject*
 Tokenizer_parse(Tokenizer* self, Py_ssize_t context)
 {
     PyObject *this;
-    Py_UNICODE *this_data, *next, *next_next, *last;
+    Py_UNICODE this_data, next, next_next, last;
     Py_ssize_t this_context;
     Py_ssize_t fail_contexts = LC_TEMPLATE | LC_ARGUMENT | LC_HEADING | LC_COMMENT;
     int is_marker, i;
@@ -1277,11 +1277,11 @@ Tokenizer_parse(Tokenizer* self, Py_ssize_t context)
 
     while (1) {
         this = Tokenizer_read(self, 0);
-        this_data = PyUnicode_AS_UNICODE(this);
+        this_data = *PyUnicode_AS_UNICODE(this);
 
         is_marker = 0;
         for (i = 0; i < NUM_MARKERS; i++) {
-            if (MARKERS[i] == this_data) {
+            if (*MARKERS[i] == this_data) {
                 is_marker = 1;
                 break;
             }
@@ -1295,45 +1295,45 @@ Tokenizer_parse(Tokenizer* self, Py_ssize_t context)
 
         this_context = Tokenizer_CONTEXT_VAL(self);
 
-        if (this == EMPTY) {
+        if (this_data == *"") {
             if (this_context & fail_contexts) {
                 Tokenizer_fail_route(self);
             }
             return Tokenizer_pop(self);
         }
 
-        next = Tokenizer_READ(self, 1);
+        next = *Tokenizer_READ(self, 1);
 
         if (this_context & LC_COMMENT) {
-            if (this_data == next && next == PU "-") {
-                if (Tokenizer_READ(self, 2) == PU ">") {
+            if (this_data == next && next == *"-") {
+                if (*Tokenizer_READ(self, 2) == *">") {
                     return Tokenizer_pop(self);
                 }
             }
             Tokenizer_write_text(self, this);
         }
-        else if (this_data == next && next == PU "{") {
+        else if (this_data == next && next == *"{") {
             Tokenizer_parse_template_or_argument(self);
         }
-        else if (this_data == PU "|" && this_context & LC_TEMPLATE) {
+        else if (this_data == *"|" && this_context & LC_TEMPLATE) {
             Tokenizer_handle_template_param(self);
         }
-        else if (this_data == PU "=" && this_context & LC_TEMPLATE_PARAM_KEY) {
+        else if (this_data == *"=" && this_context & LC_TEMPLATE_PARAM_KEY) {
             Tokenizer_handle_template_param_value(self);
        }
-        else if (this_data == next && next == PU "}" && this_context & LC_TEMPLATE) {
+        else if (this_data == next && next == *"}" && this_context & LC_TEMPLATE) {
             Tokenizer_handle_template_end(self);
         }
-        else if (this_data == PU "|" && this_context & LC_ARGUMENT_NAME) {
+        else if (this_data == *"|" && this_context & LC_ARGUMENT_NAME) {
             Tokenizer_handle_argument_separator(self);
         }
-        else if (this_data == next && next == PU "}" && this_context & LC_ARGUMENT) {
-            if (Tokenizer_READ(self, 2) == PU "}") {
+        else if (this_data == next && next == *"}" && this_context & LC_ARGUMENT) {
+            if (*Tokenizer_READ(self, 2) == *"}") {
                 return Tokenizer_handle_argument_end(self);
             }
             Tokenizer_write_text(self, this);
         }
-        else if (this_data == next && next == PU "[") {
+        else if (this_data == next && next == *"[") {
             if (!(this_context & LC_WIKILINK_TITLE)) {
                 Tokenizer_parse_wikilink(self);
             }
@@ -1341,33 +1341,33 @@ Tokenizer_parse(Tokenizer* self, Py_ssize_t context)
                 Tokenizer_write_text(self, this);
             }
         }
-        else if (this_data == PU "|" && this_context & LC_WIKILINK_TITLE) {
+        else if (this_data == *"|" && this_context & LC_WIKILINK_TITLE) {
             Tokenizer_handle_wikilink_separator(self);
         }
-        else if (this_data == next && next == PU "]" && this_context & LC_WIKILINK) {
+        else if (this_data == next && next == *"]" && this_context & LC_WIKILINK) {
             return Tokenizer_handle_wikilink_end(self);
         }
-        else if (this_data == PU "=" && !(self->global & GL_HEADING)) {
-            last = PyUnicode_AS_UNICODE(Tokenizer_read_backwards(self, 1));
-            if (last == PU "\n" || last == PU "") {
+        else if (this_data == *"=" && !(self->global & GL_HEADING)) {
+            last = *PyUnicode_AS_UNICODE(Tokenizer_read_backwards(self, 1));
+            if (last == *"\n" || last == *"") {
                 Tokenizer_parse_heading(self);
             }
             else {
                 Tokenizer_write_text(self, this);
             }
         }
-        else if (this_data == PU "=" && this_context & LC_HEADING) {
+        else if (this_data == *"=" && this_context & LC_HEADING) {
             return (PyObject*) Tokenizer_handle_heading_end(self);
         }
-        else if (this_data == PU "\n" && this_context & LC_HEADING) {
+        else if (this_data == *"\n" && this_context & LC_HEADING) {
             Tokenizer_fail_route(self);
         }
-        else if (this_data == PU "&") {
+        else if (this_data == *"&") {
             Tokenizer_parse_entity(self);
         }
-        else if (this_data == PU "<" && next == PU "!") {
-            next_next = Tokenizer_READ(self, 2);
-            if (next_next == Tokenizer_READ(self, 3) && next_next == PU "-") {
+        else if (this_data == *"<" && next == *"!") {
+            next_next = *Tokenizer_READ(self, 2);
+            if (next_next == *Tokenizer_READ(self, 3) && next_next == *"-") {
                 Tokenizer_parse_comment(self);
             }
             else {
diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h
index 7ba9c40..d55e9d1 100644
--- a/mwparserfromhell/parser/tokenizer.h
+++ b/mwparserfromhell/parser/tokenizer.h
@@ -30,12 +30,10 @@ SOFTWARE.
 #include <Python.h>
 #include <setjmp.h>
 
-#define PU (Py_UNICODE*)
-
-static const Py_UNICODE* MARKERS[] = {
-    PU "{", PU "}", PU "[", PU "]", PU "<", PU ">", PU "|", PU "=", PU "&",
-    PU "#", PU "*", PU ";", PU ":", PU "/", PU "-", PU "!", PU "\n", PU ""};
-static const int NUM_MARKERS = 17;
+static const char* MARKERS[] = {
+    "{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", "/", "-",
+    "!", "\n", ""};
+static const int NUM_MARKERS = 18;
 
 static jmp_buf exception_env;
 static const int BAD_ROUTE = 1;
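
As background for reviewers, here is a minimal standalone sketch (not part of this patch; the file name, the wchar_t buffer, the "read" variable, and the local PU macro standing in for Py_UNICODE are all hypothetical) of the bug class the commit fixes. The old code compared the Py_UNICODE* returned by Tokenizer_READ against a string literal cast with PU, which compares pointer addresses rather than text, so those branches were essentially never taken; the patch dereferences both sides so actual character values are compared.

    /* sketch.c -- illustrative only, not mwparserfromhell source */
    #include <stdio.h>
    #include <wchar.h>

    #define PU (wchar_t*)   /* mirrors the macro deleted from tokenizer.h */

    int main(void)
    {
        const wchar_t buffer[] = L"{{foo}}";
        const wchar_t *read = buffer;   /* stands in for Tokenizer_READ(self, 0) */

        /* Old pattern: compares the read pointer with the address of a casted
           string literal, so the branch is not taken no matter which
           character the tokenizer is actually looking at. */
        if (read == PU "{")
            printf("old-style comparison matched\n");
        else
            printf("old-style comparison failed: %p vs %p\n",
                   (void*)read, (void*)(PU "{"));

        /* New pattern: dereference both sides so the character values
           (L'{' and '{', both 0x7B in ASCII) are compared. */
        if (*read == *"{")
            printf("new-style comparison matches the '{' character\n");

        return 0;
    }

The same reasoning applies to the MARKERS table in tokenizer.h: the loop in Tokenizer_parse now compares *MARKERS[i] (a character value) against this_data instead of comparing two pointers, and the corrected NUM_MARKERS = 18 matches the eighteen entries in the list, including the final empty string whose first character is the NUL terminator.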