From 23d97583bf5d72042969a8d7736616edf6cf7c3d Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 3 Oct 2015 15:11:03 -0500 Subject: [PATCH] Fix regression in C tokenizer (#125) --- mwparserfromhell/parser/ctokenizer/definitions.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mwparserfromhell/parser/ctokenizer/definitions.c b/mwparserfromhell/parser/ctokenizer/definitions.c index 38ed649..db18d47 100644 --- a/mwparserfromhell/parser/ctokenizer/definitions.c +++ b/mwparserfromhell/parser/ctokenizer/definitions.c @@ -65,8 +65,11 @@ static PyObject* unicode_to_lcase_ascii(PyObject *input, const char **string) return NULL; bytes = PyUnicode_AsASCIIString(lower); Py_DECREF(lower); - if (!bytes) + if (!bytes) { + if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) + PyErr_Clear(); return NULL; + } *string = PyBytes_AS_STRING(bytes); return bytes; }