From 4acb885b9f32206c16d69a096ea12629ab13d6a0 Mon Sep 17 00:00:00 2001 From: David Ebbo Date: Thu, 4 Jan 2024 05:50:08 +0100 Subject: [PATCH] Increase MAX_DEPTH to 100 (#314) * Increase MAX_DEPTH to 100 * Update nested_templates_and_style_tags to cover new limit --- src/mwparserfromhell/parser/ctokenizer/tok_support.h | 2 +- src/mwparserfromhell/parser/tokenizer.py | 2 +- tests/tokenizer/integration.mwtest | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mwparserfromhell/parser/ctokenizer/tok_support.h b/src/mwparserfromhell/parser/ctokenizer/tok_support.h index e877f7c..e8a9c63 100644 --- a/src/mwparserfromhell/parser/ctokenizer/tok_support.h +++ b/src/mwparserfromhell/parser/ctokenizer/tok_support.h @@ -49,7 +49,7 @@ Py_UCS4 Tokenizer_read_backwards(Tokenizer *, Py_ssize_t); /* Macros */ -#define MAX_DEPTH 40 +#define MAX_DEPTH 100 #define Tokenizer_CAN_RECURSE(self) (self->depth < MAX_DEPTH) #define Tokenizer_IS_CURRENT_STACK(self, id) \ (self->topstack->ident.head == (id).head && \ diff --git a/src/mwparserfromhell/parser/tokenizer.py b/src/mwparserfromhell/parser/tokenizer.py index dcdfcee..85032cb 100644 --- a/src/mwparserfromhell/parser/tokenizer.py +++ b/src/mwparserfromhell/parser/tokenizer.py @@ -92,7 +92,7 @@ class Tokenizer: END, ] URISCHEME = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-" - MAX_DEPTH = 40 + MAX_DEPTH = 100 regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE) tag_splitter = re.compile(r"([\s\"\'\\]+)") diff --git a/tests/tokenizer/integration.mwtest b/tests/tokenizer/integration.mwtest index 8c41f8b..4c499de 100644 --- a/tests/tokenizer/integration.mwtest +++ b/tests/tokenizer/integration.mwtest @@ -357,8 +357,8 @@ output: [Text(text="[["), name: nested_templates_and_style_tags label: many nested templates and style tags, testing edge case behavior and error recovery near the recursion depth limit (see issue #224) -input: "{{a|'''}}{{b|1='''c''}}{{d|1='''e''}}{{f|1='''g''}}{{h|1='''i''}}{{j|1='''k''}}{{l|1='''m''}}{{n|1='''o''}}{{p|1='''q''}}{{r|1=''s'''}}{{t|1='''u''}}{{v|1='''w''x'''y'''}}\n{|\n|-\n|'''\n|}" -output: [TemplateOpen(), Text(text="a"), TemplateParamSeparator(), Text(text="'''"), TemplateClose(), TemplateOpen(), Text(text="b"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="c"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="d"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="e"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="f"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="g"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="h"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="i"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="j"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="k"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="l"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="m"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="n"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="o''}}"), TemplateOpen(), Text(text="p"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="q"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="r"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="''s'''"), TemplateClose(), TemplateOpen(), Text(text="t"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="u"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), Text(text="{{v|1="), TagOpenClose(), Text(text="b"), TagCloseClose(), Text(text="w''x"), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="y"), TagOpenClose(), Text(text="b"), TagCloseClose(), TemplateClose(), Text(text="\n"), TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text="'''\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +input: "{{a|'''}}{{b|1='''c''}}{{d|1='''e''}}{{f|1='''g''}}{{h|1='''i''}}{{j|1='''k''}}{{l|1='''m''}}{{n|1='''o''}}{{p|1='''q''}}{{r|1=''s'''}}{{t|1='''u''}}{{v|1='''w''x'''y'''}}{{A|'''}}{{B|1='''C''}}{{D|1='''E''}}{{F|1='''G''}}{{H|1='''I''}}{{J|1='''K''}}{{L|1='''M''}}{{N|1='''O''}}{{P|1='''Q''}}{{R|1=''S'''}}{{T|1='''U''}}{{V|1='''W''X'''Y'''}}{{aa|'''}}{{bb|1='''cc''}}{{dd|1='''ee''}}\n{|\n|-\n|'''\n|}" +output: [TemplateOpen(), Text(text='a'), TemplateParamSeparator(), Text(text="'''"), TemplateClose(), TemplateOpen(), Text(text='b'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text='i'), TagCloseOpen(), Text(text='c'), TagOpenClose(), Text(text='i'), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text='d'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text='i'), TagCloseOpen(), Text(text='e'), TagOpenClose(), Text(text='i'), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text='f'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text='i'), TagCloseOpen(), Text(text='g'), TagOpenClose(), Text(text='i'), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text='h'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text='i'), TagCloseOpen(), Text(text='i'), TagOpenClose(), Text(text='i'), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text='j'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text='i'), TagCloseOpen(), Text(text='k'), TagOpenClose(), Text(text='i'), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text='l'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text='i'), TagCloseOpen(), Text(text='m'), TagOpenClose(), Text(text='i'), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text='n'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text='i'), TagCloseOpen(), Text(text='o'), TagOpenClose(), Text(text='i'), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text='p'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text='i'), TagCloseOpen(), Text(text='q'), TagOpenClose(), Text(text='i'), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text='r'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), Text(text="''s'''"), TemplateClose(), TemplateOpen(), Text(text='t'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text='i'), TagCloseOpen(), Text(text='u'), TagOpenClose(), Text(text='i'), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text='v'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), TagOpenOpen(wiki_markup="'''"), Text(text='b'), TagCloseOpen(), Text(text="w''x"), TagOpenClose(), Text(text='b'), TagCloseClose(), Text(text="y'''"), TemplateClose(), TemplateOpen(), Text(text='A'), TemplateParamSeparator(), Text(text="'''"), TemplateClose(), TemplateOpen(), Text(text='B'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text='i'), TagCloseOpen(), Text(text='C'), TagOpenClose(), Text(text='i'), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text='D'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text='i'), TagCloseOpen(), Text(text='E'), TagOpenClose(), Text(text='i'), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text='F'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text='i'), TagCloseOpen(), Text(text='G'), TagOpenClose(), Text(text='i'), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text='H'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text='i'), TagCloseOpen(), Text(text='I'), TagOpenClose(), Text(text='i'), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text='J'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text='i'), TagCloseOpen(), Text(text='K'), TagOpenClose(), Text(text='i'), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text='L'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text='i'), TagCloseOpen(), Text(text='M'), TagOpenClose(), Text(text='i'), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text='N'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text='i'), TagCloseOpen(), Text(text='O'), TagOpenClose(), Text(text='i'), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text='P'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text='i'), TagCloseOpen(), Text(text='Q'), TagOpenClose(), Text(text='i'), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text='R'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), Text(text="''S'''"), TemplateClose(), TemplateOpen(), Text(text='T'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text='i'), TagCloseOpen(), Text(text='U'), TagOpenClose(), Text(text='i'), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text='V'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text='i'), TagCloseOpen(), Text(text='W'), TagOpenClose(), Text(text='i'), TagCloseClose(), Text(text='X'), TagOpenOpen(wiki_markup="'''"), Text(text='b'), TagCloseOpen(), Text(text='Y'), TagOpenClose(), Text(text='b'), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text='aa'), TemplateParamSeparator(), TagOpenOpen(wiki_markup="'''"), Text(text='b'), TagCloseOpen(), Text(text='}}{{bb|1='), TagOpenClose(), Text(text='b'), TagCloseClose(), Text(text="cc''"), TemplateClose(), TemplateOpen(), Text(text='dd'), TemplateParamSeparator(), Text(text='1'), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text='i'), TagCloseOpen(), Text(text='ee'), TagOpenClose(), Text(text='i'), TagCloseClose(), TemplateClose(), Text(text='\n'), TagOpenOpen(wiki_markup='{|'), Text(text='table'), TagCloseOpen(padding='\n'), TagOpenOpen(wiki_markup='|-'), Text(text='tr'), TagCloseOpen(padding='\n'), TagOpenOpen(wiki_markup='|'), Text(text='td'), TagCloseOpen(padding=''), Text(text="'''\n"), TagOpenClose(wiki_markup=''), Text(text='td'), TagCloseClose(), TagOpenClose(wiki_markup=''), Text(text='tr'), TagCloseClose(), TagOpenClose(wiki_markup='|}'), Text(text='table'), TagCloseClose()] ---