From a07d12699a9e64637ba08249a78831c1a29eab09 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 26 Sep 2014 02:12:51 -0500 Subject: [PATCH] Finally fix #3; speed up highlighter with a deque. --- copyvios/highlighter.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/copyvios/highlighter.py b/copyvios/highlighter.py index a6abcd0..ae806de 100644 --- a/copyvios/highlighter.py +++ b/copyvios/highlighter.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- +from collections import deque from re import sub, UNICODE from earwigbot.wiki.copyvios.markov import EMPTY_INTERSECTION @@ -10,7 +11,7 @@ __all__ = ["highlight_delta"] def highlight_delta(context, chain, delta): degree = chain.degree - 1 highlights = [False] * degree - block = [chain.START] * degree + block = deque([chain.START] * degree) if not delta: delta = EMPTY_INTERSECTION for word in chain.text.split() + ([chain.END] * degree): @@ -21,13 +22,13 @@ def highlight_delta(context, chain, delta): highlights.append(True) else: highlights.append(False) - block.pop(0) + block.popleft() block.append(word) i = degree numwords = len(chain.text.split()) result = [] - paragraphs = chain.text.split("\n") + paragraphs = deque(chain.text.split("\n")) while paragraphs: words = [] for i, word in enumerate(_get_next(paragraphs), i): @@ -45,14 +46,15 @@ def highlight_delta(context, chain, delta): return u"

".join(result) def _get_next(paragraphs): - paragraph = paragraphs.pop(0) - body = paragraph.split() - if len(body) <= 3: + body = [] + while paragraphs and not body: + body = paragraphs.popleft().split() + if body and len(body) <= 3: while paragraphs: next = paragraphs[0].split() if len(next) <= 3: body += next - paragraphs.pop(0) + paragraphs.popleft() else: break return body