Browse Source

Finally fix #3; speed up highlighter with a deque.

pull/24/head
Ben Kurtovic 9 years ago
parent
commit
a07d12699a
1 changed file with 9 additions and 7 deletions
  1. +9
    -7
      copyvios/highlighter.py

+ 9
- 7
copyvios/highlighter.py View File

@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-


from collections import deque
from re import sub, UNICODE from re import sub, UNICODE


from earwigbot.wiki.copyvios.markov import EMPTY_INTERSECTION from earwigbot.wiki.copyvios.markov import EMPTY_INTERSECTION
@@ -10,7 +11,7 @@ __all__ = ["highlight_delta"]
def highlight_delta(context, chain, delta): def highlight_delta(context, chain, delta):
degree = chain.degree - 1 degree = chain.degree - 1
highlights = [False] * degree highlights = [False] * degree
block = [chain.START] * degree
block = deque([chain.START] * degree)
if not delta: if not delta:
delta = EMPTY_INTERSECTION delta = EMPTY_INTERSECTION
for word in chain.text.split() + ([chain.END] * degree): for word in chain.text.split() + ([chain.END] * degree):
@@ -21,13 +22,13 @@ def highlight_delta(context, chain, delta):
highlights.append(True) highlights.append(True)
else: else:
highlights.append(False) highlights.append(False)
block.pop(0)
block.popleft()
block.append(word) block.append(word)


i = degree i = degree
numwords = len(chain.text.split()) numwords = len(chain.text.split())
result = [] result = []
paragraphs = chain.text.split("\n")
paragraphs = deque(chain.text.split("\n"))
while paragraphs: while paragraphs:
words = [] words = []
for i, word in enumerate(_get_next(paragraphs), i): for i, word in enumerate(_get_next(paragraphs), i):
@@ -45,14 +46,15 @@ def highlight_delta(context, chain, delta):
return u"<br /><br />".join(result) return u"<br /><br />".join(result)


def _get_next(paragraphs): def _get_next(paragraphs):
paragraph = paragraphs.pop(0)
body = paragraph.split()
if len(body) <= 3:
body = []
while paragraphs and not body:
body = paragraphs.popleft().split()
if body and len(body) <= 3:
while paragraphs: while paragraphs:
next = paragraphs[0].split() next = paragraphs[0].split()
if len(next) <= 3: if len(next) <= 3:
body += next body += next
paragraphs.pop(0)
paragraphs.popleft()
else: else:
break break
return body return body


Loading…
Cancel
Save