diff --git a/toolserver/copyvios/highlighter.py b/toolserver/copyvios/highlighter.py
index 67efb7f..ba4df87 100644
--- a/toolserver/copyvios/highlighter.py
+++ b/toolserver/copyvios/highlighter.py
@@ -6,6 +6,7 @@ from markupsafe import escape
def highlight_delta(context, chain, delta):
processed = []
+ dchain = delta.chain
prev_prev = prev = chain.START
i = 0
all_words = chain.text.split()
@@ -15,14 +16,22 @@ def highlight_delta(context, chain, delta):
words = paragraph.split(" ")
for i, word in enumerate(words, i):
try:
- next = _strip_word(all_words[i+1])
+ next = _strip_word(all_words[i + 1])
+ try:
+ next_next = _strip_word(all_words[i + 2])
+ except IndexError:
+ next_next = chain.END
except IndexError:
- next = chain.END
+ next = next_next = chain.END
sword = _strip_word(word)
- block = (prev_prev, prev) # Block for before
- alock = (prev, sword) # Block for after
- before = [block in delta.chain and sword in delta.chain[block]]
- after = [alock in delta.chain and next in delta.chain[alock]]
+ middle = (prev, sword) in dchain and next in dchain[(prev, sword)]
+ if middle:
+ before = after = True
+ else:
+ b_block = (prev_prev, prev)
+ a_block = (sword, next)
+ before = b_block in dchain and sword in dchain[b_block]
+ after = a_block in dchain and next_next in dchain[a_block]
is_first = i == 0
is_last = i + 1 == len(all_words)
res = _highlight_word(word, before, after, is_first, is_last)
@@ -38,7 +47,7 @@ def _highlight_word(word, before, after, is_first, is_last):
# Word is in the middle of a highlighted block, so don't change
# anything unless this is the first word (force block to start) or the
# last word (force block to end):
- res = escape(word)
+ res = unicode(escape(word))
if is_first:
res = u'' + res
if is_last:
@@ -59,18 +68,21 @@ def _highlight_word(word, before, after, is_first, is_last):
res += u""
else:
# Word is completely outside of a highlighted block, so do nothing:
- res = escape(word)
+ res = unicode(escape(word))
return res
def _fade_word(word, dir):
if len(word) <= 4:
- return u'{1}'.format(dir, escape(word))
+ word = unicode(escape(word))
+ return u'{1}'.format(dir, word)
if dir == u"out":
+ before, after = unicode(escape(word[:-4])), unicode(escape(word[-4:]))
base = u'{0}{1}'
- return base.format(escape(word[:-4]), escape(word[-4:]))
+ return base.format(before, after)
else:
+ before, after = unicode(escape(word[:4])), unicode(escape(word[4:]))
base = u'{0}{1}'
- return base.format(escape(word[:4]), escape(word[4:]))
+ return base.format(before, after)
def _strip_word(word):
return sub("[^\w\s-]", "", word.lower(), flags=UNICODE)