From 309d4e8fd4e1e5e77d50ea96d11cdb19273f8c70 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 4 Sep 2012 19:05:42 -0400 Subject: [PATCH] Try a new highlighting engine. --- toolserver/copyvios/highlighter.py | 73 ++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 39 deletions(-) diff --git a/toolserver/copyvios/highlighter.py b/toolserver/copyvios/highlighter.py index 7010c52..79c3ec9 100644 --- a/toolserver/copyvios/highlighter.py +++ b/toolserver/copyvios/highlighter.py @@ -5,66 +5,59 @@ from re import sub, UNICODE from markupsafe import escape def highlight_delta(context, chain, delta): + degree = chain.degree - 1 + highlights = [False] * degree + block = [chain.START] * degree + for word in chain.text.split() + ([chain.END] * degree): + word = _strip_word(chain, word) + tblock = tuple(block) + if tblock in delta.chain and word in delta.chain[tblock]: + highlights[-1 * degree:] = [True] * degree + highlights.append(True) + else: + highlights.append(False) + block.pop(0) + block.append(word) + + i = degree + numwords = len(chain.text.split()) processed = [] - dchain = delta.chain - prev_prev = prev = chain.START - i = 0 - all_words = chain.text.split() - paragraphs = chain.text.split("\n") - for paragraph in paragraphs: - processed_words = [] - words = paragraph.split(" ") - for i, word in enumerate(words, i): - try: - next = _strip_word(all_words[i + 1]) - try: - next_next = _strip_word(all_words[i + 2]) - except IndexError: - next_next = chain.END - except IndexError: - next = next_next = chain.END - sword = _strip_word(word) - middle = (prev, sword) in dchain and next in dchain[(prev, sword)] - if middle: - before = after = True - else: - b_block = (prev_prev, prev) - a_block = (sword, next) - before = b_block in dchain and sword in dchain[b_block] - after = a_block in dchain and next_next in dchain[a_block] - is_first = i == 0 - is_last = i + 1 == len(all_words) - res = _highlight_word(word, before, after, is_first, is_last) - processed_words.append(res) - prev_prev = prev - prev = sword - processed.append(u" ".join(processed_words)) + for paragraph in chain.text.split("\n"): + words = [] + for i, word in enumerate(paragraph.split(), i): + before = highlights[i - 1] + after = highlights[i + 1] + first = i == degree + last = i - degree + 1 == numwords + words.append(_highlight_word(word, before, after, first, last)) + processed.append(u" ".join(words)) i += 1 + return u"

" + u"

\n

".join(processed) + u"

" -def _highlight_word(word, before, after, is_first, is_last): +def _highlight_word(word, before, after, first, last): if before and after: # Word is in the middle of a highlighted block, so don't change # anything unless this is the first word (force block to start) or the # last word (force block to end): res = unicode(escape(word)) - if is_first: + if first: res = u'' + res - if is_last: + if last: res += u'' elif before: # Word is the last in a highlighted block, so fade it out and then end # the block; force open a block before the word if this is the first # word: res = _fade_word(word, u"out") + u"" - if is_first: + if first: res = u'' + res elif after: # Word is the first in a highlighted block, so start the block and then # fade it in; force close the block after the word if this is the last # word: res = u'' + _fade_word(word, u"in") - if is_last: + if last: res += u"" else: # Word is completely outside of a highlighted block, so do nothing: @@ -84,5 +77,7 @@ def _fade_word(word, dir): base = u'{0}{1}' return base.format(before, after) -def _strip_word(word): +def _strip_word(chain, word): + if word == chain.START or word == chain.END: + return word return sub("[^\w\s-]", "", word.lower(), flags=UNICODE)