Try a new highlighting engine.

12 jaren geleden · 309d4e8fd4
--- a/toolserver/copyvios/highlighter.py
+++ b/toolserver/copyvios/highlighter.py
@@ -5,66 +5,59 @@ from re import sub, UNICODE
 from markupsafe import escape

 def highlight_delta(context, chain, delta):
    # Render chain.text as HTML: one <p>...</p> per "\n"-separated paragraph,
    # with each word passed through _highlight_word. `context` is not
    # referenced anywhere in the body shown here.
    #
    # NOTE(review): this body contains two different paragraph loops, assigns
    # `i` twice, and calls _strip_word with both one and two arguments. It
    # looks like two revisions of the function interleaved (diff markers
    # lost) -- confirm against the repository before relying on it.

    # Pass 1: build `highlights`, a list of booleans that starts with `degree`
    # False padding entries and gains one entry per word (plus one per
    # trailing chain.END sentinel).
    degree = chain.degree - 1
    highlights = [False] * degree
    block = [chain.START] * degree
    for word in chain.text.split() + ([chain.END] * degree):
        word = _strip_word(chain, word)
        tblock = tuple(block)
        # The current window of preceding words is a key of delta.chain and
        # this word is listed under it: flag the last `degree` entries and the
        # word itself.
        if tblock in delta.chain and word in delta.chain[tblock]:
            highlights[-1 * degree:] = [True] * degree
            highlights.append(True)
        else:
            highlights.append(False)
        # Slide the window forward by one word.
        block.pop(0)
        block.append(word)

    # This value of `i` is overwritten by `i = 0` below before it is read.
    i = degree
    numwords = len(chain.text.split())
    processed = []
    dchain = delta.chain
    prev_prev = prev = chain.START
    i = 0
    all_words = chain.text.split()
    paragraphs = chain.text.split("\n")
    for paragraph in paragraphs:
        processed_words = []
        words = paragraph.split(" ")
        # Enumeration starts at the current value of `i`, not at zero.
        for i, word in enumerate(words, i):
            # Look ahead up to two words in the whole text; positions past the
            # end fall back to chain.END.
            try:
                next = _strip_word(all_words[i + 1])
                try:
                    next_next = _strip_word(all_words[i + 2])
                except IndexError:
                    next_next = chain.END
            except IndexError:
                next = next_next = chain.END
            sword = _strip_word(word)
            # `middle`: (prev, word) is a key of dchain with `next` listed
            # under it, so both neighbours count as highlighted.
            middle = (prev, sword) in dchain and next in dchain[(prev, sword)]
            if middle:
                before = after = True
            else:
                # Otherwise test the window ending at this word and the window
                # starting at this word separately.
                b_block = (prev_prev, prev)
                a_block = (sword, next)
                before = b_block in dchain and sword in dchain[b_block]
                after = a_block in dchain and next_next in dchain[a_block]
            is_first = i == 0
            is_last = i + 1 == len(all_words)
            res = _highlight_word(word, before, after, is_first, is_last)
            processed_words.append(res)
            prev_prev = prev
            prev = sword
        processed.append(u" ".join(processed_words))
    # Second paragraph loop: reads the precomputed `highlights` flags of the
    # entries on either side of index `i`.
    for paragraph in chain.text.split("\n"):
        words = []
        for i, word in enumerate(paragraph.split(), i):
            before = highlights[i - 1]
            after = highlights[i + 1]
            first = i == degree
            last = i - degree + 1 == numwords
            words.append(_highlight_word(word, before, after, first, last))
        processed.append(u" ".join(words))
        # Step past the last index used in this paragraph.
        i += 1

    # Wrap every processed paragraph in its own <p> element.
    return u"<p>" + u"</p>\n<p>".join(processed) + u"</p>"

 def _highlight_word(word, before, after, is_first, is_last):
 def _highlight_word(word, before, after, first, last):
    # Build `res`: the word plus any opening/closing `cv-hl` span tags.
    # `before` and `after` are the highlight states the caller computed for
    # the neighbouring positions; the first/last flags force a span to be
    # opened or closed at the edges of the text.
    #
    # NOTE(review): the two `def` lines above and each `if is_first:` /
    # `if first:` (and `if is_last:` / `if last:`) pair below look like old
    # and new versions of the same line shown together -- confirm. The end of
    # this function is not visible in this excerpt.
    if before and after:
        # Word is in the middle of a highlighted block, so don't change
        # anything unless this is the first word (force block to start) or the
        # last word (force block to end):
        res = unicode(escape(word))
        if is_first:
        if first:
            res = u'<span class="cv-hl">' + res
        if is_last:
        if last:
            res += u'</span>'
    elif before:
        # Word is the last in a highlighted block, so fade it out and then end
        # the block; force open a block before the word if this is the first
        # word:
        res = _fade_word(word, u"out") + u"</span>"
        if is_first:
        if first:
            res = u'<span class="cv-hl">' + res
    elif after:
        # Word is the first in a highlighted block, so start the block and then
        # fade it in; force close the block after the word if this is the last
        # word:
        res = u'<span class="cv-hl">' + _fade_word(word, u"in")
        if is_last:
        if last:
            res += u"</span>"
    else:
        # Word is completely outside of a highlighted block, so do nothing:
@@ -84,5 +77,7 @@ def _fade_word(word, dir):
        base = u'<span class="cv-hl-in">{0}</span>{1}'
        return base.format(before, after)

 def _strip_word(word):
 def _strip_word(chain, word):
    if word == chain.START or word == chain.END:
        return word
    return sub("[^\w\s-]", "", word.lower(), flags=UNICODE)