Bladeren bron

Try a new highlighting engine.

pull/24/head
Ben Kurtovic 12 jaren geleden
bovenliggende
commit
309d4e8fd4
1 gewijzigde bestanden met toevoegingen van 34 en 39 verwijderingen
  1. +34
    -39
      toolserver/copyvios/highlighter.py

+ 34
- 39
toolserver/copyvios/highlighter.py Bestand weergeven

@@ -5,66 +5,59 @@ from re import sub, UNICODE
from markupsafe import escape

def highlight_delta(context, chain, delta):
degree = chain.degree - 1
highlights = [False] * degree
block = [chain.START] * degree
for word in chain.text.split() + ([chain.END] * degree):
word = _strip_word(chain, word)
tblock = tuple(block)
if tblock in delta.chain and word in delta.chain[tblock]:
highlights[-1 * degree:] = [True] * degree
highlights.append(True)
else:
highlights.append(False)
block.pop(0)
block.append(word)

i = degree
numwords = len(chain.text.split())
processed = []
dchain = delta.chain
prev_prev = prev = chain.START
i = 0
all_words = chain.text.split()
paragraphs = chain.text.split("\n")
for paragraph in paragraphs:
processed_words = []
words = paragraph.split(" ")
for i, word in enumerate(words, i):
try:
next = _strip_word(all_words[i + 1])
try:
next_next = _strip_word(all_words[i + 2])
except IndexError:
next_next = chain.END
except IndexError:
next = next_next = chain.END
sword = _strip_word(word)
middle = (prev, sword) in dchain and next in dchain[(prev, sword)]
if middle:
before = after = True
else:
b_block = (prev_prev, prev)
a_block = (sword, next)
before = b_block in dchain and sword in dchain[b_block]
after = a_block in dchain and next_next in dchain[a_block]
is_first = i == 0
is_last = i + 1 == len(all_words)
res = _highlight_word(word, before, after, is_first, is_last)
processed_words.append(res)
prev_prev = prev
prev = sword
processed.append(u" ".join(processed_words))
for paragraph in chain.text.split("\n"):
words = []
for i, word in enumerate(paragraph.split(), i):
before = highlights[i - 1]
after = highlights[i + 1]
first = i == degree
last = i - degree + 1 == numwords
words.append(_highlight_word(word, before, after, first, last))
processed.append(u" ".join(words))
i += 1

return u"<p>" + u"</p>\n<p>".join(processed) + u"</p>"

def _highlight_word(word, before, after, is_first, is_last):
def _highlight_word(word, before, after, first, last):
if before and after:
# Word is in the middle of a highlighted block, so don't change
# anything unless this is the first word (force block to start) or the
# last word (force block to end):
res = unicode(escape(word))
if is_first:
if first:
res = u'<span class="cv-hl">' + res
if is_last:
if last:
res += u'</span>'
elif before:
# Word is the last in a highlighted block, so fade it out and then end
# the block; force open a block before the word if this is the first
# word:
res = _fade_word(word, u"out") + u"</span>"
if is_first:
if first:
res = u'<span class="cv-hl">' + res
elif after:
# Word is the first in a highlighted block, so start the block and then
# fade it in; force close the block after the word if this is the last
# word:
res = u'<span class="cv-hl">' + _fade_word(word, u"in")
if is_last:
if last:
res += u"</span>"
else:
# Word is completely outside of a highlighted block, so do nothing:
@@ -84,5 +77,7 @@ def _fade_word(word, dir):
base = u'<span class="cv-hl-in">{0}</span>{1}'
return base.format(before, after)

def _strip_word(word):
def _strip_word(chain, word):
if word == chain.START or word == chain.END:
return word
return sub("[^\w\s-]", "", word.lower(), flags=UNICODE)

Laden…
Annuleren
Opslaan