Browse Source

Fix highlighter for trigrams; fix HTML escaping.

pull/24/head
Ben Kurtovic 12 years ago
parent
commit
912e0dcce3
1 changed files with 23 additions and 11 deletions
  1. +23
    -11
      toolserver/copyvios/highlighter.py

+ 23
- 11
toolserver/copyvios/highlighter.py View File

@@ -6,6 +6,7 @@ from markupsafe import escape

def highlight_delta(context, chain, delta):
processed = []
dchain = delta.chain
prev_prev = prev = chain.START
i = 0
all_words = chain.text.split()
@@ -15,14 +16,22 @@ def highlight_delta(context, chain, delta):
words = paragraph.split(" ")
for i, word in enumerate(words, i):
try:
next = _strip_word(all_words[i+1])
next = _strip_word(all_words[i + 1])
try:
next_next = _strip_word(all_words[i + 2])
except IndexError:
next_next = chain.END
except IndexError:
next = chain.END
next = next_next = chain.END
sword = _strip_word(word)
block = (prev_prev, prev) # Block for before
alock = (prev, sword) # Block for after
before = [block in delta.chain and sword in delta.chain[block]]
after = [alock in delta.chain and next in delta.chain[alock]]
middle = (prev, sword) in dchain and next in dchain[(prev, sword)]
if middle:
before = after = True
else:
b_block = (prev_prev, prev)
a_block = (sword, next)
before = b_block in dchain and sword in dchain[b_block]
after = a_block in dchain and next_next in dchain[a_block]
is_first = i == 0
is_last = i + 1 == len(all_words)
res = _highlight_word(word, before, after, is_first, is_last)
@@ -38,7 +47,7 @@ def _highlight_word(word, before, after, is_first, is_last):
# Word is in the middle of a highlighted block, so don't change
# anything unless this is the first word (force block to start) or the
# last word (force block to end):
res = escape(word)
res = unicode(escape(word))
if is_first:
res = u'<span class="cv-hl">' + res
if is_last:
@@ -59,18 +68,21 @@ def _highlight_word(word, before, after, is_first, is_last):
res += u"</span>"
else:
# Word is completely outside of a highlighted block, so do nothing:
res = escape(word)
res = unicode(escape(word))
return res

def _fade_word(word, dir):
    """Return HTML for *word* with a fade span at one edge.

    word -- the raw (unescaped) word to render.
    dir  -- fade direction; u"out" fades the end of the word, any other
            value fades the start. For short words the value is also
            interpolated into the CSS class name (``cv-hl-<dir>``).

    Words of four characters or fewer are wrapped entirely in the fade
    span. Longer words have only four characters wrapped: the last four
    for u"out", the first four otherwise.
    """
    if len(word) <= 4:
        word = unicode(escape(word))
        return u'<span class="cv-hl-{0}">{1}</span>'.format(dir, word)

    # Slice the raw word first and escape each piece separately, so the
    # four-character cut is measured in source characters rather than in
    # the characters of an HTML entity such as "&amp;".
    # NOTE(review): the unicode() wrapper presumably turns markupsafe's
    # Markup result back into a plain string so that later concatenation
    # with literal tags is not escaped again -- confirm against callers.
    if dir == u"out":
        before, after = unicode(escape(word[:-4])), unicode(escape(word[-4:]))
        base = u'{0}<span class="cv-hl-out">{1}</span>'
    else:
        before, after = unicode(escape(word[:4])), unicode(escape(word[4:]))
        base = u'<span class="cv-hl-in">{0}</span>{1}'
    return base.format(before, after)

def _strip_word(word):
return sub("[^\w\s-]", "", word.lower(), flags=UNICODE)

Loading…
Cancel
Save