# -*- coding: utf-8 -*-
from collections import deque
from re import sub, UNICODE
from earwigbot.wiki.copyvios.markov import EMPTY_INTERSECTION
from markupsafe import escape
__all__ = ["highlight_delta"]
def highlight_delta(context, chain, delta):
    """Render ``chain.text`` with the words matched by *delta* highlighted.

    Each word of the text is normalized with ``_strip_word`` and pushed
    through a sliding window of ``chain.degree`` consecutive words (the text
    is padded with ``chain.START`` in front and ``chain.END`` behind).
    Whenever the window, as a tuple, is found in ``delta.chain``, every
    position covered by that window is flagged.

    The text is then re-emitted group by group, as produced by ``_get_next``:
    flagged words are rendered by ``_highlight_word``, all other words are
    passed through ``escape``.  Words inside a group are joined with a single
    space and the groups are joined with a newline.

    *context* is not used by this function.  A falsy *delta* is replaced with
    ``EMPTY_INTERSECTION``.
    """
    pad = chain.degree - 1
    flags = [False] * pad
    window = deque([chain.START] * pad)
    if not delta:
        delta = EMPTY_INTERSECTION

    # Pass 1: decide, for every word position, whether it is highlighted.
    # flags[pad + k] is the flag for the k-th word of the text; the first
    # ``pad`` entries and the last ``pad`` entries belong to the padding.
    tokens = chain.text.split()
    for token in tokens + [chain.END] * pad:
        window.append(_strip_word(chain, token))
        matched = tuple(window) in delta.chain
        window.popleft()
        if matched:
            # The whole window matched, so the ``pad`` positions before the
            # current one are highlighted too.
            flags[-pad:] = [True] * pad
        flags.append(matched)

    # Pass 2: walk the text again, paragraph group by paragraph group, and
    # render every word according to its flag and its neighbours' flags.
    total = len(tokens)
    position = pad
    pieces = []
    pending = deque(chain.text.split("\n"))
    while pending:
        rendered = []
        for token in _get_next(pending):
            if flags[position]:
                rendered.append(_highlight_word(
                    token,
                    flags[position - 1],
                    flags[position + 1],
                    position == pad,
                    position - pad + 1 == total))
            else:
                rendered.append(unicode(escape(token)))
            position += 1
        pieces.append(u" ".join(rendered))

    return u"\n".join(pieces)
def _get_next(paragraphs):
    """Pop and return the words of the next group of paragraphs.

    *paragraphs* is a deque of strings and is consumed from the left.
    Leading paragraphs that contain no words are discarded.  The first
    paragraph that does contain words starts the group; if it has three
    words or fewer, every directly following paragraph that also has three
    words or fewer (including empty ones) is merged into the same group.

    Returns the list of words, which is empty only when *paragraphs* ran out
    before any word was found.
    """
    words = []
    while paragraphs and not words:
        words = paragraphs.popleft().split()

    # Nothing found, or a regular-sized paragraph: hand it back as is.
    if not words or len(words) > 3:
        return words

    # Short paragraph: absorb the following short paragraphs as well.
    while paragraphs:
        upcoming = paragraphs[0].split()
        if len(upcoming) > 3:
            break
        words += upcoming
        paragraphs.popleft()
    return words
def _highlight_word(word, before, after, first, last):
    """Render one word according to its position in a highlighted run.

    *before* and *after* say whether the neighbouring positions are
    highlighted; *first* and *last* select whether an extra prefix or suffix
    literal is attached in the cases below.

    NOTE(review): every prefix/suffix literal in this function is an empty
    string, so as written they add nothing to the result; presumably they
    are meant to carry markup -- confirm.
    """
    if not before and not after:
        # No highlighted neighbour on either side: plain escaped word.
        return unicode(escape(word))

    if not before:
        # Word is the first in a highlighted block:
        rendered = u'' + _fade_word(word, u"in")
        if last:
            rendered += u""
        return rendered

    if not after:
        # Word is the last in a highlighted block:
        rendered = _fade_word(word, u"out") + u""
        if first:
            rendered = u'' + rendered
        return rendered

    # Word is in the middle of a highlighted block:
    rendered = unicode(escape(word))
    if first:
        rendered = u'' + rendered
    if last:
        rendered += u''
    return rendered
def _fade_word(word, dir):
    """Return *word* escaped, split so that one end can be styled apart.

    Words of four characters or fewer are escaped whole.  Longer words are
    cut into two separately escaped pieces: for ``dir == u"out"`` the cut is
    before the last four characters, for any other value it is after the
    first four.  The pieces are then put through a format template.

    NOTE(review): the templates here consist only of the placeholders, so
    the pieces are simply concatenated and *dir* never shows up in the
    output; presumably markup is meant to surround them -- confirm.
    """
    if len(word) <= 4:
        return u'{1}'.format(dir, unicode(escape(word)))

    if dir == u"out":
        # Separate the trailing four characters.
        lead = unicode(escape(word[:-4]))
        trail = unicode(escape(word[-4:]))
        return u'{0}{1}'.format(lead, trail)

    # Any other direction: separate the leading four characters.
    lead = unicode(escape(word[:4]))
    trail = unicode(escape(word[4:]))
    return u'{0}{1}'.format(lead, trail)
def _strip_word(chain, word):
    """Normalize *word* for comparison against chain n-grams.

    The ``chain.START`` and ``chain.END`` sentinels are returned untouched.
    Any other word is lower-cased and stripped of every character that is
    not a word character, whitespace or a hyphen (Unicode-aware).
    """
    if word == chain.START or word == chain.END:
        return word
    # Raw string: \w and \s are regex escapes, not string escapes.  The
    # non-raw spelling only works because Python leaves unknown escapes
    # alone, which newer interpreters warn about.
    return sub(r"[^\w\s-]", "", word.lower(), flags=UNICODE)