From e23a37dde87eb4b1db38256f60a748b8e73d4f98 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 4 Sep 2012 20:25:06 -0400 Subject: [PATCH] Join adjacent lines with only one word. --- toolserver/copyvios/highlighter.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/toolserver/copyvios/highlighter.py b/toolserver/copyvios/highlighter.py index 1fa773c..539b3d7 100644 --- a/toolserver/copyvios/highlighter.py +++ b/toolserver/copyvios/highlighter.py @@ -22,9 +22,10 @@ def highlight_delta(context, chain, delta): i = degree numwords = len(chain.text.split()) processed = [] - for paragraph in chain.text.split("\n"): + paragraphs = chain.text.split("\n") + while paragraphs: words = [] - for i, word in enumerate(paragraph.split(), i): + for i, word in enumerate(_get_next(paragraphs), i): if highlights[i]: before = highlights[i - 1] after = highlights[i + 1] @@ -38,6 +39,19 @@ def highlight_delta(context, chain, delta): return u"

" + u"

\n

".join(processed) + u"

" +def _get_next(paragraphs): + paragraph = paragraphs.pop(0) + body = paragraph.split() + if len(body) == 1: + while paragraphs: + next = paragraphs[0].split() + if len(next) == 1: + body += next + paragraphs.pop(0) + else: + break + return body + def _highlight_word(word, before, after, first, last): if before and after: # Word is in the middle of a highlighted block: