' + res
elif after:
# Word is the first in a highlighted block, so start the block and
# then fade it in; force close the block after the word if this is
# the last word:
res = u'' + fade_word(word, u"in")
if is_last:
res += u""
else:
# Word is completely outside of a highlighted block, so do nothing:
res = word
return res
def fade_word(word, dir):
    """Format *word* for the edge of a highlighted block.

    word -- the text of a single word.
    dir  -- fade direction; u"out" selects the trailing four characters as
            the split point, any other value selects the leading four.

    Words of four characters or fewer are formatted as a single piece.
    Longer words are split into two pieces that are passed to the format
    template in their original order.

    NOTE(review): the templates visible here consist only of positional
    placeholders, so the pieces are re-joined unchanged.  Any markup that
    distinguishes the faded portion presumably belongs in these templates;
    confirm against the deployed file.
    """
    if len(word) > 4:
        # Split four characters in from whichever end is being faded.
        if dir == u"out":
            first, second = word[:-4], word[-4:]
        else:
            first, second = word[:4], word[4:]
        return u'{0}{1}'.format(first, second)
    # Short word: handled whole; *dir* is supplied as argument 0 but the
    # template only references argument 1.
    return u'{1}'.format(dir, word)
def strip_word(word):
    """Return *word* lowercased with punctuation removed.

    Every character that is not a word character, whitespace, or a hyphen
    is deleted.  Matching is done with the UNICODE flag.
    """
    # Raw string: the backslash sequences in the pattern are regex classes,
    # not string escapes, so they must reach the regex engine untouched.
    return sub(r"[^\w\s-]", "", word.lower(), flags=UNICODE)
def urlstrip(url):
    """Return *url* without a leading scheme / "www." and trailing slash.

    The prefixes "http://", "https://" and "www." are each checked once, in
    that order, and removed when the (already shortened) URL starts with
    them.  Afterwards a single trailing "/" is dropped if present.
    """
    for prefix in ("http://", "https://", "www."):
        if url.startswith(prefix):
            url = url[len(prefix):]
    return url[:-1] if url.endswith("/") else url
%>\
<%
# Request parameters: each one is None unless the query string supplies it.
lang = name = None
query = parse_qs(environ["QUERY_STRING"])

if "lang" in query:
    lang = query["lang"][0].decode("utf8").lower()
    # A value of the form "lang::name" is split into its two parts.
    if "::" in lang:
        lang, name = lang.split("::", 1)

# Only the first value given for each parameter is used.
project = (query["project"][0].decode("utf8").lower()
           if "project" in query else None)
title = query["title"][0].decode("utf8") if "title" in query else None
url = query["url"][0].decode("utf8") if "url" in query else None

bot = Bot(".earwigbot")
site = bot.wiki.get_site()
all_langs, all_projects = get_sites(bot)

# A check is run only when language, project and title were all provided;
# otherwise the page renders with no page/result.
page = result = None
if lang and project and title:
    page, result = get_results(bot, lang, project, name, all_projects,
                               title, url, query)
%>\
<%include file="/support/header.mako" args="environ=environ, title='Copyvio Detector', add_css=('copyvios.css',), add_js=('copyvios.js',)"/>
Copyvio Detector
This tool attempts to detect copyright violations in articles. Simply give the title of the page you want to check and hit Submit. The tool will then search for its content elsewhere on the web and display a report if a similar webpage is found. If you also provide a URL, it will not query any search engines and instead display a report comparing the article to that particular webpage, like the Duplication Detector. Check out the FAQ for more information and technical details.
% if project and lang and title and not page:
The given site (project=${project}, language=${lang}) doesn't seem to exist. Check its URL?
% elif project and lang and title and page and not result:
% elif page:
% if result.violation:
% else:
% endif
- Trigrams: Article: ${result.article_chain.size()} / Source: ${result.source_chain.size()} / Delta: ${result.delta_chain.size()}
% if result.cached:
% if result.queries:
- Retrieved from cache in ${round(result.tdiff, 3)} seconds (originally generated in ${round(result.original_tdiff, 3)}s using ${result.queries} queries; ${round(result.original_tdiff - result.tdiff, 3)}s saved).
% else:
- Retrieved from cache in ${round(result.tdiff, 3)} seconds (originally generated in ${round(result.original_tdiff, 3)}s; ${round(result.original_tdiff - result.tdiff, 3)}s saved).
% endif
% endif
% if result.queries:
- Fun fact: The Wikimedia Foundation paid Yahoo! Inc. $${result.queries * 0.0008} USD for these results.
% endif
Article: ${highlight_delta(result.article_chain, result.delta_chain)} |
Source: ${highlight_delta(result.source_chain, result.delta_chain)} |
% endif
<%include file="/support/footer.mako" args="environ=environ"/>