Browse Source

Updates.

pull/24/head
Ben Kurtovic 9 years ago
parent
commit
6045ea4e44
7 changed files with 71 additions and 62 deletions
  1. +1
    -1
      LICENSE
  2. +6
    -8
      copyvios/__init__.py
  3. +13
    -14
      copyvios/checker.py
  4. +7
    -6
      copyvios/sites.py
  5. +40
    -29
      pages/index.mako
  6. +3
    -3
      pages/settings.mako
  7. +1
    -1
      pages/support/footer.mako

+ 1
- 1
LICENSE View File

@@ -1,4 +1,4 @@
Copyright (c) 2009-2013 Ben Kurtovic <ben.kurtovic@gmail.com>
Copyright (c) 2009-2014 Ben Kurtovic <ben.kurtovic@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal


+ 6
- 8
copyvios/__init__.py View File

@@ -14,12 +14,10 @@ def main(context, environ):
if query.project:
query.project = query.project.lower()

bot = get_bot()
all_langs, all_projects = get_sites(bot)
page = result = None
query.bot = get_bot()
query.all_langs, query.all_projects = get_sites(query.bot)
if query.lang and query.project and query.title:
site = get_site(bot, query, all_projects)
if site:
page, result = get_results(bot, site, query)

return query, bot, all_langs, all_projects, page, result
query.site = get_site(query)
if query.site:
get_results(query)
return query

+ 13
- 14
copyvios/checker.py View File

@@ -8,28 +8,27 @@ from earwigbot import exceptions

from .misc import open_sql_connection

def get_results(bot, site, query):
page = site.get_page(query.title)
def get_results(query):
page = query.page = query.site.get_page(query.title)
try:
page.get() # Make sure that the page exists before we check it!
except (exceptions.PageNotFoundError, exceptions.InvalidPageError):
return page, None
return

if query.url:
if urlparse(query.url).scheme not in ["http", "https"]:
return page, "bad URI"
result = page.copyvio_compare(query.url)
result.cached = False
query.result = "bad URI"
return
query.result = page.copyvio_compare(query.url)
query.result.cached = False
else:
conn = open_sql_connection(bot, "cache")
conn = open_sql_connection(query.bot, "cache")
if not query.nocache:
result = _get_cached_results(page, conn)
if query.nocache or not result:
result = page.copyvio_check(max_queries=10, max_time=45)
result.cached = False
_cache_result(page, result, conn)

return page, result
query.result = _get_cached_results(page, conn)
if not query.result:
query.result = page.copyvio_check(max_queries=10, max_time=45)
query.result.cached = False
_cache_result(page, query.result, conn)

def _get_cached_results(page, conn):
query1 = "DELETE FROM cache WHERE cache_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 3 DAY)"


+ 7
- 6
copyvios/sites.py View File

@@ -7,23 +7,24 @@ from earwigbot import exceptions

from .misc import open_sql_connection

def get_site(bot, query, all_projects):
def get_site(query):
lang, project, name = query.lang, query.project, query.name
if project not in [proj[0] for proj in all_projects]:
wiki = query.bot.wiki
if project not in [proj[0] for proj in query.all_projects]:
return None
if project == "wikimedia" and name: # Special sites:
try:
return bot.wiki.get_site(name=name)
return wiki.get_site(name=name)
except exceptions.SiteNotFoundError:
try:
return bot.wiki.add_site(lang=lang, project=project)
return wiki.add_site(lang=lang, project=project)
except (exceptions.APIError, exceptions.LoginError):
return None
try:
return bot.wiki.get_site(lang=lang, project=project)
return wiki.get_site(lang=lang, project=project)
except exceptions.SiteNotFoundError:
try:
return bot.wiki.add_site(lang=lang, project=project)
return wiki.add_site(lang=lang, project=project)
except (exceptions.APIError, exceptions.LoginError):
return None



+ 40
- 29
pages/index.mako View File

@@ -1,32 +1,43 @@
<%include file="/support/header.mako" args="environ=environ, cookies=cookies, title='Earwig\'s Copyvio Detector'"/>\
<%namespace module="copyvios" import="main, highlight_delta"/>\
<%namespace module="copyvios.misc" import="urlstrip"/>\
<% query, bot, all_langs, all_projects, page, result = main(environ) %>\
% if query.project and query.lang and query.title and not page:
<div id="info-box" class="red-box">
<p>The given site (project=<b><span class="mono">${query.project | h}</span></b>, language=<b><span class="mono">${query.lang | h}</span></b>) doesn't seem to exist. It may also be closed or private. <a href="//${query.lang | h}.${query.project | h}.org/">Confirm its URL.</a></p>
</div>
% elif query.project and query.lang and query.title and page and not result:
<div id="info-box" class="red-box">
<p>The given page doesn't seem to exist: <a href="${page.url}">${page.title | h}</a>.</p>
</div>
% elif query.project and query.lang and query.title and query.url and page and result == "bad URI":
<% result = None %>
<div id="info-box" class="red-box">
<p>Unsupported URI scheme: <a href="${query.url | h}">${query.url | h}</a>.</p>
</div>
% endif
<%
query = main(environ)
# Unpack query data:
lang, orig_lang, title, oldid, url, nocache = query.lang, query.orig_lang, query.title, query.oldid, query.url, query.nocache
bot, site, page, result = query.bot, query.site, query.page, query.result
%>\
% if query.project and lang and (title or oldid):
% if not site:
<div id="info-box" class="red-box">
<p>The given site (project=<b><span class="mono">${query.project | h}</span></b>, language=<b><span class="mono">${lang | h}</span></b>) doesn't seem to exist. It may also be closed or private. <a href="//${lang | h}.${query.project | h}.org/">Confirm its URL.</a></p>
</div>
% elif title and not result:
<div id="info-box" class="red-box">
<p>The given page doesn't seem to exist: <a href="${page.url}">${page.title | h}</a>.</p>
</div>
% elif oldid and not result:
<div id="info-box" class="red-box">
<p>The given revision ID doesn't seem to exist: <a href="//${site.domain | h}/w/index.php?oldid=${oldid | h}">${oldid | h}</a>.</p>
</div>
% elif url and result == "bad URI":
<% result = None %>
<div id="info-box" class="red-box">
<p>Unsupported URI scheme: <a href="${url | h}">${url | h}</a>.</p>
</div>
% endif
%endif
<p>This tool attempts to detect <a href="//en.wikipedia.org/wiki/WP:COPYVIO">copyright violations</a> in articles. Simply give the title of the page or ID of the revision you want to check and hit Submit. The tool will search for similar content elsewhere on the web and display a report if a match is found. If you also provide a URL, it will not query any search engines and instead display a report comparing the article to that particular webpage, like the <a href="//toolserver.org/~dcoetzee/duplicationdetector/">Duplication Detector</a>. Check out the <a href="//en.wikipedia.org/wiki/User:EarwigBot/Copyvios/FAQ">FAQ</a> for more information and technical details.</p>
<p><i>Note:</i> The tool is still in beta. You are completely welcome to use it and provide <a href="//en.wikipedia.org/wiki/User_talk:The_Earwig">feedback</a>, but be aware that it may produce strange or broken results.</p>
<form action="${environ['SCRIPT_URL']}" method="get">
<form action="${environ['REQUEST_URI']}" method="get">
<table id="cv-form">
<tr>
<td>Site:</td>
<td colspan="3">
<span class="mono">http://</span>
<select name="lang">
<% selected_lang = query.orig_lang if query.orig_lang else cookies["CopyviosDefaultLang"].value if "CopyviosDefaultLang" in cookies else bot.wiki.get_site().lang %>\
% for code, name in all_langs:
<% selected_lang = orig_lang if orig_lang else cookies["CopyviosDefaultLang"].value if "CopyviosDefaultLang" in cookies else bot.wiki.get_site().lang %>\
% for code, name in query.all_langs:
% if code == selected_lang:
<option value="${code | h}" selected="selected">${name}</option>
% else:
@@ -37,7 +48,7 @@
<span class="mono">.</span>
<select name="project">
<% selected_project = query.project if query.project else cookies["CopyviosDefaultProject"].value if "CopyviosDefaultProject" in cookies else bot.wiki.get_site().project %>\
% for code, name in all_projects:
% for code, name in query.all_projects:
% if code == selected_project:
<option value="${code | h}" selected="selected">${name}</option>
% else:
@@ -53,16 +64,16 @@
<td id="cv-col2">
% if page:
<input class="cv-text" type="text" name="title" value="${page.title | h}" />
% elif query.title:
<input class="cv-text" type="text" name="title" value="${query.title | h}" />
% elif title:
<input class="cv-text" type="text" name="title" value="${title | h}" />
% else:
<input class="cv-text" type="text" name="title" />
% endif
</td>
<td id="cv-col3">or&nbsp;revision&nbsp;ID:</td>
<td id="cv-col4">
% if query.oldid:
<input class="cv-text" type="text" name="oldid" value="${query.oldid | h}" />
% if oldid:
<input class="cv-text" type="text" name="oldid" value="${oldid | h}" />
% else:
<input class="cv-text" type="text" name="oldid" />
% endif
@@ -71,18 +82,18 @@
<tr>
<td>URL&nbsp;(optional):</td>
<td colspan="3">
% if query.url:
<input class="cv-text" type="text" name="url" value="${query.url | h}" />
% if url:
<input class="cv-text" type="text" name="url" value="${url | h}" />
% else:
<input class="cv-text" type="text" name="url" />
% endif
</td>
</tr>
% if query.nocache or (result and result.cached):
% if nocache or (result and result.cached):
<tr>
<td>Bypass&nbsp;cache:</td>
<td colspan="3">
% if query.nocache:
% if nocache:
<input type="checkbox" name="nocache" value="1" checked="checked" />
% else:
<input type="checkbox" name="nocache" value="1" />
@@ -97,7 +108,7 @@
</tr>
</table>
</form>
% if page and result:
% if result:
<% show_details = "CopyviosShowDetails" in cookies and cookies["CopyviosShowDetails"].value == "True" %>
<div class="divider"></div>
<div id="cv-result" class="${'red' if result.violation else 'green'}-box">
@@ -107,7 +118,7 @@
<h2 id="cv-result-header">No violations detected in <a href="${page.url}">${page.title | h}</a>.</h2>
% endif
<ul id="cv-result-list">
% if not result.violation and not query.url:
% if not result.violation and not url:
% if result.url:
<li>Best match: <a href="${result.url | h}">${result.url | urlstrip, h}</a>.</li>
% else:


+ 3
- 3
pages/settings.mako View File

@@ -8,7 +8,7 @@
</div>
% endif
<p>This page contains some configurable options for the copyvio detector. Settings are saved as cookies. You can view and delete all cookies generated by this site at the bottom of this page.</p>
<form action="${environ['SCRIPT_URL']}" method="post">
<form action="${environ['REQUEST_URI']}" method="post">
<table>
<tr>
<td>Default site:</td>
@@ -85,7 +85,7 @@
<td><span class="mono">${cookie.value | h}</span></td>
% endtry
<td>
<form action="${environ['SCRIPT_URL']}" method="post">
<form action="${environ['REQUEST_URI']}" method="post">
<input type="hidden" name="action" value="delete">
<input type="hidden" name="cookie" value="${key | h}">
<button type="submit">Delete</button>
@@ -95,7 +95,7 @@
% endfor
<tr>
<td>
<form action="${environ['SCRIPT_URL']}" method="post">
<form action="${environ['REQUEST_URI']}" method="post">
<input type="hidden" name="action" value="delete">
<input type="hidden" name="all" value="1">
<button type="submit">Delete all</button>


+ 1
- 1
pages/support/footer.mako View File

@@ -2,7 +2,7 @@
<%namespace module="copyvios.background" import="get_desc_url"/>\
</div>
<div id="footer">
<p>Copyright &copy; 2009&ndash;2013 <a href="//en.wikipedia.org/wiki/User:The_Earwig">Ben Kurtovic</a> &bull; \
<p>Copyright &copy; 2009&ndash;2014 <a href="//en.wikipedia.org/wiki/User:The_Earwig">Ben Kurtovic</a> &bull; \
<a href="https://github.com/earwig/copyvios">View Source</a> &bull; \
% if ("CopyviosBackground" in cookies and cookies["CopyviosBackground"].value in ["potd", "list"]) or "CopyviosBackground" not in cookies:
<a href="${get_desc_url() | h}">Background</a> &bull; \


Loading…
Cancel
Save