Browse Source

Query object for storing parameters in a structured manner.

pull/24/head
Ben Kurtovic 12 years ago
parent
commit
8cce98fc8f
5 changed files with 50 additions and 38 deletions
  1. +12
    -12
      pages/copyvios.mako
  2. +12
    -19
      toolserver/copyvios/__init__.py
  3. +6
    -6
      toolserver/copyvios/checker.py
  4. +18
    -0
      toolserver/misc.py
  5. +2
    -1
      toolserver/sites.py

+ 12
- 12
pages/copyvios.mako View File

@@ -1,7 +1,7 @@
<%include file="/support/header.mako" args="environ=environ, title='Copyvio Detector', add_css=('copyvios.css',), add_js=('copyvios.js',)"/>\ <%include file="/support/header.mako" args="environ=environ, title='Copyvio Detector', add_css=('copyvios.css',), add_js=('copyvios.js',)"/>\
<%namespace module="toolserver.copyvios" import="main, highlight_delta"/>\ <%namespace module="toolserver.copyvios" import="main, highlight_delta"/>\
<%namespace module="toolserver.misc" import="urlstrip"/>\ <%namespace module="toolserver.misc" import="urlstrip"/>\
<% lang, orig_lang, project, title, url, bot, page, result = main(environ) %>
<% query, bot, all_langs, all_projects, page, result = main(environ) %>
<h1>Copyvio Detector</h1> <h1>Copyvio Detector</h1>
<p>This tool attempts to detect <a href="//en.wikipedia.org/wiki/WP:COPYVIO">copyright violations</a> in articles. Simply give the title of the page you want to check and hit Submit. The tool will then search for its content elsewhere on the web and display a report if a similar webpage is found. If you also provide a URL, it will not query any search engines and instead display a report comparing the article to that particular webpage, like the <a href="//toolserver.org/~dcoetzee/duplicationdetector/">Duplication Detector</a>. Check out the <a href="//en.wikipedia.org/wiki/User:EarwigBot/Copyvios/FAQ">FAQ</a> for more information and technical details.</p> <p>This tool attempts to detect <a href="//en.wikipedia.org/wiki/WP:COPYVIO">copyright violations</a> in articles. Simply give the title of the page you want to check and hit Submit. The tool will then search for its content elsewhere on the web and display a report if a similar webpage is found. If you also provide a URL, it will not query any search engines and instead display a report comparing the article to that particular webpage, like the <a href="//toolserver.org/~dcoetzee/duplicationdetector/">Duplication Detector</a>. Check out the <a href="//en.wikipedia.org/wiki/User:EarwigBot/Copyvios/FAQ">FAQ</a> for more information and technical details.</p>
<form action="${environ['PATH_INFO']}" method="get"> <form action="${environ['PATH_INFO']}" method="get">
@@ -11,7 +11,7 @@
<td> <td>
<tt>http://</tt> <tt>http://</tt>
<select name="lang"> <select name="lang">
<% selected_lang = orig_lang if orig_lang else bot.wiki.get_site().lang %>
<% selected_lang = query.orig_lang if query.orig_lang else bot.wiki.get_site().lang %>
% for code, name in all_langs: % for code, name in all_langs:
% if code == selected_lang: % if code == selected_lang:
<option value="${code}" selected="selected">${name}</option> <option value="${code}" selected="selected">${name}</option>
@@ -22,7 +22,7 @@
</select> </select>
<tt>.</tt> <tt>.</tt>
<select name="project"> <select name="project">
<% selected_project = project if project else bot.wiki.get_site().project %>
<% selected_project = query.project if query.project else bot.wiki.get_site().project %>
% for code, name in all_projects: % for code, name in all_projects:
% if code == selected_project: % if code == selected_project:
<option value="${code}" selected="selected">${name}</option> <option value="${code}" selected="selected">${name}</option>
@@ -38,24 +38,24 @@
<td>Page title:</td> <td>Page title:</td>
% if page: % if page:
<td><input type="text" name="title" size="60" value="${page.title | h}" /></td> <td><input type="text" name="title" size="60" value="${page.title | h}" /></td>
% elif title:
<td><input type="text" name="title" size="60" value="${title | h}" /></td>
% elif query.title:
<td><input type="text" name="title" size="60" value="${query.title | h}" /></td>
% else: % else:
<td><input type="text" name="title" size="60" /></td> <td><input type="text" name="title" size="60" /></td>
% endif % endif
</tr> </tr>
<tr> <tr>
<td>URL (optional):</td> <td>URL (optional):</td>
% if url:
<td><input type="text" name="url" size="120" value="${url | h}" /></td>
% if query.url:
<td><input type="text" name="url" size="120" value="${query.url | h}" /></td>
% else: % else:
<td><input type="text" name="url" size="120" /></td> <td><input type="text" name="url" size="120" /></td>
% endif % endif
</tr> </tr>
% if query.get("nocache") or (result and result.cached):
% if query.nocache or (result and result.cached):
<tr> <tr>
<td>Bypass cache:</td> <td>Bypass cache:</td>
% if query.get("nocache"):
% if query.nocache:
<td><input type="checkbox" name="nocache" value="1" checked="checked" /></td> <td><input type="checkbox" name="nocache" value="1" checked="checked" /></td>
% else: % else:
<td><input type="checkbox" name="nocache" value="1" /></td> <td><input type="checkbox" name="nocache" value="1" /></td>
@@ -67,12 +67,12 @@
</tr> </tr>
</table> </table>
</form> </form>
% if project and lang and title and not page:
% if query.project and query.lang and query.title and not page:
<div class="divider"></div> <div class="divider"></div>
<div id="cv-result-yes"> <div id="cv-result-yes">
<p>The given site (project=<b><tt>${project}</tt></b>, language=<b><tt>${lang}</tt></b>) doesn't seem to exist. It may also be closed or private. <a href="//${lang}.${project}.org/">Confirm its URL.</a></p>
<p>The given site (project=<b><tt>${query.project}</tt></b>, language=<b><tt>${query.lang}</tt></b>) doesn't seem to exist. It may also be closed or private. <a href="//${query.lang}.${query.project}.org/">Confirm its URL.</a></p>
</div> </div>
% elif project and lang and title and page and not result:
% elif query.project and query.lang and query.title and page and not result:
<div class="divider"></div> <div class="divider"></div>
<div id="cv-result-yes"> <div id="cv-result-yes">
<p>The given page doesn't seem to exist: <a href="${page.url}">${page.title | h}</a>.</p> <p>The given page doesn't seem to exist: <a href="${page.url}">${page.title | h}</a>.</p>


+ 12
- 19
toolserver/copyvios/__init__.py View File

@@ -1,34 +1,27 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-


from urlparse import parse_qs
from earwigbot.bot import Bot from earwigbot.bot import Bot


from .checker import get_results from .checker import get_results
from .highlighter import highlight_delta from .highlighter import highlight_delta
from ..misc import Query
from ..sites import get_site, get_sites from ..sites import get_site, get_sites


def main(context, environ): def main(context, environ):
lang = orig_lang = project = name = title = url = None

# Parse the query string:
query = parse_qs(environ["QUERY_STRING"])
if "lang" in query:
lang = orig_lang = query["lang"][0].decode("utf8").lower()
if "::" in lang:
lang, name = lang.split("::", 1)
if "project" in query:
project = query["project"][0].decode("utf8").lower()
if "title" in query:
title = query["title"][0].decode("utf8")
if "url" in query:
url = query["url"][0].decode("utf8")
query = Query(environ)
if query.lang:
query.lang = query.orig_lang = query.lang.lower()
if "::" in query.lang:
query.lang, query.name = query.lang.split("::", 1)
if query.project:
query.project = query.project.lower()


bot = Bot(".earwigbot") bot = Bot(".earwigbot")
all_langs, all_projects = get_sites(bot) all_langs, all_projects = get_sites(bot)
page = result = None page = result = None
if lang and project and title:
site = get_site(bot, lang, project, name, all_projects)
if query.lang and query.project and query.title:
site = get_site(bot, query, all_projects)
if site: if site:
page, result = get_results(bot, site, title, url, query)
page, result = get_results(bot, site, query)


return lang, orig_lang, project, title, url, bot, page, result
return query, bot, all_langs, all_projects, page, result

+ 6
- 6
toolserver/copyvios/checker.py View File

@@ -8,20 +8,20 @@ from earwigbot import exceptions


from ..misc import open_sql_connection from ..misc import open_sql_connection


def get_results(bot, site, title, url, query):
page = site.get_page(title)
def get_results(bot, site, query):
page = site.get_page(query.title)
try: try:
page.get() # Make sure that the page exists before we check it! page.get() # Make sure that the page exists before we check it!
except (exceptions.PageNotFoundError, exceptions.InvalidPageError): except (exceptions.PageNotFoundError, exceptions.InvalidPageError):
return page, None return page, None


# if url:
# result = _get_url_specific_results(page, url)
# if query.url:
# result = _get_url_specific_results(page, query.url)
# else: # else:
# conn = open_sql_connection(bot, "copyvioCache") # conn = open_sql_connection(bot, "copyvioCache")
# if not query.get("nocache"):
# if not query.nocache:
# result = _get_cached_results(page, conn) # result = _get_cached_results(page, conn)
# if query.get("nocache") or not result:
# if query.nocache or not result:
# result = _get_fresh_results(page, conn) # result = _get_fresh_results(page, conn)
tstart = time() tstart = time()
mc1 = __import__("earwigbot").wiki.copyvios.MarkovChain(page.get()) mc1 = __import__("earwigbot").wiki.copyvios.MarkovChain(page.get())


+ 18
- 0
toolserver/misc.py View File

@@ -1,9 +1,27 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-


from os.path import expanduser from os.path import expanduser
from urlparse import parse_qs


import oursql import oursql


class Query(object):
    """Attribute-style access to the parameters of a WSGI query string.

    The query string is parsed once at construction time and kept in the
    ``query`` dict, one value per parameter name. When a parameter is
    repeated in the query string, the last occurrence is the one kept.

    Reading an attribute returns the stored value for that parameter name,
    or ``None`` when no such parameter is stored. Assigning an attribute
    stores the value in the same dict, so callers may normalize or add
    parameters after construction (e.g. ``query.lang = query.lang.lower()``).
    """

    def __init__(self, environ):
        """Parse ``environ["QUERY_STRING"]`` into the ``query`` dict.

        ``environ`` is a WSGI-style mapping. A missing ``QUERY_STRING`` key
        is treated the same as an empty query string.
        """
        # Bind the backing dict through object.__setattr__: a plain
        # ``self.query = {}`` is routed to our own __setattr__, which needs
        # ``self.query`` to exist already and therefore recurses forever.
        object.__setattr__(self, "query", {})
        parsed = parse_qs(environ.get("QUERY_STRING", ""))
        for key, values in parsed.items():
            value = values[-1]  # last occurrence of a repeated parameter wins
            if isinstance(value, bytes):
                # Byte strings (what Python 2's parse_qs yields) become text.
                value = value.decode("utf8")
            self.query[key] = value

    def __getattr__(self, key):
        """Return the stored parameter named ``key``, or None if absent."""
        # Only reached when normal attribute lookup fails. If the backing
        # dict itself is what is missing (instance built without __init__),
        # fail cleanly instead of recursing through ``self.query``.
        if key == "query":
            raise AttributeError(key)
        return self.query.get(key)

    def __setattr__(self, key, value):
        """Store ``value`` as the parameter named ``key``."""
        self.query[key] = value


def open_sql_connection(bot, dbname): def open_sql_connection(bot, dbname):
conn_args = bot.config.wiki["_toolserverSQL"][dbname] conn_args = bot.config.wiki["_toolserverSQL"][dbname]
if "read_default_file" not in conn_args and "user" not in conn_args and "passwd" not in conn_args: if "read_default_file" not in conn_args and "user" not in conn_args and "passwd" not in conn_args:


+ 2
- 1
toolserver/sites.py View File

@@ -7,7 +7,8 @@ from earwigbot import exceptions


from .misc import open_sql_connection from .misc import open_sql_connection


def get_site(bot, lang, project, name, all_projects):
def get_site(bot, query, all_projects):
lang, project, name = query.lang, query.project, query.name
if project not in [proj[0] for proj in all_projects]: if project not in [proj[0] for proj in all_projects]:
return None return None
if project == "wikimedia" and name: # Special sites: if project == "wikimedia" and name: # Special sites:


Loading…
Cancel
Save