From 8cce98fc8fc0023052752759a5ea5dac882528f9 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 22 Jul 2012 16:19:32 -0400 Subject: [PATCH] Query object for storing parameters in a structured manner. --- pages/copyvios.mako | 24 ++++++++++++------------ toolserver/copyvios/__init__.py | 31 ++++++++++++------------------- toolserver/copyvios/checker.py | 12 ++++++------ toolserver/misc.py | 18 ++++++++++++++++++ toolserver/sites.py | 3 ++- 5 files changed, 50 insertions(+), 38 deletions(-) diff --git a/pages/copyvios.mako b/pages/copyvios.mako index 3a80ab2..12ac0c6 100644 --- a/pages/copyvios.mako +++ b/pages/copyvios.mako @@ -1,7 +1,7 @@ <%include file="/support/header.mako" args="environ=environ, title='Copyvio Detector', add_css=('copyvios.css',), add_js=('copyvios.js',)"/>\ <%namespace module="toolserver.copyvios" import="main, highlight_delta"/>\ <%namespace module="toolserver.misc" import="urlstrip"/>\ -<% lang, orig_lang, project, title, url, bot, page, result = main(environ) %> +<% query, bot, all_langs, all_projects, page, result = main(environ) %>

Copyvio Detector

This tool attempts to detect copyright violations in articles. Simply give the title of the page you want to check and hit Submit. The tool will then search for its content elsewhere on the web and display a report if a similar webpage is found. If you also provide a URL, it will not query any search engines and instead display a report comparing the article to that particular webpage, like the Duplication Detector. Check out the FAQ for more information and technical details.

@@ -11,7 +11,7 @@ http:// . - % elif title: - + % elif query.title: + % else: % endif URL (optional): - % if url: - + % if query.url: + % else: % endif - % if query.get("nocache") or (result and result.cached): + % if query.nocache or (result and result.cached): Bypass cache: - % if query.get("nocache"): + % if query.nocache: % else: @@ -67,12 +67,12 @@
- % if project and lang and title and not page: + % if query.project and query.lang and query.title and not page:
-

The given site (project=${project}, language=${lang}) doesn't seem to exist. It may also be closed or private. Confirm its URL.

+

The given site (project=${query.project}, language=${query.lang}) doesn't seem to exist. It may also be closed or private. Confirm its URL.

- % elif project and lang and title and page and not result: + % elif query.project and query.lang and query.title and page and not result:

The given page doesn't seem to exist: ${page.title | h}.

diff --git a/toolserver/copyvios/__init__.py b/toolserver/copyvios/__init__.py index 201f0db..0992d6a 100644 --- a/toolserver/copyvios/__init__.py +++ b/toolserver/copyvios/__init__.py @@ -1,34 +1,27 @@ # -*- coding: utf-8 -*- -from urlparse import parse_qs from earwigbot.bot import Bot from .checker import get_results from .highlighter import highlight_delta +from ..misc import Query from ..sites import get_site, get_sites def main(context, environ): - lang = orig_lang = project = name = title = url = None - - # Parse the query string: - query = parse_qs(environ["QUERY_STRING"]) - if "lang" in query: - lang = orig_lang = query["lang"][0].decode("utf8").lower() - if "::" in lang: - lang, name = lang.split("::", 1) - if "project" in query: - project = query["project"][0].decode("utf8").lower() - if "title" in query: - title = query["title"][0].decode("utf8") - if "url" in query: - url = query["url"][0].decode("utf8") + query = Query(environ) + if query.lang: + query.lang = query.orig_lang = query.lang.lower() + if "::" in query.lang: + query.lang, query.name = query.lang.split("::", 1) + if query.project: + query.project = query.project.lower() bot = Bot(".earwigbot") all_langs, all_projects = get_sites(bot) page = result = None - if lang and project and title: - site = get_site(bot, lang, project, name, all_projects) + if query.lang and query.project and query.title: + site = get_site(bot, query, all_projects) if site: - page, result = get_results(bot, site, title, url, query) + page, result = get_results(bot, site, query) - return lang, orig_lang, project, title, url, bot, page, result + return query, bot, all_langs, all_projects, page, result diff --git a/toolserver/copyvios/checker.py b/toolserver/copyvios/checker.py index ea39627..a1662b3 100644 --- a/toolserver/copyvios/checker.py +++ b/toolserver/copyvios/checker.py @@ -8,20 +8,20 @@ from earwigbot import exceptions from ..misc import open_sql_connection -def get_results(bot, site, title, url, query): - page = site.get_page(title) +def get_results(bot, site, query): + page = site.get_page(query.title) try: page.get() # Make sure that the page exists before we check it! except (exceptions.PageNotFoundError, exceptions.InvalidPageError): return page, None - # if url: - # result = _get_url_specific_results(page, url) + # if query.url: + # result = _get_url_specific_results(page, query.url) # else: # conn = open_sql_connection(bot, "copyvioCache") - # if not query.get("nocache"): + # if not query.nocache: # result = _get_cached_results(page, conn) - # if query.get("nocache") or not result: + # if query.nocache or not result: # result = _get_fresh_results(page, conn) tstart = time() mc1 = __import__("earwigbot").wiki.copyvios.MarkovChain(page.get()) diff --git a/toolserver/misc.py b/toolserver/misc.py index 1c9a5b4..3c80b94 100644 --- a/toolserver/misc.py +++ b/toolserver/misc.py @@ -1,9 +1,27 @@ # -*- coding: utf-8 -*- from os.path import expanduser +from urlparse import parse_qs import oursql +class Query(object): + def __init__(self, environ): + self.query = {} + parsed = parse_qs(environ["QUERY_STRING"]) + for key, value in parsed.iteritems(): + self.query[key] = value[-1].decode("utf8") + + def __getattr__(self, key): + try: + return self.query[key] + except KeyError: + return None + + def __setattr__(self, key, value): + self.query[key] = value + + def open_sql_connection(bot, dbname): conn_args = bot.config.wiki["_toolserverSQL"][dbname] if "read_default_file" not in conn_args and "user" not in conn_args and "passwd" not in conn_args: diff --git a/toolserver/sites.py b/toolserver/sites.py index 158a355..e0260aa 100644 --- a/toolserver/sites.py +++ b/toolserver/sites.py @@ -7,7 +7,8 @@ from earwigbot import exceptions from .misc import open_sql_connection -def get_site(bot, lang, project, name, all_projects): +def get_site(bot, query, all_projects): + lang, project, name = query.lang, query.project, query.name if project not in [proj[0] for proj in all_projects]: return None if project == "wikimedia" and name: # Special sites: