Procházet zdrojové kódy

Query object for storing parameters in a structured manner.

pull/24/head
Ben Kurtovic před 12 roky
rodič
revize
8cce98fc8f
5 změnil soubory, kde provedl 50 přidání a 38 odebrání
  1. +12
    -12
      pages/copyvios.mako
  2. +12
    -19
      toolserver/copyvios/__init__.py
  3. +6
    -6
      toolserver/copyvios/checker.py
  4. +18
    -0
      toolserver/misc.py
  5. +2
    -1
      toolserver/sites.py

+ 12
- 12
pages/copyvios.mako Zobrazit soubor

@@ -1,7 +1,7 @@
<%include file="/support/header.mako" args="environ=environ, title='Copyvio Detector', add_css=('copyvios.css',), add_js=('copyvios.js',)"/>\
<%namespace module="toolserver.copyvios" import="main, highlight_delta"/>\
<%namespace module="toolserver.misc" import="urlstrip"/>\
<% lang, orig_lang, project, title, url, bot, page, result = main(environ) %>
<% query, bot, all_langs, all_projects, page, result = main(environ) %>
<h1>Copyvio Detector</h1>
<p>This tool attempts to detect <a href="//en.wikipedia.org/wiki/WP:COPYVIO">copyright violations</a> in articles. Simply give the title of the page you want to check and hit Submit. The tool will then search for its content elsewhere on the web and display a report if a similar webpage is found. If you also provide a URL, it will not query any search engines and instead display a report comparing the article to that particular webpage, like the <a href="//toolserver.org/~dcoetzee/duplicationdetector/">Duplication Detector</a>. Check out the <a href="//en.wikipedia.org/wiki/User:EarwigBot/Copyvios/FAQ">FAQ</a> for more information and technical details.</p>
<form action="${environ['PATH_INFO']}" method="get">
@@ -11,7 +11,7 @@
<td>
<tt>http://</tt>
<select name="lang">
<% selected_lang = orig_lang if orig_lang else bot.wiki.get_site().lang %>
<% selected_lang = query.orig_lang if query.orig_lang else bot.wiki.get_site().lang %>
% for code, name in all_langs:
% if code == selected_lang:
<option value="${code}" selected="selected">${name}</option>
@@ -22,7 +22,7 @@
</select>
<tt>.</tt>
<select name="project">
<% selected_project = project if project else bot.wiki.get_site().project %>
<% selected_project = query.project if query.project else bot.wiki.get_site().project %>
% for code, name in all_projects:
% if code == selected_project:
<option value="${code}" selected="selected">${name}</option>
@@ -38,24 +38,24 @@
<td>Page title:</td>
% if page:
<td><input type="text" name="title" size="60" value="${page.title | h}" /></td>
% elif title:
<td><input type="text" name="title" size="60" value="${title | h}" /></td>
% elif query.title:
<td><input type="text" name="title" size="60" value="${query.title | h}" /></td>
% else:
<td><input type="text" name="title" size="60" /></td>
% endif
</tr>
<tr>
<td>URL (optional):</td>
% if url:
<td><input type="text" name="url" size="120" value="${url | h}" /></td>
% if query.url:
<td><input type="text" name="url" size="120" value="${query.url | h}" /></td>
% else:
<td><input type="text" name="url" size="120" /></td>
% endif
</tr>
% if query.get("nocache") or (result and result.cached):
% if query.nocache or (result and result.cached):
<tr>
<td>Bypass cache:</td>
% if query.get("nocache"):
% if query.nocache:
<td><input type="checkbox" name="nocache" value="1" checked="checked" /></td>
% else:
<td><input type="checkbox" name="nocache" value="1" /></td>
@@ -67,12 +67,12 @@
</tr>
</table>
</form>
% if project and lang and title and not page:
% if query.project and query.lang and query.title and not page:
<div class="divider"></div>
<div id="cv-result-yes">
<p>The given site (project=<b><tt>${project}</tt></b>, language=<b><tt>${lang}</tt></b>) doesn't seem to exist. It may also be closed or private. <a href="//${lang}.${project}.org/">Confirm its URL.</a></p>
<p>The given site (project=<b><tt>${query.project}</tt></b>, language=<b><tt>${query.lang}</tt></b>) doesn't seem to exist. It may also be closed or private. <a href="//${query.lang}.${query.project}.org/">Confirm its URL.</a></p>
</div>
% elif project and lang and title and page and not result:
% elif query.project and query.lang and query.title and page and not result:
<div class="divider"></div>
<div id="cv-result-yes">
<p>The given page doesn't seem to exist: <a href="${page.url}">${page.title | h}</a>.</p>


+ 12
- 19
toolserver/copyvios/__init__.py Zobrazit soubor

@@ -1,34 +1,27 @@
# -*- coding: utf-8 -*-

from urlparse import parse_qs
from earwigbot.bot import Bot

from .checker import get_results
from .highlighter import highlight_delta
from ..misc import Query
from ..sites import get_site, get_sites

def main(context, environ):
lang = orig_lang = project = name = title = url = None

# Parse the query string:
query = parse_qs(environ["QUERY_STRING"])
if "lang" in query:
lang = orig_lang = query["lang"][0].decode("utf8").lower()
if "::" in lang:
lang, name = lang.split("::", 1)
if "project" in query:
project = query["project"][0].decode("utf8").lower()
if "title" in query:
title = query["title"][0].decode("utf8")
if "url" in query:
url = query["url"][0].decode("utf8")
query = Query(environ)
if query.lang:
query.lang = query.orig_lang = query.lang.lower()
if "::" in query.lang:
query.lang, query.name = query.lang.split("::", 1)
if query.project:
query.project = query.project.lower()

bot = Bot(".earwigbot")
all_langs, all_projects = get_sites(bot)
page = result = None
if lang and project and title:
site = get_site(bot, lang, project, name, all_projects)
if query.lang and query.project and query.title:
site = get_site(bot, query, all_projects)
if site:
page, result = get_results(bot, site, title, url, query)
page, result = get_results(bot, site, query)

return lang, orig_lang, project, title, url, bot, page, result
return query, bot, all_langs, all_projects, page, result

+ 6
- 6
toolserver/copyvios/checker.py Zobrazit soubor

@@ -8,20 +8,20 @@ from earwigbot import exceptions

from ..misc import open_sql_connection

def get_results(bot, site, title, url, query):
page = site.get_page(title)
def get_results(bot, site, query):
page = site.get_page(query.title)
try:
page.get() # Make sure that the page exists before we check it!
except (exceptions.PageNotFoundError, exceptions.InvalidPageError):
return page, None

# if url:
# result = _get_url_specific_results(page, url)
# if query.url:
# result = _get_url_specific_results(page, query.url)
# else:
# conn = open_sql_connection(bot, "copyvioCache")
# if not query.get("nocache"):
# if not query.nocache:
# result = _get_cached_results(page, conn)
# if query.get("nocache") or not result:
# if query.nocache or not result:
# result = _get_fresh_results(page, conn)
tstart = time()
mc1 = __import__("earwigbot").wiki.copyvios.MarkovChain(page.get())


+ 18
- 0
toolserver/misc.py Zobrazit soubor

@@ -1,9 +1,27 @@
# -*- coding: utf-8 -*-

from os.path import expanduser
from urlparse import parse_qs

import oursql

class Query(object):
    """Attribute-style access to the parameters of a request's query string.

    The query string is read from ``environ["QUERY_STRING"]`` and parsed with
    ``parse_qs``. When a parameter is given more than once, only its last
    value is kept. Values are stored in the ``query`` dict, and every other
    attribute read or write on the instance is redirected to that dict:

    - ``q.name`` returns ``q.query["name"]``, or ``None`` if it is not set.
    - ``q.name = value`` stores ``value`` in ``q.query["name"]``.
    """

    def __init__(self, environ):
        """Parse the query string found in the given environ mapping.

        A missing ``QUERY_STRING`` key is treated like an empty query string.
        """
        # The backing dict must be created through object.__setattr__: our
        # own __setattr__ writes into self.query, which does not exist yet,
        # and the resulting __getattr__("query") lookup would call itself
        # until the interpreter's recursion limit is hit.
        object.__setattr__(self, "query", {})
        parsed = parse_qs(environ.get("QUERY_STRING", ""))
        for key, values in parsed.items():
            last = values[-1]  # Last occurrence wins for repeated keys.
            # Byte strings are decoded as UTF-8; text values are kept as-is.
            if isinstance(last, bytes):
                last = last.decode("utf8")
            self.query[key] = last

    def __getattr__(self, key):
        """Return the stored value for ``key``, or None if there is none.

        This is only invoked when normal attribute lookup fails, so the
        ``query`` dict itself and the methods are never routed through here.
        """
        try:
            # Go through __dict__ directly so that a missing backing dict
            # yields None instead of re-entering __getattr__ recursively.
            return self.__dict__["query"][key]
        except KeyError:
            return None

    def __setattr__(self, key, value):
        """Store ``value`` under ``key`` in the ``query`` dict."""
        self.query[key] = value


def open_sql_connection(bot, dbname):
conn_args = bot.config.wiki["_toolserverSQL"][dbname]
if "read_default_file" not in conn_args and "user" not in conn_args and "passwd" not in conn_args:


+ 2
- 1
toolserver/sites.py Zobrazit soubor

@@ -7,7 +7,8 @@ from earwigbot import exceptions

from .misc import open_sql_connection

def get_site(bot, lang, project, name, all_projects):
def get_site(bot, query, all_projects):
lang, project, name = query.lang, query.project, query.name
if project not in [proj[0] for proj in all_projects]:
return None
if project == "wikimedia" and name: # Special sites:


Načítá se…
Zrušit
Uložit