Browse Source

Refactor the last bit of parsing code into support.copyvios.

Will clean up in a bit.
pull/24/head
Ben Kurtovic 12 years ago
parent
commit
0f3414c576
4 changed files with 35 additions and 33 deletions
  1. +3
    -31
      pages/copyvios.mako
  2. +30
    -0
      pages/support/copyvios/__init__.py
  3. +1
    -1
      pages/support/copyvios/checker.py
  4. +1
    -1
      pages/support/copyvios/highlighter.py

+ 3
- 31
pages/copyvios.mako View File

@@ -1,35 +1,7 @@
<%!
from urlparse import parse_qs
from earwigbot.bot import Bot
%>\
<%namespace module="support.copyvios" import="get_results, highlight_delta"/>\
<%namespace module="support.sites" import="get_site, get_sites"/>\
<%include file="/support/header.mako" args="environ=environ, title='Copyvio Detector', add_css=('copyvios.css',), add_js=('copyvios.js',)"/>\
<%namespace module="support.copyvios" import="main, highlight_delta"/>\
<%namespace module="support.misc" import="urlstrip"/>\
<%
lang = orig_lang = project = name = title = url = None
site = page = result = None

# Parse the query string.
query = parse_qs(environ["QUERY_STRING"])
if "lang" in query:
lang = orig_lang = query["lang"][0].decode("utf8").lower()
if "::" in lang:
lang, name = lang.split("::", 1)
if "project" in query:
project = query["project"][0].decode("utf8").lower()
if "title" in query:
title = query["title"][0].decode("utf8")
if "url" in query:
url = query["url"][0].decode("utf8")

bot = Bot(".earwigbot")
all_langs, all_projects = get_sites(bot)
if lang and project and title:
site = get_site(bot, lang, project, name, all_projects)
if site:
page, result = get_results(bot, site, title, url, query)
%>\
<%include file="/support/header.mako" args="environ=environ, title='Copyvio Detector', add_css=('copyvios.css',), add_js=('copyvios.js',)"/>
<% lang, project, name, title, url, site, page, result = main(environ) %>
<h1>Copyvio Detector</h1>
<p>This tool attempts to detect <a href="//en.wikipedia.org/wiki/WP:COPYVIO">copyright violations</a> in articles. Simply give the title of the page you want to check and hit Submit. The tool will then search for its content elsewhere on the web and display a report if a similar webpage is found. If you also provide a URL, it will not query any search engines and instead display a report comparing the article to that particular webpage, like the <a href="//toolserver.org/~dcoetzee/duplicationdetector/">Duplication Detector</a>. Check out the <a href="//en.wikipedia.org/wiki/User:EarwigBot/Copyvios/FAQ">FAQ</a> for more information and technical details.</p>
<form action="${environ['PATH_INFO']}" method="get">


+ 30
- 0
pages/support/copyvios/__init__.py View File

@@ -1,4 +1,34 @@
# -*- coding: utf-8 -*-

from urlparse import parse_qs
from earwigbot.bot import Bot

from .checker import get_results
from .highlighter import highlight_delta
from ..sites import get_site, get_sites

def main(context, environ):
    """Parse the request's query string and, when possible, run a check.

    ``context`` is not referenced in the body; presumably it is the
    rendering context that Mako supplies to module functions imported
    through a template namespace (TODO confirm). ``environ`` must provide
    the "QUERY_STRING" key.

    Returns the tuple ``(lang, project, name, title, url, site, page,
    result)``. Parameters missing from the query string are ``None``;
    ``site``, ``page`` and ``result`` remain ``None`` unless the
    corresponding lookup was performed.
    """
    query = parse_qs(environ["QUERY_STRING"])

    def first(key, lowercase=False):
        # Only the first value supplied for a key is considered.
        if key not in query:
            return None
        text = query[key][0].decode("utf8")
        return text.lower() if lowercase else text

    name = None
    lang = first("lang", lowercase=True)
    if lang is not None and "::" in lang:
        # Whatever follows the first "::" is kept as ``name`` and later
        # handed to get_site() together with the language code.
        lang, name = lang.split("::", 1)
    project = first("project", lowercase=True)
    title = first("title")
    url = first("url")

    site = page = result = None
    bot = Bot(".earwigbot")
    _all_langs, all_projects = get_sites(bot)
    if lang and project and title:
        # A site is only looked up once all three inputs are present.
        site = get_site(bot, lang, project, name, all_projects)
        if site:
            page, result = get_results(bot, site, title, url, query)

    return lang, project, name, title, url, site, page, result

+ 1
- 1
pages/support/copyvios/checker.py View File

@@ -8,7 +8,7 @@ from earwigbot import exceptions

from ..misc import open_sql_connection

def get_results(context, bot, site, title, url, query):
def get_results(bot, site, title, url, query):
page = site.get_page(title)
try:
page.get() # Make sure that the page exists before we check it!


+ 1
- 1
pages/support/copyvios/highlighter.py View File

@@ -2,7 +2,7 @@

from re import sub, UNICODE

def highlight_delta(context, chain, delta):
def highlight_delta(chain, delta):
processed = []
prev_prev = prev = chain.START
i = 0


Loading…
Cancel
Save