Bladeren bron

First version of new API (for #7)

pull/24/head
Ben Kurtovic 9 jaren geleden
bovenliggende
commit
6639338fa8
8 gewijzigde bestanden met toevoegingen van 165 en 13 verwijderingen
  1. +24
    -1
      app.fcgi
  2. +103
    -0
      copyvios/api.py
  3. +3
    -2
      copyvios/checker.py
  4. +4
    -4
      copyvios/highlighter.py
  5. +3
    -0
      static/api.css
  6. +21
    -0
      templates/api.mako
  7. +4
    -4
      templates/index.mako
  8. +3
    -2
      templates/support/footer.mako

+ 24
- 1
app.fcgi Bestand weergeven

@@ -2,6 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-


from functools import wraps from functools import wraps
from json import dumps
from logging import DEBUG, INFO, getLogger from logging import DEBUG, INFO, getLogger
from logging.handlers import TimedRotatingFileHandler from logging.handlers import TimedRotatingFileHandler
from time import asctime from time import asctime
@@ -9,10 +10,11 @@ from traceback import format_exc


from earwigbot.bot import Bot from earwigbot.bot import Bot
from earwigbot.wiki.copyvios import globalize from earwigbot.wiki.copyvios import globalize
from flask import Flask, g, request
from flask import Flask, g, make_response, request
from flask.ext.mako import MakoTemplates, render_template, TemplateError from flask.ext.mako import MakoTemplates, render_template, TemplateError
from flup.server.fcgi import WSGIServer from flup.server.fcgi import WSGIServer


from copyvios.api import format_api_error, handle_api_request
from copyvios.checker import do_check from copyvios.checker import do_check
from copyvios.cookies import parse_cookies from copyvios.cookies import parse_cookies
from copyvios.settings import process_settings from copyvios.settings import process_settings
@@ -85,5 +87,26 @@ def settings():
"default_lang": default.lang, "default_project": default.project} "default_lang": default.lang, "default_project": default.project}
return render_template("settings.mako", **kwargs) return render_template("settings.mako", **kwargs)


@app.route("/api.json")
def api():
    """Serve the API endpoint.

    With no query parameters at all, the help page is rendered. Otherwise the
    'format' parameter (default "json") selects the output mode: "json"
    returns the result as an application/json response, "jsonfm" renders it
    through the api.mako template, and anything else produces an
    "unknown_format" error delivered as JSON.
    """
    if not request.args:
        return render_template("api.mako", help=True)

    out_format = request.args.get("format", "json")
    if out_format not in ["json", "jsonfm"]:
        errmsg = u"Unknown format: '{0}'".format(out_format)
        result = format_api_error("unknown_format", errmsg)
    else:
        # Any failure while handling the request is reported to the client as
        # an API error instead of propagating out of the view.
        try:
            result = handle_api_request()
        except Exception as exc:
            result = format_api_error("unhandled_exception", exc)

    if out_format == "jsonfm":
        return render_template("api.mako", help=False, result=result)

    response = make_response(dumps(result))
    response.mimetype = "application/json"
    return response

if __name__ == '__main__': if __name__ == '__main__':
WSGIServer(app).run() WSGIServer(app).run()

+ 103
- 0
copyvios/api.py Bestand weergeven

@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-

from .checker import do_check, T_POSSIBLE, T_SUSPECT
from .misc import Query
from .sites import get_sites

__all__ = ["format_api_error", "handle_api_request"]

# Dispatch table mapping the lowercased 'action' query parameter to the hook
# that handles it. The hooks are defined further down in this module, so each
# entry is a thin wrapper that looks the hook up by name only when it is
# called; naming the functions directly at this point would raise NameError
# while the module is being imported.
_HOOKS = {
    "compare": lambda query: _hook_check(query),
    "search": lambda query: _hook_check(query),
    "sites": lambda query: _hook_sites(query),
}

_CHECK_ERRORS = {
"no search method": "Either 'use_engine' or 'use_links' must be true",
"no URL": "The parameter 'url' is required for URL comparisons",
"bad URI": "The given URI scheme is unsupported",
"no data": "No text could be found in the given URL (note that only HTML "
"and plain text pages are supported, and content generated by "
"JavaScript or found inside iframes is ignored)",
"timeout": "The given URL timed out before any data could be retrieved",
"search error": "An error occurred while using the search engine; try "
"reloading or setting 'use_engine' to 0",
}

def _serialize_page(page):
return {"title": page.title, "url": page.url}

def _serialize_source(source, show_skip=True):
if not source:
return {"url": None, "confidence": 0.0, "violation": "none"}

conf = source.confidence
data = {
"url": source.url,
"confidence": conf,
"violation": "suspected" if conf >= T_SUSPECT else
"possible" if conf >= T_POSSIBLE else "none"
}
if show_skip:
data["skipped"] = source.skipped
return data

def format_api_error(code, info):
    """Build the standard API error payload.

    code -- short machine-readable error identifier, e.g. "unknown_format".
    info -- human-readable description of the problem, or an exception
            instance, which is rendered as "<ExceptionClass>: <message>".

    Returns a dict of the form
    {"status": "error", "error": {"code": code, "info": info}}.
    On Python 2, unicode text is encoded to a UTF-8 byte string.
    """
    text_type = type(u"")  # unicode on Python 2, str on Python 3

    if isinstance(info, BaseException):
        try:
            message = str(info)
        except UnicodeError:
            # Python 2: str() cannot render an exception whose message is
            # non-ASCII unicode. This function runs inside the catch-all error
            # path, so it must not raise itself; fall back to the text type.
            message = text_type(info)
        info = type(info).__name__ + ": " + message

    # Only encode where text and byte strings are distinct from str (Python 2).
    if text_type is not str and isinstance(info, text_type):
        info = info.encode("utf8")
    return {"status": "error", "error": {"code": code, "info": info}}

def handle_api_request():
    """Dispatch the current API request to the hook for its action.

    A new Query is created and its 'action' value is lowercased (a missing
    action is treated as the empty string). The matching entry of _HOOKS is
    called with the query; unrecognized actions go to _hook_default. The
    hook's return value is returned unchanged.
    """
    query = Query()
    if query.action:
        action = query.action.lower()
    else:
        action = ""
    hook = _HOOKS.get(action, _hook_default)
    return hook(query)

def _hook_default(query):
    """Fallback hook: report that the requested action is not recognized.

    Returns an "unknown_action" API error naming the (lowercased) action.
    """
    # This hook is also reached when no action was supplied at all, in which
    # case query.action is falsy and calling .lower() on it directly would
    # raise AttributeError; report an empty action name instead.
    action = query.action.lower() if query.action else ""
    info = u"Unknown action: '{0}'".format(action)
    return format_api_error("unknown_action", info)

def _hook_check(query):
    """Run a copyvio check for *query* and serialize the outcome.

    do_check() is called on the query first. An API error dict is returned
    when the query was not submitted (required parameters missing), when the
    check recorded an error, when the site could not be resolved, or when no
    result was produced (unknown revision ID or page). Otherwise a
    {"status": "ok", ...} dict is returned with check metadata, the checked
    page, the best source and all sources.
    """
    do_check(query)

    if not query.submitted:
        message = ("The query parameters 'project', 'lang', and either 'title' "
                   "or 'oldid' are required for checks")
        return format_api_error("missing_params", message)

    if query.error:
        message = _CHECK_ERRORS.get(query.error, "An unknown error occurred")
        # Error identifiers contain spaces; the API code uses underscores.
        return format_api_error(query.error.replace(" ", "_"), message)

    if not query.site:
        template = (u"The given site (project={0}, lang={1}) either doesn't exist,"
                    u" is closed, or is private")
        return format_api_error(
            "bad_site", template.format(query.project, query.lang))

    if not query.result:
        if query.oldid:
            template = u"The given revision ID doesn't seem to exist: {0}"
            return format_api_error("bad_oldid", template.format(query.oldid))
        template = u"The given page doesn't seem to exist: {0}"
        return format_api_error("bad_title", template.format(query.page.title))

    result = query.result
    meta = {
        "time": result.time,
        "queries": result.queries,
        "cached": result.cached,
        "redirected": bool(query.redirected_from)
    }
    response = {
        "status": "ok",
        "meta": meta,
        "page": _serialize_page(query.page),
        "best": _serialize_source(result.best, show_skip=False),
        "sources": [_serialize_source(src) for src in result.sources]
    }

    # Optional fields: present only for cached results / redirected pages.
    if result.cached:
        meta["cache_time"] = result.cache_time
    if query.redirected_from:
        response["original_page"] = _serialize_page(query.redirected_from)
    return response

def _hook_sites(query):
    """Return the language and project lists provided by get_sites().

    *query* is not used; it is accepted so that this hook can be called the
    same way as the other entries in the hook table.
    """
    langs, projects = get_sites()
    response = {"status": "ok"}
    response["langs"] = langs
    response["projects"] = projects
    return response

+ 3
- 2
copyvios/checker.py Bestand weergeven

@@ -17,8 +17,9 @@ __all__ = ["do_check", "T_POSSIBLE", "T_SUSPECT"]
T_POSSIBLE = 0.4 T_POSSIBLE = 0.4
T_SUSPECT = 0.75 T_SUSPECT = 0.75


def do_check():
query = Query()
def do_check(query=None):
if not query:
query = Query()
if query.lang: if query.lang:
query.lang = query.orig_lang = query.lang.lower() query.lang = query.orig_lang = query.lang.lower()
if "::" in query.lang: if "::" in query.lang:


+ 4
- 4
copyvios/highlighter.py Bestand weergeven

@@ -7,7 +7,7 @@ from markupsafe import escape


__all__ = ["highlight_delta"] __all__ = ["highlight_delta"]


def highlight_delta(context, chain, delta=None):
def highlight_delta(context, chain, delta):
degree = chain.degree - 1 degree = chain.degree - 1
highlights = [False] * degree highlights = [False] * degree
block = [chain.START] * degree block = [chain.START] * degree
@@ -26,7 +26,7 @@ def highlight_delta(context, chain, delta=None):


i = degree i = degree
numwords = len(chain.text.split()) numwords = len(chain.text.split())
processed = []
result = []
paragraphs = chain.text.split("\n") paragraphs = chain.text.split("\n")
while paragraphs: while paragraphs:
words = [] words = []
@@ -39,10 +39,10 @@ def highlight_delta(context, chain, delta=None):
words.append(_highlight_word(word, before, after, first, last)) words.append(_highlight_word(word, before, after, first, last))
else: else:
words.append(unicode(escape(word))) words.append(unicode(escape(word)))
processed.append(u" ".join(words))
result.append(u" ".join(words))
i += 1 i += 1


return u"<br /><br />".join(processed)
return u"<br /><br />".join(result)


def _get_next(paragraphs): def _get_next(paragraphs):
paragraph = paragraphs.pop(0) paragraph = paragraphs.pop(0)


+ 3
- 0
static/api.css Bestand weergeven

@@ -0,0 +1,3 @@
.code {
font-family: monospace;
}

+ 21
- 0
templates/api.mako Bestand weergeven

@@ -0,0 +1,21 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>API - Earwig's Copyvio Detector</title>
<link rel="stylesheet" href="${request.script_root}/static/api.min.css" type="text/css" />
</head>
<body>
% if help:
<div id="help">
<p>This is the first version of the <a href="//en.wikipedia.org/wiki/Application_programming_interface">API</a> for <a href="${request.script_root}">Earwig's Copyvio Detector</a>. It works, but some bugs might still need to be ironed out, so please <a href="https://github.com/earwig/copyvios/issues">report any</a> if you see them.</p>
</div>
% endif
% if result:
<div id="result">
<p>You are using <span class="code">jsonfm</span> output mode, which renders JSON data as a formatted HTML document. This is intended for testing and debugging only.</p>
<!-- walk tree -->
</div>
% endif
</body>
</html>

+ 4
- 4
templates/index.mako Bestand weergeven

@@ -30,14 +30,14 @@
<div id="info-box" class="red-box"> <div id="info-box" class="red-box">
<p>The given site (project=<b><span class="mono">${query.project | h}</span></b>, language=<b><span class="mono">${query.lang | h}</span></b>) doesn't seem to exist. It may also be closed or private. <a href="//${query.lang | h}.${query.project | h}.org/">Confirm its URL.</a></p> <p>The given site (project=<b><span class="mono">${query.project | h}</span></b>, language=<b><span class="mono">${query.lang | h}</span></b>) doesn't seem to exist. It may also be closed or private. <a href="//${query.lang | h}.${query.project | h}.org/">Confirm its URL.</a></p>
</div> </div>
% elif query.title and not result:
<div id="info-box" class="red-box">
<p>The given page doesn't seem to exist: <a href="${query.page.url}">${query.page.title | h}</a>.</p>
</div>
% elif query.oldid and not result: % elif query.oldid and not result:
<div id="info-box" class="red-box"> <div id="info-box" class="red-box">
<p>The given revision ID doesn't seem to exist: <a href="//${query.site.domain | h}/w/index.php?oldid=${query.oldid | h}">${query.oldid | h}</a>.</p> <p>The given revision ID doesn't seem to exist: <a href="//${query.site.domain | h}/w/index.php?oldid=${query.oldid | h}">${query.oldid | h}</a>.</p>
</div> </div>
% elif query.title and not result:
<div id="info-box" class="red-box">
<p>The given page doesn't seem to exist: <a href="${query.page.url}">${query.page.title | h}</a>.</p>
</div>
% endif % endif
%endif %endif
<p>This tool attempts to detect <a href="//en.wikipedia.org/wiki/WP:COPYVIO">copyright violations</a> in articles. In search mode, it will check for similar content elsewhere on the web using <a href="//developer.yahoo.com/boss/search/">Yahoo! BOSS</a> and/or external links present in the text of the page, depending on which options are selected. In comparison mode, the tool will skip the searching step and display a report comparing the article to the given webpage, like the <a href="//tools.wmflabs.org/dupdet/">Duplication Detector</a>.</p> <p>This tool attempts to detect <a href="//en.wikipedia.org/wiki/WP:COPYVIO">copyright violations</a> in articles. In search mode, it will check for similar content elsewhere on the web using <a href="//developer.yahoo.com/boss/search/">Yahoo! BOSS</a> and/or external links present in the text of the page, depending on which options are selected. In comparison mode, the tool will skip the searching step and display a report comparing the article to the given webpage, like the <a href="//tools.wmflabs.org/dupdet/">Duplication Detector</a>.</p>


+ 3
- 2
templates/support/footer.mako Bestand weergeven

@@ -1,8 +1,9 @@
<%! from flask import g %>\
<%! from flask import g, request %>\
</div> </div>
<div id="footer"> <div id="footer">
<p>Copyright &copy; 2009&ndash;2014 <a href="//en.wikipedia.org/wiki/User:The_Earwig">Ben Kurtovic</a> &bull; \ <p>Copyright &copy; 2009&ndash;2014 <a href="//en.wikipedia.org/wiki/User:The_Earwig">Ben Kurtovic</a> &bull; \
<a href="https://github.com/earwig/copyvios">View Source</a> &bull; \
<a href="${request.script_root}/api.json">API</a> &bull; \
<a href="https://github.com/earwig/copyvios">Source Code</a> &bull; \
% if ("CopyviosBackground" in g.cookies and g.cookies["CopyviosBackground"].value in ["potd", "list"]) or "CopyviosBackground" not in g.cookies: % if ("CopyviosBackground" in g.cookies and g.cookies["CopyviosBackground"].value in ["potd", "list"]) or "CopyviosBackground" not in g.cookies:
<a href="${g.descurl | h}">Background</a> &bull; \ <a href="${g.descurl | h}">Background</a> &bull; \
% endif % endif


Laden…
Annuleren
Opslaan