diff --git a/copyvios/api.py b/copyvios/api.py index 1d803dd..0be95e1 100644 --- a/copyvios/api.py +++ b/copyvios/api.py @@ -2,6 +2,7 @@ from collections import OrderedDict +from .highlighter import highlight_delta from .checker import do_check, T_POSSIBLE, T_SUSPECT from .misc import Query, cache from .sites import update_sites @@ -40,6 +41,11 @@ def _serialize_source(source, show_skip=True): data["excluded"] = source.excluded return data +def _serialize_detail(result): + article = highlight_delta(result.article_chain, result.best.chains[1]) + source = highlight_delta(result.best.chains[0], result.best.chains[1]) + return OrderedDict((("article", article), ("source", source))) + def format_api_error(code, info): if isinstance(info, BaseException): info = type(info).__name__ + ": " + str(info) @@ -90,12 +96,15 @@ def _hook_check(query): data["original_page"] = _serialize_page(query.redirected_from) data["best"] = _serialize_source(result.best, show_skip=False) data["sources"] = [_serialize_source(source) for source in result.sources] + if query.detail in ("1", "true"): + data["detail"] = _serialize_detail(result) return data def _hook_sites(query): update_sites() - return OrderedDict((("status", "ok"), - ("langs", cache.langs), ("projects", cache.projects))) + return OrderedDict(( + ("status", "ok"), ("langs", cache.langs), ("projects", cache.projects) + )) _HOOKS = { "compare": _hook_check, diff --git a/templates/api.mako b/templates/api.mako index 6f4732e..7111462 100644 --- a/templates/api.mako +++ b/templates/api.mako @@ -112,6 +112,12 @@ Yes The URL of the suspected violation source that will be compared to the page. + + detail + boolean + No (default: false) + Whether to include the detailed HTML text comparison available in the regular interface. If not, only the confidence percentage is available. + @@ -219,7 +225,11 @@ "excluded": booleanwhether the source was skipped for being in the excluded URL list }, ... - ] + ], + only if action=compare and detail=true "detail": { + "article": stringarticle text, with shared passages marked with HTML, + "source": stringbest source text, with shared passages marked with HTML + } }

In the case of action=search, sources will contain one entry for each source checked (or skipped if the check ends early), sorted in order of confidence, with skipped and excluded sources at the bottom.

In the case of action=compare, best will always contain information about the URL that was given, so response["best"]["url"] will never be null. Also, sources will always contain one entry, with the same data as best, since only one source is checked in comparison mode.

@@ -241,11 +251,8 @@ ... ] } -

Caveats

- +

Etiquette

+ The tool uses the same workers to handle all requests, so making concurrent API calls is only going to slow you down. Most operations are not rate-limited, but full searches with use_engine=True are globally limited to a few thousand per day. Be respectful!

Example

https://tools.wmflabs.org/copyvios/api.json?version=1&action=search&project=wikipedia&lang=en&title=User:EarwigBot/Copyvios/Tests/2

{