diff --git a/copyvios/api.py b/copyvios/api.py
index 1d803dd..0be95e1 100644
--- a/copyvios/api.py
+++ b/copyvios/api.py
@@ -2,6 +2,7 @@
from collections import OrderedDict
+from .highlighter import highlight_delta
from .checker import do_check, T_POSSIBLE, T_SUSPECT
from .misc import Query, cache
from .sites import update_sites
@@ -40,6 +41,11 @@ def _serialize_source(source, show_skip=True):
data["excluded"] = source.excluded
return data
+def _serialize_detail(result):  # Build the optional "detail" payload: highlighted article text and best-source text.
+ article = highlight_delta(result.article_chain, result.best.chains[1])  # NOTE(review): chains[1] is presumably the shared/delta chain -- confirm against checker
+ source = highlight_delta(result.best.chains[0], result.best.chains[1])  # the same chains[1] is passed for both article and source
+ return OrderedDict((("article", article), ("source", source)))  # OrderedDict keeps "article" ahead of "source" in the output
+
def format_api_error(code, info):
if isinstance(info, BaseException):
info = type(info).__name__ + ": " + str(info)
@@ -90,12 +96,15 @@ def _hook_check(query):
data["original_page"] = _serialize_page(query.redirected_from)
data["best"] = _serialize_source(result.best, show_skip=False)
data["sources"] = [_serialize_source(source) for source in result.sources]
+ if query.detail in ("1", "true"):
+ data["detail"] = _serialize_detail(result)
return data
def _hook_sites(query):
update_sites()
- return OrderedDict((("status", "ok"),
- ("langs", cache.langs), ("projects", cache.projects)))
+ return OrderedDict((
+ ("status", "ok"), ("langs", cache.langs), ("projects", cache.projects)
+ ))
_HOOKS = {
"compare": _hook_check,
diff --git a/templates/api.mako b/templates/api.mako
index 6f4732e..7111462 100644
--- a/templates/api.mako
+++ b/templates/api.mako
@@ -112,6 +112,12 @@
Yes |
The URL of the suspected violation source that will be compared to the page. |
+
+ detail |
+ boolean |
+ No (default: false) |
+ Whether to include the detailed HTML text comparison available in the regular interface. If false (the default), only the confidence percentage is returned. |
+
@@ -219,7 +225,11 @@
"excluded": boolean whether the source was skipped for being in the excluded URL list
},
...
- ]
+ ],
+ only if action=compare and detail=true "detail": {
+ "article": string article text, with shared passages marked with HTML,
+ "source": string best source text, with shared passages marked with HTML
+ }
}
In the case of action=search, sources will contain one entry for each source checked (or skipped if the check ends early), sorted in order of confidence, with skipped and excluded sources at the bottom.
In the case of action=compare, best will always contain information about the URL that was given, so response["best"]["url"] will never be null. Also, sources will always contain one entry, with the same data as best, since only one source is checked in comparison mode.
@@ -241,11 +251,8 @@
...
]
}
- Caveats
-
- - There is currently no way to get the contents of the article or suspected source, nor can you get the data behind the visual comparison available from the main tool. This may be changed in a future version if there is sufficient demand for it.
- - Requests are typically not rate-limited, but the tool uses the same workers to handle all requests, so making simultaneous API calls is only going to slow you down. In general, you are fine making an unlimited number of requests, as long as they are not concurrent and you wait a few seconds between them.
-
+ Etiquette
+ The tool uses the same workers to handle all requests, so making concurrent API calls is only going to slow you down. Most operations are not rate-limited, but full searches with use_engine=true are globally limited to a few thousand per day. Be respectful!
Example
https://tools.wmflabs.org/copyvios/api.json?version=1&action=search&project=wikipedia&lang=en&title=User:EarwigBot/Copyvios/Tests/2
{