A copyright violation detector running on Wikimedia Cloud Services https://tools.wmflabs.org/copyvios/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

117 lines
4.2 KiB

  1. # -*- coding: utf-8 -*-
  2. from .checker import do_check, T_POSSIBLE, T_SUSPECT
  3. from .misc import Query
  4. from .sites import get_sites
  5. __all__ = ["format_api_error", "handle_api_request"]
  6. _HOOKS = {
  7. "compare": _hook_check,
  8. "search": _hook_check,
  9. "sites": _hook_sites,
  10. }
  11. _CHECK_ERRORS = {
  12. "no search method": "Either 'use_engine' or 'use_links' must be true",
  13. "no URL": "The parameter 'url' is required for URL comparisons",
  14. "bad URI": "The given URI scheme is unsupported",
  15. "no data": "No text could be found in the given URL (note that only HTML "
  16. "and plain text pages are supported, and content generated by "
  17. "JavaScript or found inside iframes is ignored)",
  18. "timeout": "The given URL timed out before any data could be retrieved",
  19. "search error": "An error occurred while using the search engine; try "
  20. "reloading or setting 'use_engine' to 0",
  21. }
  22. def _serialize_page(page):
  23. return {"title": page.title, "url": page.url}
  24. def _serialize_source(source, show_skip=True):
  25. if not source:
  26. return {"url": None, "confidence": 0.0, "violation": "none"}
  27. conf = source.confidence
  28. data = {
  29. "url": source.url,
  30. "confidence": conf,
  31. "violation": "suspected" if conf >= T_SUSPECT else
  32. "possible" if conf >= T_POSSIBLE else "none"
  33. }
  34. if show_skip:
  35. data["skipped"] = source.skipped
  36. return data
  37. def format_api_error(code, info):
  38. if isinstance(info, BaseException):
  39. info = type(info).__name__ + ": " + str(info)
  40. elif isinstance(info, unicode):
  41. info = info.encode("utf8")
  42. return {"status": "error", "error": {"code": code, "info": info}}
  43. def handle_api_request():
  44. query = Query()
  45. if query.version:
  46. try:
  47. query.version = int(query.version)
  48. except ValueError:
  49. info = "The version string is invalid: {0}".format(query.version)
  50. return format_api_error("invalid_version", info)
  51. else:
  52. query.version = 1
  53. if query.version == 1:
  54. action = query.action.lower() if query.action else ""
  55. return _HOOKS.get(action, _hook_default)(query)
  56. info = "The API version is unsupported: {0}".format(query.version)
  57. return format_api_error("unsupported_version", info)
  58. def _hook_default(query):
  59. info = u"Unknown action: '{0}'".format(query.action.lower())
  60. return format_api_error("unknown_action", info)
  61. def _hook_check(query):
  62. do_check(query)
  63. if not query.submitted:
  64. info = ("The query parameters 'project', 'lang', and either 'title' "
  65. "or 'oldid' are required for checks")
  66. return format_api_error("missing_params", info)
  67. if query.error:
  68. info = _CHECK_ERRORS.get(query.error, "An unknown error occurred")
  69. return format_api_error(query.error.replace(" ", "_"), info)
  70. elif not query.site:
  71. info = (u"The given site (project={0}, lang={1}) either doesn't exist,"
  72. u" is closed, or is private").format(query.project, query.lang)
  73. return format_api_error("bad_site", info)
  74. elif not query.result:
  75. if query.oldid:
  76. info = u"The given revision ID doesn't seem to exist: {0}"
  77. return format_api_error("bad_oldid", info.format(query.oldid))
  78. else:
  79. info = u"The given page doesn't seem to exist: {0}"
  80. return format_api_error("bad_title", info.format(query.page.title))
  81. result = query.result
  82. data = {
  83. "status": "ok",
  84. "meta": {
  85. "time": result.time,
  86. "queries": result.queries,
  87. "cached": result.cached,
  88. "redirected": bool(query.redirected_from)
  89. },
  90. "page": _serialize_page(query.page),
  91. "best": _serialize_source(result.best, show_skip=False),
  92. "sources": [_serialize_source(source) for source in result.sources]
  93. }
  94. if result.cached:
  95. data["meta"]["cache_time"] = result.cache_time
  96. if query.redirected_from:
  97. data["original_page"] = _serialize_page(query.redirected_from)
  98. return data
  99. def _hook_sites(query):
  100. langs, projects = get_sites()
  101. return {"status": "ok", "langs": langs, "projects": projects}