A copyright violation detector running on Wikimedia Cloud Services https://tools.wmflabs.org/copyvios/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

123 lines
4.5 KiB

  1. # -*- coding: utf-8 -*-
  2. from collections import OrderedDict
  3. from .checker import do_check, T_POSSIBLE, T_SUSPECT
  4. from .misc import Query, cache
  5. from .sites import update_sites
  6. __all__ = ["format_api_error", "handle_api_request"]
  7. _CHECK_ERRORS = {
  8. "no search method": "Either 'use_engine' or 'use_links' must be true",
  9. "no URL": "The parameter 'url' is required for URL comparisons",
  10. "bad URI": "The given URI scheme is unsupported",
  11. "no data": "No text could be found in the given URL (note that only HTML "
  12. "and plain text pages are supported, and content generated by "
  13. "JavaScript or found inside iframes is ignored)",
  14. "timeout": "The given URL timed out before any data could be retrieved",
  15. "search error": "An error occurred while using the search engine; try "
  16. "reloading or setting 'use_engine' to 0",
  17. }
  18. def _serialize_page(page):
  19. return OrderedDict((("title", page.title), ("url", page.url)))
  20. def _serialize_source(source, show_skip=True):
  21. if not source:
  22. return OrderedDict((
  23. ("url", None), ("confidence", 0.0), ("violation", "none")))
  24. conf = source.confidence
  25. data = OrderedDict((
  26. ("url", source.url),
  27. ("confidence", conf),
  28. ("violation", "suspected" if conf >= T_SUSPECT else
  29. "possible" if conf >= T_POSSIBLE else "none")
  30. ))
  31. if show_skip:
  32. data["skipped"] = source.skipped
  33. data["excluded"] = source.excluded
  34. return data
  35. def format_api_error(code, info):
  36. if isinstance(info, BaseException):
  37. info = type(info).__name__ + ": " + str(info)
  38. elif isinstance(info, unicode):
  39. info = info.encode("utf8")
  40. error_inner = OrderedDict((("code", code), ("info", info)))
  41. return OrderedDict((("status", "error"), ("error", error_inner)))
  42. def _hook_default(query):
  43. info = u"Unknown action: '{0}'".format(query.action.lower())
  44. return format_api_error("unknown_action", info)
  45. def _hook_check(query):
  46. do_check(query)
  47. if not query.submitted:
  48. info = ("The query parameters 'project', 'lang', and either 'title' "
  49. "or 'oldid' are required for checks")
  50. return format_api_error("missing_params", info)
  51. if query.error:
  52. info = _CHECK_ERRORS.get(query.error, "An unknown error occurred")
  53. return format_api_error(query.error.replace(" ", "_"), info)
  54. elif not query.site:
  55. info = (u"The given site (project={0}, lang={1}) either doesn't exist,"
  56. u" is closed, or is private").format(query.project, query.lang)
  57. return format_api_error("bad_site", info)
  58. elif not query.result:
  59. if query.oldid:
  60. info = u"The given revision ID doesn't seem to exist: {0}"
  61. return format_api_error("bad_oldid", info.format(query.oldid))
  62. else:
  63. info = u"The given page doesn't seem to exist: {0}"
  64. return format_api_error("bad_title", info.format(query.page.title))
  65. result = query.result
  66. data = OrderedDict((
  67. ("status", "ok"),
  68. ("meta", OrderedDict((
  69. ("time", result.time),
  70. ("queries", result.queries),
  71. ("cached", result.cached),
  72. ("redirected", bool(query.redirected_from))
  73. ))),
  74. ("page", _serialize_page(query.page))
  75. ))
  76. if result.cached:
  77. data["meta"]["cache_time"] = result.cache_time
  78. if query.redirected_from:
  79. data["original_page"] = _serialize_page(query.redirected_from)
  80. data["best"] = _serialize_source(result.best, show_skip=False)
  81. data["sources"] = [_serialize_source(source) for source in result.sources]
  82. return data
  83. def _hook_sites(query):
  84. update_sites()
  85. return OrderedDict((("status", "ok"),
  86. ("langs", cache.langs), ("projects", cache.projects)))
  87. _HOOKS = {
  88. "compare": _hook_check,
  89. "search": _hook_check,
  90. "sites": _hook_sites,
  91. }
  92. def handle_api_request():
  93. query = Query()
  94. if query.version:
  95. try:
  96. query.version = int(query.version)
  97. except ValueError:
  98. info = "The version string is invalid: {0}".format(query.version)
  99. return format_api_error("invalid_version", info)
  100. else:
  101. query.version = 1
  102. if query.version == 1:
  103. action = query.action.lower() if query.action else ""
  104. return _HOOKS.get(action, _hook_default)(query)
  105. info = "The API version is unsupported: {0}".format(query.version)
  106. return format_api_error("unsupported_version", info)