A copyright violation detector running on Wikimedia Cloud Services https://tools.wmflabs.org/copyvios/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

122 lines
4.4 KiB

  1. # -*- coding: utf-8 -*-
  2. from collections import OrderedDict
  3. from .checker import do_check, T_POSSIBLE, T_SUSPECT
  4. from .misc import Query
  5. from .sites import get_sites
  6. __all__ = ["format_api_error", "handle_api_request"]
  7. _CHECK_ERRORS = {
  8. "no search method": "Either 'use_engine' or 'use_links' must be true",
  9. "no URL": "The parameter 'url' is required for URL comparisons",
  10. "bad URI": "The given URI scheme is unsupported",
  11. "no data": "No text could be found in the given URL (note that only HTML "
  12. "and plain text pages are supported, and content generated by "
  13. "JavaScript or found inside iframes is ignored)",
  14. "timeout": "The given URL timed out before any data could be retrieved",
  15. "search error": "An error occurred while using the search engine; try "
  16. "reloading or setting 'use_engine' to 0",
  17. }
  18. def _serialize_page(page):
  19. return OrderedDict(("title", page.title), ("url", page.url))
  20. def _serialize_source(source, show_skip=True):
  21. if not source:
  22. return OrderedDict(("url", None), ("confidence", 0.0),
  23. ("violation", "none"))
  24. conf = source.confidence
  25. data = OrderedDict(
  26. ("url", source.url),
  27. ("confidence", conf),
  28. ("violation", "suspected" if conf >= T_SUSPECT else
  29. "possible" if conf >= T_POSSIBLE else "none")
  30. )
  31. if show_skip:
  32. data["skipped"] = source.skipped
  33. return data
  34. def format_api_error(code, info):
  35. if isinstance(info, BaseException):
  36. info = type(info).__name__ + ": " + str(info)
  37. elif isinstance(info, unicode):
  38. info = info.encode("utf8")
  39. error_inner = OrderedDict(("code", code), ("info", info))
  40. return OrderedDict(("status", "error"), ("error", error_inner))
  41. def _hook_default(query):
  42. info = u"Unknown action: '{0}'".format(query.action.lower())
  43. return format_api_error("unknown_action", info)
  44. def _hook_check(query):
  45. do_check(query)
  46. if not query.submitted:
  47. info = ("The query parameters 'project', 'lang', and either 'title' "
  48. "or 'oldid' are required for checks")
  49. return format_api_error("missing_params", info)
  50. if query.error:
  51. info = _CHECK_ERRORS.get(query.error, "An unknown error occurred")
  52. return format_api_error(query.error.replace(" ", "_"), info)
  53. elif not query.site:
  54. info = (u"The given site (project={0}, lang={1}) either doesn't exist,"
  55. u" is closed, or is private").format(query.project, query.lang)
  56. return format_api_error("bad_site", info)
  57. elif not query.result:
  58. if query.oldid:
  59. info = u"The given revision ID doesn't seem to exist: {0}"
  60. return format_api_error("bad_oldid", info.format(query.oldid))
  61. else:
  62. info = u"The given page doesn't seem to exist: {0}"
  63. return format_api_error("bad_title", info.format(query.page.title))
  64. result = query.result
  65. data = OrderedDict(
  66. ("status", "ok"),
  67. ("meta", OrderedDict(
  68. ("time", result.time),
  69. ("queries", result.queries),
  70. ("cached", result.cached),
  71. ("redirected", bool(query.redirected_from))
  72. )),
  73. ("page", _serialize_page(query.page))
  74. )
  75. if result.cached:
  76. data["meta"]["cache_time"] = result.cache_time
  77. if query.redirected_from:
  78. data["original_page"] = _serialize_page(query.redirected_from)
  79. data["best"] = _serialize_source(result.best, show_skip=False)
  80. data["sources"] = [_serialize_source(source) for source in result.sources]
  81. return data
  82. def _hook_sites(query):
  83. langs, projects = get_sites()
  84. return OrderedDict(("status", "ok"), ("langs", langs),
  85. ("projects", projects))
  86. _HOOKS = {
  87. "compare": _hook_check,
  88. "search": _hook_check,
  89. "sites": _hook_sites,
  90. }
  91. def handle_api_request():
  92. query = Query()
  93. if query.version:
  94. try:
  95. query.version = int(query.version)
  96. except ValueError:
  97. info = "The version string is invalid: {0}".format(query.version)
  98. return format_api_error("invalid_version", info)
  99. else:
  100. query.version = 1
  101. if query.version == 1:
  102. action = query.action.lower() if query.action else ""
  103. return _HOOKS.get(action, _hook_default)(query)
  104. info = "The API version is unsupported: {0}".format(query.version)
  105. return format_api_error("unsupported_version", info)