A copyright violation detector running on Wikimedia Cloud Services https://tools.wmflabs.org/copyvios/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

175 lines
4.7 KiB

  1. #! /usr/bin/env python
  2. import functools
  3. import hashlib
  4. import json
  5. import os
  6. import time
  7. import traceback
  8. from typing import Any
  9. from earwigbot.wiki.copyvios import globalize
  10. from flask import Response, make_response, render_template, request
  11. from copyvios import app
  12. from copyvios.api import format_api_error, handle_api_request
  13. from copyvios.attribution import get_attribution_info
  14. from copyvios.background import get_background
  15. from copyvios.cache import cache
  16. from copyvios.checker import (
  17. T_POSSIBLE,
  18. T_SUSPECT,
  19. CopyvioCheckError,
  20. ErrorCode,
  21. do_check,
  22. )
  23. from copyvios.cookies import get_cookies, get_new_cookies
  24. from copyvios.highlighter import highlight_delta
  25. from copyvios.misc import get_notice, get_permalink
  26. from copyvios.query import CheckQuery
  27. from copyvios.settings import process_settings
  28. from copyvios.sites import update_sites
  # Anything a view or error handler in this module may hand back to Flask.
  AnyResponse = Response | str | bytes

  # Import-time side effects: record the start time in the app log, then call
  # earwigbot's globalize() with eight workers.
  app.logger.info("Flask server started " + time.asctime())
  globalize(num_workers=8)
  @app.errorhandler(Exception)
  def handle_errors(exc: Exception) -> AnyResponse:
      """Catch-all error handler registered for ``Exception``.

      Outside debug mode the exception currently being handled is logged and
      the ``error.html.jinja`` template is rendered with its formatted
      traceback. In debug mode the exception is re-raised instead.

      Note that ``exc`` itself is not consulted: the bare ``raise``,
      ``app.logger.exception`` and ``traceback.format_exc`` all operate on the
      exception that is active when this handler is invoked.
      """
      if not app.debug:
          app.logger.exception("Caught exception:")
          formatted = traceback.format_exc()
          return render_template("error.html.jinja", traceback=formatted)

      raise  # Use built-in debugger
  @app.context_processor
  def setup_context() -> dict[str, Any]:
      """Return the names this module injects into every template context.

      The mapping exposes the check thresholds, the ``ErrorCode`` type, the
      shared ``cache`` object, ``json.dumps`` (as ``dump_json``) and a handful
      of helper functions under their own names.
      """
      return dict(
          T_POSSIBLE=T_POSSIBLE,
          T_SUSPECT=T_SUSPECT,
          ErrorCode=ErrorCode,
          cache=cache,
          dump_json=json.dumps,
          get_attribution_info=get_attribution_info,
          get_background=get_background,
          get_cookies=get_cookies,
          get_notice=get_notice,
          get_permalink=get_permalink,
          highlight_delta=highlight_delta,
      )
  @app.after_request
  def add_new_cookies(response: Response) -> Response:
      """Add one ``Set-Cookie`` header per value yielded by ``get_new_cookies()``.

      The same response object is returned after the headers are appended.
      """
      headers = response.headers
      for cookie_header in get_new_cookies():
          headers.add("Set-Cookie", cookie_header)
      return response
  @app.after_request
  def write_access_log(response: Response) -> Response:
      """Write a debug-level access log line for the request just served.

      The line contains the current time, the request method and path, the
      request values as a dict, and the response status code. The response is
      returned unchanged.
      """
      stamp = time.asctime()
      params = request.values.to_dict()
      summary = (
          f"{stamp} {request.method} {request.path} "
          f"{params} -> {response.status_code}"
      )
      app.logger.debug(summary)
      return response
  @functools.lru_cache
  def _get_hash(path: str, mtime: float) -> str:
      """Return the hexadecimal SHA-1 digest of the file at ``path``.

      Args:
          path: Filesystem path of the file to hash.
          mtime: Modification time of the file. It is not used in the body;
              it exists only so that it becomes part of the ``lru_cache`` key,
              which means a changed file is hashed again rather than served
              from the cache.

      Returns:
          The 40-character lowercase hex digest of the file's contents.

      Raises:
          OSError: If the file cannot be opened or read.
      """
      # mtime is used as part of the cache key
      # The digest is only a cache-busting token, so mark it as non-security
      # use; this keeps SHA-1 available on builds that restrict it.
      digest = hashlib.sha1(usedforsecurity=False)
      with open(path, "rb") as fp:
          # Stream in fixed-size chunks so large files are never held in
          # memory all at once.
          while chunk := fp.read(65536):
              digest.update(chunk)
      return digest.hexdigest()
  def external_url_handler(
      error: Exception, endpoint: str, values: dict[str, Any]
  ) -> str:
      """Build a versioned URL for a static file, or re-raise ``error``.

      When ``endpoint`` is ``"static"`` and ``values`` has a ``"file"`` entry,
      the file is looked up under ``app.static_folder`` and the result is
      ``/static/<file>?v=<sha1>``, where the hash comes from ``_get_hash`` keyed
      on the file's path and modification time. For any other input the
      ``error`` passed in is raised.
      """
      if endpoint != "static" or "file" not in values:
          raise error

      filename = values["file"]
      assert app.static_folder is not None
      full_path = os.path.join(app.static_folder, filename)
      digest = _get_hash(full_path, os.path.getmtime(full_path))
      return f"/static/{filename}?v={digest}"


  # Register the handler with the app's list of URL build error handlers.
  app.url_build_error_handlers.append(external_url_handler)
  @app.route("/")
  def index() -> AnyResponse:
      """Main page: run a check built from the GET arguments and render it.

      ``update_sites()`` is called first, then a ``CheckQuery`` is built from
      the request and passed to ``do_check``. A ``CopyvioCheckError`` is logged
      and handed to the template as ``error`` with no result; any other
      exception propagates. The template's ``splash`` flag is set whenever the
      result is falsy.
      """
      update_sites()
      query = CheckQuery.from_get_args()

      result, error = None, None
      try:
          result = do_check(query)
      except CopyvioCheckError as exc:
          app.logger.exception(f"Copyvio check failed on {query}")
          error = exc

      show_splash = not result
      return render_template(
          "index.html.jinja",
          query=query,
          result=result,
          error=error,
          splash=show_splash,
      )
  @app.route("/settings", methods=["GET", "POST"])
  def settings() -> AnyResponse:
      """Settings page: apply submitted settings on POST, then render the form.

      ``status`` is the return value of ``process_settings()`` for POST
      requests and ``None`` otherwise. ``update_sites()`` runs after any
      settings processing and before the template is rendered.
      """
      status = None
      if request.method == "POST":
          status = process_settings()

      update_sites()
      default_site = cache.bot.wiki.get_site()
      return render_template(
          "settings.html.jinja",
          status=status,
          default_site=default_site,
          splash=True,
      )
  @app.route("/api")
  def api() -> AnyResponse:
      """Render the API help page (``api_help.html.jinja``)."""
      help_page = render_template("api_help.html.jinja")
      return help_page
  @app.route("/api.json")
  def api_json() -> AnyResponse:
      """JSON API endpoint.

      With no query arguments the API help page is rendered. Otherwise the
      ``format`` argument (default ``"json"``) selects the output:

      * ``json`` / ``jsonfm``: ``update_sites()`` is called and the request is
        handled by ``handle_api_request()``; any exception it raises is logged
        and converted with ``format_api_error("unhandled_exception", ...)``.
      * anything else: the result is an ``unknown_format`` API error.

      ``jsonfm`` results are rendered through ``api_result.html.jinja``. All
      other results are serialized with ``json.dumps`` and returned as
      ``application/json`` with ``Access-Control-Allow-Origin: *``.
      """
      if not request.args:
          return render_template("api_help.html.jinja")

      fmt = request.args.get("format", "json")
      if fmt not in ("json", "jsonfm"):
          result = format_api_error("unknown_format", f"Unknown format: {fmt!r}")
      else:
          update_sites()
          try:
              result = handle_api_request()
          except Exception as exc:
              app.logger.exception("API request failed")
              result = format_api_error("unhandled_exception", exc)

      if fmt == "jsonfm":
          return render_template("api_result.html.jinja", result=result)

      response = make_response(json.dumps(result))
      response.mimetype = "application/json"
      response.headers["Access-Control-Allow-Origin"] = "*"
      return response
  if app.debug:

      # Silence browser 404s when testing
      @app.route("/favicon.ico")
      def favicon() -> AnyResponse:
          """Return ``favicon.ico`` via ``app.send_static_file`` (debug mode only)."""
          icon_name = "favicon.ico"
          return app.send_static_file(icon_name)


  if __name__ == "__main__":
      # Running this file directly starts the app with app.run().
      app.run()