A copyright violation detector running on Wikimedia Cloud Services https://tools.wmflabs.org/copyvios/
Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.
 
 
 
 
 

74 строки
2.7 KiB

  1. # -*- coding: utf-8 -*-
  2. from datetime import datetime
  3. from hashlib import sha256
  4. from urlparse import urlparse
  5. from earwigbot import exceptions
  6. from .misc import open_sql_connection
  7. def get_results(query):
  8. page = query.page = query.site.get_page(query.title)
  9. try:
  10. page.get() # Make sure that the page exists before we check it!
  11. except (exceptions.PageNotFoundError, exceptions.InvalidPageError):
  12. return
  13. if query.url:
  14. if urlparse(query.url).scheme not in ["http", "https"]:
  15. query.result = "bad URI"
  16. return
  17. query.result = page.copyvio_compare(query.url)
  18. query.result.cached = False
  19. else:
  20. conn = open_sql_connection(query.bot, "cache")
  21. if not query.nocache:
  22. query.result = _get_cached_results(page, conn)
  23. if not query.result:
  24. query.result = page.copyvio_check(max_queries=10, max_time=45)
  25. query.result.cached = False
  26. _cache_result(page, query.result, conn)
  27. def _get_cached_results(page, conn):
  28. query1 = "DELETE FROM cache WHERE cache_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 3 DAY)"
  29. query2 = "SELECT cache_url, cache_time, cache_queries, cache_process_time FROM cache WHERE cache_id = ? AND cache_hash = ?"
  30. shahash = sha256(page.get().encode("utf8")).hexdigest()
  31. with conn.cursor() as cursor:
  32. cursor.execute(query1)
  33. cursor.execute(query2, (page.pageid, shahash))
  34. results = cursor.fetchall()
  35. if not results:
  36. return None
  37. url, cache_time, num_queries, original_time = results[0]
  38. result = page.copyvio_compare(url)
  39. result.cached = True
  40. result.queries = num_queries
  41. result.original_time = original_time
  42. result.cache_time = cache_time.strftime("%b %d, %Y %H:%M:%S UTC")
  43. result.cache_age = _format_date(cache_time)
  44. return result
  45. def _format_date(cache_time):
  46. diff = datetime.utcnow() - cache_time
  47. if diff.seconds > 3600:
  48. return "{0} hours".format(diff.seconds / 3600)
  49. if diff.seconds > 60:
  50. return "{0} minutes".format(diff.seconds / 60)
  51. return "{0} seconds".format(diff.seconds)
  52. def _cache_result(page, result, conn):
  53. pageid = page.pageid
  54. shahash = sha256(page.get().encode("utf8")).hexdigest()
  55. query1 = "SELECT 1 FROM cache WHERE cache_id = ?"
  56. query2 = "DELETE FROM cache WHERE cache_id = ?"
  57. query3 = "INSERT INTO cache VALUES (?, ?, ?, CURRENT_TIMESTAMP, ?, ?)"
  58. with conn.cursor() as cursor:
  59. cursor.execute(query1, (pageid,))
  60. if cursor.fetchall():
  61. cursor.execute(query2, (pageid,))
  62. cursor.execute(query3, (pageid, shahash, result.url, result.queries,
  63. result.time))