A copyright violation detector running on Wikimedia Cloud Services https://tools.wmflabs.org/copyvios/
25'ten fazla konu seçemezsiniz Konular bir harf veya rakamla başlamalı, kısa çizgiler ('-') içerebilir ve en fazla 35 karakter uzunluğunda olabilir.
 
 
 
 
 

96 satır
3.3 KiB

  1. # -*- coding: utf-8 -*-
  2. from datetime import datetime
  3. from hashlib import sha256
  4. from urlparse import urlparse
  5. from earwigbot import exceptions
  6. from .misc import Query, get_cache_db
  7. from .sites import get_site, get_sites
  8. __all__ = ["do_check"]
  9. def do_check():
  10. query = Query()
  11. if query.lang:
  12. query.lang = query.orig_lang = query.lang.lower()
  13. if "::" in query.lang:
  14. query.lang, query.name = query.lang.split("::", 1)
  15. if query.project:
  16. query.project = query.project.lower()
  17. query.all_langs, query.all_projects = get_sites()
  18. if query.project and query.lang and (query.title or query.oldid):
  19. query.site = get_site(query)
  20. if query.site:
  21. if query.title:
  22. _get_results(query)
  23. elif query.oldid:
  24. pass
  25. return query
  26. def _get_results(query):
  27. page = query.page = query.site.get_page(query.title)
  28. try:
  29. page.get() # Make sure that the page exists before we check it!
  30. except (exceptions.PageNotFoundError, exceptions.InvalidPageError):
  31. return
  32. if query.url:
  33. if urlparse(query.url).scheme not in ["http", "https"]:
  34. query.error = "bad URI"
  35. return
  36. query.result = page.copyvio_compare(query.url)
  37. query.result.cached = False
  38. else:
  39. conn = get_cache_db()
  40. if not query.nocache:
  41. query.result = _get_cached_results(page, conn)
  42. if not query.result:
  43. query.result = page.copyvio_check(max_queries=10, max_time=45)
  44. query.result.cached = False
  45. _cache_result(page, query.result, conn)
  46. def _get_cached_results(page, conn):
  47. query1 = "DELETE FROM cache WHERE cache_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 3 DAY)"
  48. query2 = "SELECT cache_url, cache_time, cache_queries, cache_process_time FROM cache WHERE cache_id = ? AND cache_hash = ?"
  49. shahash = sha256(page.get().encode("utf8")).hexdigest()
  50. with conn.cursor() as cursor:
  51. cursor.execute(query1)
  52. cursor.execute(query2, (page.pageid, shahash))
  53. results = cursor.fetchall()
  54. if not results:
  55. return None
  56. url, cache_time, num_queries, original_time = results[0]
  57. result = page.copyvio_compare(url)
  58. result.cached = True
  59. result.queries = num_queries
  60. result.original_time = original_time
  61. result.cache_time = cache_time.strftime("%b %d, %Y %H:%M:%S UTC")
  62. result.cache_age = _format_date(cache_time)
  63. return result
  64. def _format_date(cache_time):
  65. diff = datetime.utcnow() - cache_time
  66. if diff.seconds > 3600:
  67. return "{0} hours".format(diff.seconds / 3600)
  68. if diff.seconds > 60:
  69. return "{0} minutes".format(diff.seconds / 60)
  70. return "{0} seconds".format(diff.seconds)
  71. def _cache_result(page, result, conn):
  72. pageid = page.pageid
  73. shahash = sha256(page.get().encode("utf8")).hexdigest()
  74. query1 = "SELECT 1 FROM cache WHERE cache_id = ?"
  75. query2 = "DELETE FROM cache WHERE cache_id = ?"
  76. query3 = "INSERT INTO cache VALUES (?, ?, ?, CURRENT_TIMESTAMP, ?, ?)"
  77. with conn.cursor() as cursor:
  78. cursor.execute(query1, (pageid,))
  79. if cursor.fetchall():
  80. cursor.execute(query2, (pageid,))
  81. cursor.execute(query3, (pageid, shahash, result.url, result.queries,
  82. result.time))