A copyright violation detector running on Wikimedia Cloud Services: https://tools.wmflabs.org/copyvios/

checker.py (3.3 KiB)

# -*- coding: utf-8 -*-
from datetime import datetime
from hashlib import sha256
from urlparse import urlparse

from earwigbot import exceptions

from .misc import Query, get_cache_db
from .sites import get_site, get_sites

__all__ = ["do_check"]

def do_check():
    query = Query()
    if query.lang:
        query.lang = query.orig_lang = query.lang.lower()
        if "::" in query.lang:
            query.lang, query.name = query.lang.split("::", 1)
    if query.project:
        query.project = query.project.lower()
    query.all_langs, query.all_projects = get_sites()

    if query.project and query.lang and (query.title or query.oldid):
        query.site = get_site(query)
        if query.site:
            if query.title:
                _get_results(query)
            elif query.oldid:
                pass
    return query

def _get_results(query):
    page = query.page = query.site.get_page(query.title)
    try:
        page.get()  # Make sure that the page exists before we check it!
    except (exceptions.PageNotFoundError, exceptions.InvalidPageError):
        return

    if query.url:
        if urlparse(query.url).scheme not in ["http", "https"]:
            query.error = "bad URI"
            return
        query.result = page.copyvio_compare(query.url)
        query.result.cached = False
    else:
        conn = get_cache_db()
        if not query.nocache:
            query.result = _get_cached_results(page, conn)
        if not query.result:
            query.result = page.copyvio_check(max_queries=10, max_time=45)
            query.result.cached = False
            _cache_result(page, query.result, conn)

def _get_cached_results(page, conn):
    query1 = "DELETE FROM cache WHERE cache_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 3 DAY)"
    query2 = "SELECT cache_url, cache_time, cache_queries, cache_process_time FROM cache WHERE cache_id = ? AND cache_hash = ?"
    shahash = sha256(page.get().encode("utf8")).hexdigest()

    with conn.cursor() as cursor:
        cursor.execute(query1)
        cursor.execute(query2, (page.pageid, shahash))
        results = cursor.fetchall()

    if not results:
        return None

    url, cache_time, num_queries, original_time = results[0]
    result = page.copyvio_compare(url)
    result.cached = True
    result.queries = num_queries
    result.original_time = original_time
    result.cache_time = cache_time.strftime("%b %d, %Y %H:%M:%S UTC")
    result.cache_age = _format_date(cache_time)
    return result

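The cache table itself is created outside this file, so its exact schema is not shown here. From the SELECT above and the positional INSERT in _cache_result below, it appears to hold one row per page, keyed by page ID and a SHA-256 hash of the page text. The constant below is only an inferred sketch (hypothetical name CREATE_CACHE_TABLE, guessed column types and sizes), not the tool's actual DDL:

    # Hypothetical DDL reconstructed from the queries in this module; the
    # real table definition lives elsewhere and may differ in types/sizes.
    CREATE_CACHE_TABLE = """
    CREATE TABLE IF NOT EXISTS cache (
        cache_id           INT UNSIGNED NOT NULL PRIMARY KEY,  -- page.pageid
        cache_hash         CHAR(64) NOT NULL,        -- sha256 of the page text
        cache_url          VARCHAR(512),             -- best-match source URL
        cache_time         TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
        cache_queries      INT NOT NULL,             -- search queries performed
        cache_process_time FLOAT NOT NULL            -- seconds the check took
    )
    """
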
def _format_date(cache_time):
    diff = datetime.utcnow() - cache_time
    if diff.seconds > 3600:
        return "{0} hours".format(diff.seconds / 3600)
    if diff.seconds > 60:
        return "{0} minutes".format(diff.seconds / 60)
    return "{0} seconds".format(diff.seconds)

def _cache_result(page, result, conn):
    pageid = page.pageid
    shahash = sha256(page.get().encode("utf8")).hexdigest()
    query1 = "SELECT 1 FROM cache WHERE cache_id = ?"
    query2 = "DELETE FROM cache WHERE cache_id = ?"
    query3 = "INSERT INTO cache VALUES (?, ?, ?, CURRENT_TIMESTAMP, ?, ?)"
    with conn.cursor() as cursor:
        cursor.execute(query1, (pageid,))
        if cursor.fetchall():
            cursor.execute(query2, (pageid,))
        cursor.execute(query3, (pageid, shahash, result.url, result.queries,
                                result.time))
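
For context, do_check is the module's only public entry point (per __all__). How Query gathers its attributes is not visible in this file; the argument-less construction in do_check() suggests it wraps the parameters of the current web request (lang, project, title or oldid, url, nocache). The snippet below is a hypothetical caller (check_view is not part of the tool) and only reads attributes that checker.py itself sets:

    from .checker import do_check  # assumes this module lives in the same package

    def check_view():
        # Hypothetical handler: run the check and summarize the outcome.
        query = do_check()
        if getattr(query, "error", None):
            return "Error: {0}".format(query.error)
        result = getattr(query, "result", None)
        if result is None:
            # No title/oldid given, unknown site, or the page does not exist.
            return "No check was performed."
        return "Check finished (served from cache: {0})".format(result.cached)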