A copyright violation detector running on Wikimedia Cloud Services: https://tools.wmflabs.org/copyvios/

# -*- coding: utf-8 -*-

from datetime import datetime
from hashlib import sha256
from urlparse import urlparse

from earwigbot import exceptions

from .misc import get_bot, Query, open_sql_connection
from .sites import get_site, get_sites

def do_check():
    query = Query()
    if query.lang:
        query.lang = query.orig_lang = query.lang.lower()
        if "::" in query.lang:
            query.lang, query.name = query.lang.split("::", 1)
    if query.project:
        query.project = query.project.lower()
    query.bot = get_bot()
    query.all_langs, query.all_projects = get_sites(query.bot)
    if query.project and query.lang and query.title:  # TODO: and (query.title or query.oldid): ...
        query.site = get_site(query)
        if query.site:
            _get_results(query)
    return query

def _get_results(query):
    page = query.page = query.site.get_page(query.title)
    try:
        page.get()  # Make sure that the page exists before we check it!
    except (exceptions.PageNotFoundError, exceptions.InvalidPageError):
        return
    if query.url:
        # Compare the page against one specific suspect URL.
        if urlparse(query.url).scheme not in ["http", "https"]:
            query.result = "bad URI"
            return
        query.result = page.copyvio_compare(query.url)
        query.result.cached = False
    else:
        # Full check: reuse a cached result if possible, otherwise run a
        # fresh search-engine check and cache it.
        conn = open_sql_connection(query.bot, "cache")
        if not query.nocache:
            query.result = _get_cached_results(page, conn)
        if not query.result:
            query.result = page.copyvio_check(max_queries=10, max_time=45)
            query.result.cached = False
            _cache_result(page, query.result, conn)

def _get_cached_results(page, conn):
    # Purge stale cache rows, then look for a cached check of this exact
    # revision, matched by page ID and a SHA-256 hash of the page text.
    query1 = "DELETE FROM cache WHERE cache_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 3 DAY)"
    query2 = "SELECT cache_url, cache_time, cache_queries, cache_process_time FROM cache WHERE cache_id = ? AND cache_hash = ?"
    shahash = sha256(page.get().encode("utf8")).hexdigest()
    with conn.cursor() as cursor:
        cursor.execute(query1)
        cursor.execute(query2, (page.pageid, shahash))
        results = cursor.fetchall()
    if not results:
        return None
    # Re-run only the cheap URL comparison against the cached best-match URL,
    # carrying over the stats from the original (expensive) check.
    url, cache_time, num_queries, original_time = results[0]
    result = page.copyvio_compare(url)
    result.cached = True
    result.queries = num_queries
    result.original_time = original_time
    result.cache_time = cache_time.strftime("%b %d, %Y %H:%M:%S UTC")
    result.cache_age = _format_date(cache_time)
    return result

def _format_date(cache_time):
    # Human-readable age of the cached result. timedelta.seconds ignores
    # whole days, so report those separately (rows live up to three days).
    diff = datetime.utcnow() - cache_time
    if diff.days:
        return "{0} days".format(diff.days)
    if diff.seconds > 3600:
        return "{0} hours".format(diff.seconds / 3600)
    if diff.seconds > 60:
        return "{0} minutes".format(diff.seconds / 60)
    return "{0} seconds".format(diff.seconds)

def _cache_result(page, result, conn):
    # Keep one cached result per page: drop any existing row for this page ID,
    # then insert the fresh result.
    pageid = page.pageid
    shahash = sha256(page.get().encode("utf8")).hexdigest()
    query1 = "SELECT 1 FROM cache WHERE cache_id = ?"
    query2 = "DELETE FROM cache WHERE cache_id = ?"
    query3 = "INSERT INTO cache VALUES (?, ?, ?, CURRENT_TIMESTAMP, ?, ?)"
    with conn.cursor() as cursor:
        cursor.execute(query1, (pageid,))
        if cursor.fetchall():
            cursor.execute(query2, (pageid,))
        cursor.execute(query3, (pageid, shahash, result.url, result.queries,
                                result.time))
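
The SQL above assumes a pre-existing "cache" table. Below is a minimal sketch of a compatible schema: the column names and order come from the SELECT and INSERT statements, while the types, sizes, and primary key are assumptions (the DATE_SUB(..., INTERVAL 3 DAY) syntax suggests MySQL/MariaDB).

# Hypothetical DDL for the "cache" table used above; types and key are assumptions.
CACHE_SCHEMA = """
CREATE TABLE IF NOT EXISTS cache (
    cache_id           INT UNSIGNED NOT NULL,  -- page ID of the checked article
    cache_hash         CHAR(64) NOT NULL,      -- SHA-256 hex digest of the page text
    cache_url          VARCHAR(512),           -- best-match URL from the original check
    cache_time         TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    cache_queries      INT,                    -- search queries used by the original check
    cache_process_time FLOAT,                  -- seconds the original check took
    PRIMARY KEY (cache_id)
)
"""

Keying the cache lookup on both cache_id and cache_hash means a cached row is only reused while the page text is unchanged; any edit to the page changes the hash and forces a fresh check, while the DELETE-then-INSERT in _cache_result keeps at most one row per page.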