A copyright violation detector running on Wikimedia Cloud Services https://tools.wmflabs.org/copyvios/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

117 lines
4.6 KiB

  1. # -*- coding: utf-8 -*-
  2. from time import time
  3. from urlparse import urlparse
  4. from earwigbot import exceptions
  5. from .misc import open_sql_connection
  6. def get_site(bot, query, all_projects):
  7. lang, project, name = query.lang, query.project, query.name
  8. if project not in [proj[0] for proj in all_projects]:
  9. return None
  10. if project == "wikimedia" and name: # Special sites:
  11. try:
  12. return bot.wiki.get_site(name=name)
  13. except exceptions.SiteNotFoundError:
  14. try:
  15. return bot.wiki.add_site(lang=lang, project=project)
  16. except (exceptions.APIError, exceptions.LoginError):
  17. return None
  18. try:
  19. return bot.wiki.get_site(lang=lang, project=project)
  20. except exceptions.SiteNotFoundError:
  21. try:
  22. return bot.wiki.add_site(lang=lang, project=project)
  23. except (exceptions.APIError, exceptions.LoginError):
  24. return None
  25. def get_sites(bot):
  26. max_staleness = 60 * 60 * 24 * 7
  27. conn = open_sql_connection(bot, "globals")
  28. query1 = "SELECT update_time FROM updates WHERE update_service = ?"
  29. query2 = "SELECT lang_code, lang_name FROM language"
  30. query3 = "SELECT project_code, project_name FROM project"
  31. with conn.cursor() as cursor:
  32. cursor.execute(query1, ("sites",))
  33. try:
  34. time_since_update = int(time() - cursor.fetchall()[0][0])
  35. except IndexError:
  36. time_since_update = time()
  37. if time_since_update > max_staleness:
  38. _update_sites(bot.wiki.get_site(), cursor)
  39. cursor.execute(query2)
  40. langs = []
  41. for code, name in cursor.fetchall():
  42. if "\U" in name:
  43. name = name.decode("unicode_escape")
  44. langs.append((code, name))
  45. cursor.execute(query3)
  46. projects = cursor.fetchall()
  47. return langs, projects
  48. def _update_sites(site, cursor):
  49. matrix = site.api_query(action="sitematrix")["sitematrix"]
  50. del matrix["count"]
  51. languages, projects = set(), set()
  52. for site in matrix.itervalues():
  53. if isinstance(site, list): # Special sites
  54. bad_sites = ["closed", "private", "fishbowl"]
  55. for special in site:
  56. if all([key not in special for key in bad_sites]):
  57. full = urlparse(special["url"]).netloc
  58. if full.count(".") == 1: # No subdomain, so use "www"
  59. lang, project = "www", full.split(".")[0]
  60. else:
  61. lang, project = full.rsplit(".", 2)[:2]
  62. code = u"{0}::{1}".format(lang, special["dbname"])
  63. name = special["code"].capitalize()
  64. languages.add((code, u"{0} ({1})".format(lang, name)))
  65. projects.add((project, project.capitalize()))
  66. continue
  67. this = set()
  68. for web in site["site"]:
  69. if "closed" in web:
  70. continue
  71. project = "wikipedia" if web["code"] == u"wiki" else web["code"]
  72. this.add((project, project.capitalize()))
  73. if this:
  74. code = site["code"]
  75. if "\U" in site["name"].encode("unicode_escape"):
  76. name = site["name"].encode("unicode_escape")
  77. else:
  78. name = site["name"]
  79. languages.add((code, u"{0} ({1})".format(code, name)))
  80. projects |= this
  81. _save_site_updates(cursor, languages, projects)
  82. def _save_site_updates(cursor, languages, projects):
  83. query1 = "SELECT lang_code, lang_name FROM language"
  84. query2 = "DELETE FROM language WHERE lang_code = ? AND lang_name = ?"
  85. query3 = "INSERT INTO language VALUES (?, ?)"
  86. query4 = "SELECT project_code, project_name FROM project"
  87. query5 = "DELETE FROM project WHERE project_code = ? AND project_name = ?"
  88. query6 = "INSERT INTO project VALUES (?, ?)"
  89. query7 = "SELECT 1 FROM updates WHERE update_service = ?"
  90. query8 = "UPDATE updates SET update_time = ? WHERE update_service = ?"
  91. query9 = "INSERT INTO updates VALUES (?, ?)"
  92. _synchronize_sites_with_db(cursor, languages, query1, query2, query3)
  93. _synchronize_sites_with_db(cursor, projects, query4, query5, query6)
  94. cursor.execute(query7, ("sites",))
  95. if cursor.fetchall():
  96. cursor.execute(query8, (time(), "sites"))
  97. else:
  98. cursor.execute(query9, ("sites", time()))
  99. def _synchronize_sites_with_db(cursor, updates, q_list, q_rmv, q_update):
  100. removals = []
  101. cursor.execute(q_list)
  102. for site in cursor:
  103. if site in updates:
  104. updates.remove(site)
  105. else:
  106. removals.append(site)
  107. cursor.executemany(q_rmv, removals)
  108. cursor.executemany(q_update, updates)