A copyright violation detector running on Wikimedia Cloud Services https://tools.wmflabs.org/copyvios/
Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.
 
 
 
 
 

122 linhas
4.7 KiB

  1. # -*- coding: utf-8 -*-
  2. from time import time
  3. from urlparse import urlparse
  4. from earwigbot import exceptions
  5. from flask import g
  6. from .misc import get_globals_db
  7. __all__ = ["get_site", "get_sites"]
  8. def get_site(query):
  9. lang, project, name = query.lang, query.project, query.name
  10. wiki = g.bot.wiki
  11. if project not in [proj[0] for proj in query.all_projects]:
  12. return None
  13. if project == "wikimedia" and name: # Special sites:
  14. try:
  15. return wiki.get_site(name=name)
  16. except exceptions.SiteNotFoundError:
  17. try:
  18. return wiki.add_site(lang=lang, project=project)
  19. except (exceptions.APIError, exceptions.LoginError):
  20. return None
  21. try:
  22. return wiki.get_site(lang=lang, project=project)
  23. except exceptions.SiteNotFoundError:
  24. try:
  25. return wiki.add_site(lang=lang, project=project)
  26. except (exceptions.APIError, exceptions.LoginError):
  27. return None
  28. def get_sites():
  29. max_staleness = 60 * 60 * 24 * 7
  30. conn = get_globals_db()
  31. query1 = "SELECT update_time FROM updates WHERE update_service = ?"
  32. query2 = "SELECT lang_code, lang_name FROM language"
  33. query3 = "SELECT project_code, project_name FROM project"
  34. with conn.cursor() as cursor:
  35. cursor.execute(query1, ("sites",))
  36. try:
  37. time_since_update = int(time() - cursor.fetchall()[0][0])
  38. except IndexError:
  39. time_since_update = time()
  40. if time_since_update > max_staleness:
  41. _update_sites(cursor)
  42. cursor.execute(query2)
  43. langs = []
  44. for code, name in cursor.fetchall():
  45. if "\U" in name:
  46. name = name.decode("unicode_escape")
  47. langs.append((code, name))
  48. cursor.execute(query3)
  49. projects = cursor.fetchall()
  50. return langs, projects
  51. def _update_sites(cursor):
  52. site = g.bot.wiki.get_site()
  53. matrix = site.api_query(action="sitematrix")["sitematrix"]
  54. del matrix["count"]
  55. languages, projects = set(), set()
  56. for site in matrix.itervalues():
  57. if isinstance(site, list): # Special sites
  58. bad_sites = ["closed", "private", "fishbowl"]
  59. for special in site:
  60. if all([key not in special for key in bad_sites]):
  61. full = urlparse(special["url"]).netloc
  62. if full.count(".") == 1: # No subdomain, so use "www"
  63. lang, project = "www", full.split(".")[0]
  64. else:
  65. lang, project = full.rsplit(".", 2)[:2]
  66. code = u"{0}::{1}".format(lang, special["dbname"])
  67. name = special["code"].capitalize()
  68. languages.add((code, u"{0} ({1})".format(lang, name)))
  69. projects.add((project, project.capitalize()))
  70. continue
  71. this = set()
  72. for web in site["site"]:
  73. if "closed" in web:
  74. continue
  75. project = "wikipedia" if web["code"] == u"wiki" else web["code"]
  76. this.add((project, project.capitalize()))
  77. if this:
  78. code = site["code"]
  79. if "\U" in site["name"].encode("unicode_escape"):
  80. name = site["name"].encode("unicode_escape")
  81. else:
  82. name = site["name"]
  83. languages.add((code, u"{0} ({1})".format(code, name)))
  84. projects |= this
  85. _save_site_updates(cursor, languages, projects)
  86. def _save_site_updates(cursor, languages, projects):
  87. query1 = "SELECT lang_code, lang_name FROM language"
  88. query2 = "DELETE FROM language WHERE lang_code = ? AND lang_name = ?"
  89. query3 = "INSERT INTO language VALUES (?, ?)"
  90. query4 = "SELECT project_code, project_name FROM project"
  91. query5 = "DELETE FROM project WHERE project_code = ? AND project_name = ?"
  92. query6 = "INSERT INTO project VALUES (?, ?)"
  93. query7 = "SELECT 1 FROM updates WHERE update_service = ?"
  94. query8 = "UPDATE updates SET update_time = ? WHERE update_service = ?"
  95. query9 = "INSERT INTO updates VALUES (?, ?)"
  96. _synchronize_sites_with_db(cursor, languages, query1, query2, query3)
  97. _synchronize_sites_with_db(cursor, projects, query4, query5, query6)
  98. cursor.execute(query7, ("sites",))
  99. if cursor.fetchall():
  100. cursor.execute(query8, (time(), "sites"))
  101. else:
  102. cursor.execute(query9, ("sites", time()))
  103. def _synchronize_sites_with_db(cursor, updates, q_list, q_rmv, q_update):
  104. removals = []
  105. cursor.execute(q_list)
  106. for site in cursor:
  107. if site in updates:
  108. updates.remove(site)
  109. else:
  110. removals.append(site)
  111. cursor.executemany(q_rmv, removals)
  112. cursor.executemany(q_update, updates)