A copyright violation detector running on Wikimedia Cloud Services https://tools.wmflabs.org/copyvios/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

sites.py 4.6 KiB

12 jaren geleden
10 jaren geleden
10 jaren geleden
10 jaren geleden
10 jaren geleden
10 jaren geleden
10 jaren geleden
11 jaren geleden
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. # -*- coding: utf-8 -*-
  2. from time import time
  3. from urlparse import urlparse
  4. from earwigbot import exceptions
  5. from .misc import open_sql_connection
  def get_site(query):
      """Return the site object selected by *query*, or None if unavailable.

      *query* must expose ``lang``, ``project``, ``name``, ``all_projects``
      (an iterable of sequences whose first item is a project code) and
      ``bot.wiki``.

      A "wikimedia" project combined with a non-empty ``name`` (a special
      site) is looked up by that name; anything else is looked up by language
      and project. If the wiki does not know the site, it is added using the
      language and project, whichever kind of lookup was attempted.

      Returns None when the project code is not listed in
      ``query.all_projects``, or when adding the site fails with an APIError
      or a LoginError.
      """
      lang, project, name = query.lang, query.project, query.name
      wiki = query.bot.wiki

      known_codes = [entry[0] for entry in query.all_projects]
      if project not in known_codes:
          return None

      # Special sites are addressed by name; regular ones by lang + project.
      if project == "wikimedia" and name:
          lookup = {"name": name}
      else:
          lookup = {"lang": lang, "project": project}

      try:
          return wiki.get_site(**lookup)
      except exceptions.SiteNotFoundError:
          pass

      # The wiki does not know this site yet: try to add it.
      try:
          return wiki.add_site(lang=lang, project=project)
      except (exceptions.APIError, exceptions.LoginError):
          return None
  def get_sites(bot):
      """Return ``(langs, projects)`` from the "globals" database.

      The stored site list is refreshed first (via ``_update_sites``) when the
      "sites" entry of the ``updates`` table is older than one week or does
      not exist.

      *langs* is a list of ``(code, name)`` pairs; names stored with a literal
      backslash-U escape are decoded back to the characters they stand for.
      *projects* is whatever the cursor returns for the project query.
      """
      max_staleness = 60 * 60 * 24 * 7  # one week, in seconds
      conn = open_sql_connection(bot, "globals")
      query1 = "SELECT update_time FROM updates WHERE update_service = ?"
      query2 = "SELECT lang_code, lang_name FROM language"
      query3 = "SELECT project_code, project_name FROM project"

      with conn.cursor() as cursor:
          cursor.execute(query1, ("sites",))
          try:
              time_since_update = int(time() - cursor.fetchall()[0][0])
          except IndexError:
              # No update has been recorded: the current timestamp is far
              # above max_staleness, which forces a refresh below.
              time_since_update = time()
          if time_since_update > max_staleness:
              _update_sites(bot.wiki.get_site(), cursor)

          cursor.execute(query2)
          langs = []
          for code, name in cursor.fetchall():
              # The backslash is escaped explicitly: a bare "\U" only works
              # in Python 2 byte strings and is a syntax error elsewhere.
              if "\\U" in name:
                  if not isinstance(name, bytes):
                      # Same as the implicit ASCII encoding Python 2 performs
                      # when decoding a text string.
                      name = name.encode("ascii")
                  name = name.decode("unicode_escape")
              langs.append((code, name))

          cursor.execute(query3)
          projects = cursor.fetchall()
      return langs, projects
  def _update_sites(site, cursor):
      """Rebuild the language and project lists from the wiki's site matrix.

      Runs an ``action=sitematrix`` API query through *site*, collects
      ``(code, name)`` pairs for languages and for projects, and passes both
      sets, together with *cursor*, to ``_save_site_updates``.

      Matrix entries that are lists hold special sites: each one that is not
      closed, private or fishbowl contributes a ``lang::dbname`` language code
      and the project taken from its URL's host name. Every other entry is a
      language with a ``site`` list; its non-closed projects are recorded, and
      the language itself is recorded only if at least one project remains.
      """
      matrix = site.api_query(action="sitematrix")["sitematrix"]
      # Drop the "count" entry so that only list/dict entries are iterated.
      matrix.pop("count", None)
      bad_sites = ("closed", "private", "fishbowl")
      languages, projects = set(), set()

      for entry in matrix.values():
          if isinstance(entry, list):  # Special sites
              for special in entry:
                  if any(key in special for key in bad_sites):
                      continue
                  full = urlparse(special["url"]).netloc
                  if full.count(".") == 1:  # No subdomain, so use "www"
                      lang, project = "www", full.split(".")[0]
                  else:
                      lang, project = full.rsplit(".", 2)[:2]
                  code = u"{0}::{1}".format(lang, special["dbname"])
                  name = special["code"].capitalize()
                  languages.add((code, u"{0} ({1})".format(lang, name)))
                  projects.add((project, project.capitalize()))
              continue

          this = set()
          for web in entry["site"]:
              if "closed" in web:
                  continue
              project = "wikipedia" if web["code"] == u"wiki" else web["code"]
              this.add((project, project.capitalize()))
          if not this:
              continue  # no open project for this language

          code = entry["code"]
          # Names whose escaped form needs a \UXXXXXXXX sequence are kept in
          # that escaped (pure ASCII) form; all other names are kept verbatim.
          escaped = entry["name"].encode("unicode_escape")
          if b"\\U" in escaped:
              name = escaped.decode("ascii")
          else:
              name = entry["name"]
          languages.add((code, u"{0} ({1})".format(code, name)))
          projects |= this

      _save_site_updates(cursor, languages, projects)
  def _save_site_updates(cursor, languages, projects):
      """Store the collected languages and projects, then stamp the update.

      *languages* and *projects* are synchronized with the ``language`` and
      ``project`` tables through ``_synchronize_sites_with_db``. Afterwards
      the "sites" row of the ``updates`` table receives the current time: the
      row is updated if it exists and inserted otherwise.
      """
      # (list, remove, insert) statements for each table.
      language_queries = (
          "SELECT lang_code, lang_name FROM language",
          "DELETE FROM language WHERE lang_code = ? AND lang_name = ?",
          "INSERT INTO language VALUES (?, ?)",
      )
      project_queries = (
          "SELECT project_code, project_name FROM project",
          "DELETE FROM project WHERE project_code = ? AND project_name = ?",
          "INSERT INTO project VALUES (?, ?)",
      )
      _synchronize_sites_with_db(cursor, languages, *language_queries)
      _synchronize_sites_with_db(cursor, projects, *project_queries)

      find_stamp = "SELECT 1 FROM updates WHERE update_service = ?"
      cursor.execute(find_stamp, ("sites",))
      if cursor.fetchall():
          statement = "UPDATE updates SET update_time = ? WHERE update_service = ?"
          params = (time(), "sites")
      else:
          statement = "INSERT INTO updates VALUES (?, ?)"
          params = ("sites", time())
      cursor.execute(statement, params)
  def _synchronize_sites_with_db(cursor, updates, q_list, q_rmv, q_update):
      """Bring the rows of one table in line with *updates*.

      *q_list* is executed and its rows are compared with *updates*: a row
      that is also in *updates* is already stored, so it is removed from
      *updates*; a row that is not is queued for removal. The queued rows are
      then passed to *q_rmv* and whatever remains in *updates* to *q_update*,
      both through ``executemany``.

      Note that *updates* is modified in place.
      """
      cursor.execute(q_list)

      stale_rows = []
      for row in cursor:
          if row not in updates:
              stale_rows.append(row)
              continue
          # Already stored: no need to insert it again.
          updates.remove(row)

      cursor.executemany(q_rmv, stale_rows)
      cursor.executemany(q_update, updates)