A copyright violation detector running on Wikimedia Cloud Services https://tools.wmflabs.org/copyvios/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

74 lines
2.7 KiB

  1. # -*- coding: utf-8 -*-
  2. from time import time
  3. from urlparse import urlparse
  4. from earwigbot import exceptions
  5. from .misc import cache
  6. __all__ = ["get_site", "update_sites"]
  7. def get_site(query):
  8. lang, project, name = query.lang, query.project, query.name
  9. wiki = cache.bot.wiki
  10. if project not in [proj[0] for proj in cache.projects]:
  11. return None
  12. if project == "wikimedia" and name: # Special sites:
  13. try:
  14. return wiki.get_site(name=name)
  15. except exceptions.SiteNotFoundError:
  16. return _add_site(lang, project)
  17. try:
  18. return wiki.get_site(lang=lang, project=project)
  19. except exceptions.SiteNotFoundError:
  20. return _add_site(lang, project)
  21. def update_sites():
  22. if time() - cache.last_sites_update > 60 * 60 * 24 * 7:
  23. cache.langs, cache.projects = _load_sites()
  24. cache.last_sites_update = time()
  25. def _add_site(lang, project):
  26. update_sites()
  27. if not any(project == item[0] for item in cache.projects):
  28. return None
  29. if lang != "www" and not any(lang == item[0] for item in cache.langs):
  30. return None
  31. try:
  32. return cache.bot.wiki.add_site(lang=lang, project=project)
  33. except (exceptions.APIError, exceptions.LoginError):
  34. return None
def _load_sites():
    """Fetch the complete list of WMF wikis via the sitematrix API.

    Returns a pair ``(langs, projects)`` of sorted lists of two-tuples:
    ``langs`` maps a language/site code to a display label, ``projects``
    maps a project name to its capitalized label.
    """
    site = cache.bot.wiki.get_site()
    matrix = site.api_query(action="sitematrix")["sitematrix"]
    del matrix["count"]  # "count" is metadata, not a site group
    langs, projects = set(), set()
    for site in matrix.itervalues():
        if isinstance(site, list):  # Special sites
            # Skip wikis flagged as closed, private, or fishbowl.
            bad_sites = ["closed", "private", "fishbowl"]
            for special in site:
                if all([key not in special for key in bad_sites]):
                    full = urlparse(special["url"]).netloc
                    if full.count(".") == 1:  # No subdomain, so use "www"
                        lang, project = "www", full.split(".")[0]
                    else:
                        # e.g. "commons.wikimedia.org" -> ("commons", "wikimedia")
                        lang, project = full.rsplit(".", 2)[:2]
                    # Disambiguate special sites by their database name.
                    code = u"{0}::{1}".format(lang, special["dbname"])
                    name = special["code"].capitalize()
                    langs.add((code, u"{0} ({1})".format(lang, name)))
                    projects.add((project, project.capitalize()))
        else:
            # Regular language group: collect its open projects.
            this = set()
            for web in site["site"]:
                if "closed" in web:
                    continue
                # The API reports Wikipedia's project code as "wiki".
                proj = "wikipedia" if web["code"] == u"wiki" else web["code"]
                this.add((proj, proj.capitalize()))
            if this:
                code = site["code"]
                langs.add((code, u"{0} ({1})".format(code, site["name"])))
                projects |= this
    return list(sorted(langs)), list(sorted(projects))