A Python robot that edits Wikipedia and interacts with people over IRC: https://en.wikipedia.org/wiki/User:EarwigBot
Non puoi selezionare più di 25 argomenti Gli argomenti devono iniziare con una lettera o un numero, possono includere trattini ('-') e possono essere lunghi fino a 35 caratteri.

153 righe
5.4 KiB

  1. # -*- coding: utf-8 -*-
  2. from datetime import datetime, timedelta
  3. from os.path import expanduser
  4. from threading import Lock
  5. from classes import BaseTask
  6. import config
  7. import wiki
  8. # Valid submission statuses:
  9. STATUS_NONE = 0
  10. STATUS_PEND = 1
  11. STATUS_DECLINE = 2
  12. STATUS_ACCEPT = 3
  13. class Task(BaseTask):
  14. """A task to generate charts about AfC submissions over time.
  15. The main function of the task is to work through the "AfC submissions by
  16. date" categories (e.g. [[Category:AfC submissions by date/12 July 2011]])
  17. and determine the number of declined, accepted, and currently pending
  18. submissions every day.
  19. This information is saved to a MySQL database ("u_earwig_afc_history") and
  20. used to generate attractive graphs showing the number of AfC submissions
  21. over time.
  22. """
  23. name = "afc_history"
  24. def __init__(self):
  25. cfg = config.tasks.get(self.name, {})
  26. self.destination = cfg.get("destination", "afc_history.png")
  27. self.categories = cfg.get("categories", {})
  28. # Connection data for our SQL database:
  29. kwargs = cfg.get("sql", {})
  30. kwargs["read_default_file"] = expanduser("~/.my.cnf")
  31. self.conn_data = kwargs
  32. self.db_access_lock = Lock()
  33. def run(self, **kwargs):
  34. self.site = wiki.get_site()
  35. with self.db_access_lock:
  36. self.conn = oursql.connect(**self.conn_data)
  37. action = kwargs.get("action")
  38. try:
  39. if action == "update":
  40. self.update(kwargs.get("days", 90))
  41. elif action == "generate":
  42. self.generate(kwargs.get("days", 90))
  43. finally:
  44. self.conn.close()
  45. def update(self, num_days):
  46. self.logger.info("Updating past {0} days".format(num_days))
  47. generator = self.backwards_cat_iterator()
  48. for d in xrange(num_days):
  49. category = generator.next()
  50. date = category.title().split("/")[-1]
  51. self.update_date(date, category)
  52. self.logger.info("Update complete")
  53. def generate(self, data):
  54. self.logger.info("Generating chart for past {0} days".format(num_days))
  55. data = {}
  56. generator = self.backwards_cat_iterator()
  57. for d in xrange(num_days):
  58. category = generator.next()
  59. date = category.title().split("/")[-1]
  60. data[date] = self.get_date_counts(date)
  61. dest = expanduser(self.destination)
  62. with open(dest, "wb") as fp:
  63. fp.write(data)
  64. self.logger.info("Chart saved to {0}".format(dest))
  65. def backwards_cat_iterator(self):
  66. date_base = self.categories["dateBase"]
  67. current = datetime.utcnow()
  68. while 1:
  69. subcat = current.stftime("%d %B %Y")
  70. title = "/".join((date_base, subcat))
  71. yield self.site.get_category(title)
  72. current -= timedelta(1) # Subtract one day from date
  73. def update_date(self, date, category):
  74. msg = "Updating {0} ([[{1}]])".format(date, category.title())
  75. self.logger.debug(msg)
  76. q_select = "SELECT page_id, page_status FROM page WHERE page_date = ?"
  77. q_delete = "DELETE FROM page WHERE page_id = ?"
  78. q_update = "UPDATE page SET page_status = ? WHERE page_id = ?"
  79. q_insert = "INSERT INTO page VALUES (?, ?, ?)"
  80. members = category.members(use_sql=True)
  81. tracked = []
  82. statuses = {}
  83. with self.conn.cursor() as cursor:
  84. cursor.execute(q_select, (date,))
  85. for pageid, status in cursor:
  86. tracked.append(pageid)
  87. statuses[pageid] = status
  88. for title, pageid in members:
  89. status = self.get_status(title, pageid)
  90. if status == STATUS_NONE:
  91. if pageid in tracked:
  92. cursor.execute(q_delete, (pageid,))
  93. continue
  94. if pageid in tracked:
  95. if status != statuses[pageid]:
  96. cursor.execute(q_update, (status, pageid))
  97. else:
  98. cursor.execute(q_insert, (pageid, date, status))
  99. def get_status(self, title, pageid):
  100. page = self.site.get_page(title)
  101. ns = page.namespace()
  102. if ns == wiki.NS_FILE_TALK: # Ignore accepted FFU requests
  103. return CHART_NONE
  104. if ns == wiki.NS_TALK:
  105. new_page = page.toggle_talk()
  106. if new_page.is_redirect():
  107. return CHART_NONE # Ignore accepted AFC/R requests
  108. return CHART_ACCEPT
  109. cats = self.categories
  110. query = "SELECT 1 FROM categorylinks WHERE cl_from = ? AND cl_to = ?"
  111. match = lambda cat: list(self.site.sql_query(query, (cat, pageid)))
  112. if match(cats["pending"]):
  113. return STATUS_PEND
  114. elif match(cats["unsubmitted"]):
  115. return STATUS_NONE
  116. elif match(cats["declined"]):
  117. return STATUS_DECLINE
  118. return STATUS_NONE
  119. def get_date_counts(self, date):
  120. query = "SELECT COUNT(*) FROM page WHERE page_date = ? AND page_status = ?"
  121. statuses = [STATUS_PEND, STATUS_DECLINE, STATUS_ACCEPT]
  122. counts = {}
  123. with self.conn.cursor() as cursor:
  124. for status in statuses:
  125. cursor.execute(query, (date, status))
  126. count = cursor.fetchall()[0][0]
  127. counts[status] = count
  128. return counts