A Python robot that edits Wikipedia and interacts with people over IRC https://en.wikipedia.org/wiki/User:EarwigBot
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

afc_history.py 7.0 KiB

13 年前
13 年前
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. # -*- coding: utf-8 -*-
  2. from collections import OrderedDict
  3. from datetime import datetime, timedelta
  4. from itertools import count
  5. from os.path import expanduser
  6. from threading import Lock
  7. from time import sleep
  8. from matplotlib import pyplot as plt
  9. from numpy import arange
  10. import oursql
  11. from classes import BaseTask
  12. import config
  13. import wiki
  14. # Valid submission statuses:
  15. STATUS_NONE = 0
  16. STATUS_PEND = 1
  17. STATUS_DECLINE = 2
  18. STATUS_ACCEPT = 3
  19. class Task(BaseTask):
  20. """A task to generate charts about AfC submissions over time.
  21. The main function of the task is to work through the "AfC submissions by
  22. date" categories (e.g. [[Category:AfC submissions by date/12 July 2011]])
  23. and determine the number of declined, accepted, and currently pending
  24. submissions every day.
  25. This information is saved to a MySQL database ("u_earwig_afc_history") and
  26. used to generate a graph showing the number of AfC submissions by date
  27. with matplotlib and numpy. The chart is saved as a PNG to
  28. config.tasks["afc_history"]["graph"]["dest"], which defaults to
  29. "afc_history.png".
  30. """
  31. name = "afc_history"
  32. def __init__(self):
  33. cfg = config.tasks.get(self.name, {})
  34. self.num_days = cfg.get("days", 90)
  35. self.categories = cfg.get("categories", {})
  36. # Graph stuff:
  37. self.graph = cfg.get("graph", {})
  38. self.dest = self.graph.get("dest", "afc_history.png")
  39. # Connection data for our SQL database:
  40. kwargs = cfg.get("sql", {})
  41. kwargs["read_default_file"] = expanduser("~/.my.cnf")
  42. self.conn_data = kwargs
  43. self.db_access_lock = Lock()
  44. def run(self, **kwargs):
  45. self.site = wiki.get_site()
  46. with self.db_access_lock:
  47. self.conn = oursql.connect(**self.conn_data)
  48. action = kwargs.get("action")
  49. try:
  50. num_days = int(kwargs.get("days", self.num_days))
  51. if action == "update":
  52. self.update(num_days)
  53. elif action == "generate":
  54. self.generate(num_days)
  55. finally:
  56. self.conn.close()
  57. def update(self, num_days):
  58. self.logger.info("Updating past {0} days".format(num_days))
  59. generator = self.backwards_cat_iterator()
  60. for d in xrange(num_days):
  61. category = generator.next()
  62. date = category.title().split("/")[-1]
  63. self.update_date(date, category)
  64. sleep(15)
  65. self.logger.info("Update complete")
  66. def generate(self, num_days):
  67. self.logger.info("Generating chart for past {0} days".format(num_days))
  68. data = OrderedDict()
  69. generator = self.backwards_cat_iterator()
  70. for d in xrange(num_days):
  71. category = generator.next()
  72. date = category.title().split("/")[-1]
  73. data[date] = self.get_date_counts(date)
  74. dest = expanduser(self.destination)
  75. self.generate_chart(reversed(data))
  76. plt.savefig(dest)
  77. self.logger.info("Chart saved to {0}".format(dest))
  78. def backwards_cat_iterator(self):
  79. date_base = self.categories["dateBase"]
  80. current = datetime.utcnow()
  81. while 1:
  82. subcat = current.strftime("%d %B %Y")
  83. title = "/".join((date_base, subcat))
  84. yield self.site.get_category(title)
  85. current -= timedelta(1) # Subtract one day from date
  86. def update_date(self, date, category):
  87. msg = "Updating {0} ([[{1}]])".format(date, category.title())
  88. self.logger.debug(msg)
  89. q_select = "SELECT page_date, page_status FROM page WHERE page_id = ?"
  90. q_delete = "DELETE FROM page WHERE page_id = ?"
  91. q_update = "UPDATE page SET page_date = ?, page_status = ? WHERE page_id = ?"
  92. q_insert = "INSERT INTO page VALUES (?, ?, ?)"
  93. members = category.members(use_sql=True)
  94. with self.conn.cursor() as cursor:
  95. for title, pageid in members:
  96. cursor.execute(q_select, (pageid,))
  97. stored = cursor.fetchall()
  98. status = self.get_status(title, pageid)
  99. if status == STATUS_NONE:
  100. if stored:
  101. cursor.execute(q_delete, (pageid,))
  102. continue
  103. if stored:
  104. stored_date, stored_status = list(stored)[0]
  105. if date != stored_date or status != stored_status:
  106. cursor.execute(q_update, (date, status, pageid))
  107. else:
  108. cursor.execute(q_insert, (pageid, date, status))
  109. def get_status(self, title, pageid):
  110. page = self.site.get_page(title)
  111. ns = page.namespace()
  112. if ns == wiki.NS_FILE_TALK: # Ignore accepted FFU requests
  113. return STATUS_NONE
  114. if ns == wiki.NS_TALK:
  115. new_page = page.toggle_talk()
  116. if new_page.is_redirect():
  117. return STATUS_NONE # Ignore accepted AFC/R requests
  118. return STATUS_ACCEPT
  119. cats = self.categories
  120. sq = self.site.sql_query
  121. query = "SELECT 1 FROM categorylinks WHERE cl_to = ? AND cl_from = ?"
  122. match = lambda cat: list(sq(query, (cat.replace(" ", "_"), pageid)))
  123. if match(cats["pending"]):
  124. return STATUS_PEND
  125. elif match(cats["unsubmitted"]):
  126. return STATUS_NONE
  127. elif match(cats["declined"]):
  128. return STATUS_DECLINE
  129. return STATUS_NONE
  130. def get_date_counts(self, date):
  131. query = "SELECT COUNT(*) FROM page WHERE page_date = ? AND page_status = ?"
  132. statuses = [STATUS_PEND, STATUS_DECLINE, STATUS_ACCEPT]
  133. counts = {}
  134. with self.conn.cursor() as cursor:
  135. for status in statuses:
  136. cursor.execute(query, (date, status))
  137. count = cursor.fetchall()[0][0]
  138. counts[status] = count
  139. return counts
  140. def generate_chart(self, data):
  141. pends = [d[STATUS_PEND] for d in data.itervalues()]
  142. declines = [d[STATUS_DECLINE] for d in data.itervalues()]
  143. accepts = [d[STATUS_ACCEPT] for d in data.itervalues()]
  144. ind = arange(len(data))
  145. width = self.graph.get("width", 0.75)
  146. xstep = self.graph.get("xAxisStep", 6)
  147. xticks = arange(xstep-1, ind.size+xstep-1, xstep) + width/2.0
  148. xlabels = [d for c, d in zip(count(1), data.keys()) if not c % xstep]
  149. pcolor = self.graph.get("pendingColor", "y")
  150. dcolor = self.graph.get("declinedColor", "r")
  151. acolor = self.graph.get("acceptedColor", "g")
  152. p1 = plt.bar(ind, pends, width, color=pcolor)
  153. p2 = plt.bar(ind, declines, width, color=dcolor, bottom=pends)
  154. p3 = plt.bar(ind, accepts, width, color=acolor, bottom=declines)
  155. plt.title("AfC submissions per date")
  156. plt.ylabel("Submissions")
  157. plt.xlabel("Date")
  158. plt.xticks(xticks, xlabels)
  159. plt.legend((p1[0], p2[0], p3[0]), ("Pending", "Declined", "Accepted"))
  160. fig = plt.gcf()
  161. fig.set_size_inches(12, 9) # 1200, 900
  162. fig.autofmt_xdate()