A Python robot that edits Wikipedia and interacts with people over IRC https://en.wikipedia.org/wiki/User:EarwigBot
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

152 lines
5.3 KiB

  1. # -*- coding: utf-8 -*-
from datetime import datetime, timedelta
from os.path import expanduser
from threading import Lock

import oursql

from classes import BaseTask
import config
import wiki
  7. # Valid submission statuses:
  8. STATUS_NONE = 0
  9. STATUS_PEND = 1
  10. STATUS_DECLINE = 2
  11. STATUS_ACCEPT = 3
  12. class Task(BaseTask):
  13. """A task to generate charts about AfC submissions over time.
  14. The main function of the task is to work through the "AfC submissions by
  15. date" categories (e.g. [[Category:AfC submissions by date/12 July 2011]])
  16. and determine the number of declined, accepted, and currently pending
  17. submissions every day.
  18. This information is saved to a MySQL database ("u_earwig_afc_history") and
  19. used to generate attractive graphs showing the number of AfC submissions
  20. over time.
  21. """
  22. name = "afc_history"
  23. def __init__(self):
  24. cfg = config.tasks.get(self.name, {})
  25. self.destination = cfg.get("destination", "afc_history.png")
  26. self.categories = cfg.get("categories", {})
  27. # Connection data for our SQL database:
  28. kwargs = cfg.get("sql", {})
  29. kwargs["read_default_file"] = expanduser("~/.my.cnf")
  30. self.conn_data = kwargs
  31. self.db_access_lock = Lock()
  32. def run(self, **kwargs):
  33. self.site = wiki.get_site()
  34. with self.db_access_lock:
  35. self.conn = oursql.connect(**self.conn_data)
  36. action = kwargs.get("action")
  37. try:
  38. if action == "update":
  39. self.update(kwargs.get("days", 90))
  40. elif action == "generate":
  41. self.generate(kwargs.get("days", 90))
  42. finally:
  43. self.conn.close()
  44. def update(self, num_days):
  45. self.logger.info("Updating past {0} days".format(num_days))
  46. generator = self.backwards_cat_iterator()
  47. for d in xrange(num_days):
  48. category = generator.next()
  49. date = category.title().split("/")[-1]
  50. self.update_date(date, category)
  51. self.logger.info("Update complete")
  52. def generate(self, data):
  53. self.logger.info("Generating chart for past {0} days".format(num_days))
  54. data = {}
  55. generator = self.backwards_cat_iterator()
  56. for d in xrange(num_days):
  57. category = generator.next()
  58. date = category.title().split("/")[-1]
  59. data[date] = self.get_date_counts(date)
  60. dest = expanduser(self.destination)
  61. with open(dest, "wb") as fp:
  62. fp.write(data)
  63. self.logger.info("Chart saved to {0}".format(dest))
  64. def backwards_cat_iterator(self):
  65. date_base = self.categories["dateBase"]
  66. current = datetime.utcnow()
  67. while 1:
  68. subcat = current.stftime("%d %B %Y")
  69. title = "/".join((date_base, subcat))
  70. yield self.site.get_category(title)
  71. current -= timedelta(1) # Subtract one day from date
  72. def update_date(self, date, category):
  73. msg = "Updating {0} ([[{1}]])".format(date, category.title())
  74. self.logger.debug(msg)
  75. q_select = "SELECT page_id, page_status FROM page WHERE page_date = ?"
  76. q_delete = "DELETE FROM page WHERE page_id = ?"
  77. q_update = "UPDATE page SET page_status = ? WHERE page_id = ?"
  78. q_insert = "INSERT INTO page VALUES (?, ?, ?)"
  79. members = category.members(use_sql=True)
  80. tracked = []
  81. statuses = {}
  82. with self.conn.cursor() as cursor:
  83. cursor.execute(q_select, (date,))
  84. for pageid, status in cursor:
  85. tracked.append(pageid)
  86. statuses[pageid] = status
  87. for title, pageid in members:
  88. status = self.get_status(title, pageid)
  89. if status == STATUS_NONE:
  90. if pageid in tracked:
  91. cursor.execute(q_delete, (pageid,))
  92. continue
  93. if pageid in tracked:
  94. if status != statuses[pageid]:
  95. cursor.execute(q_update, (status, pageid))
  96. else:
  97. cursor.execute(q_insert, (pageid, date, status))
  98. def get_status(self, title, pageid):
  99. page = self.site.get_page(title)
  100. ns = page.namespace()
  101. if ns == wiki.NS_FILE_TALK: # Ignore accepted FFU requests
  102. return CHART_NONE
  103. if ns == wiki.NS_TALK:
  104. new_page = page.toggle_talk()
  105. if new_page.is_redirect():
  106. return CHART_NONE # Ignore accepted AFC/R requests
  107. return CHART_ACCEPT
  108. cats = self.categories
  109. query = "SELECT 1 FROM categorylinks WHERE cl_from = ? AND cl_to = ?"
  110. match = lambda cat: list(self.site.sql_query(query, (cat, pageid)))
  111. if match(cats["pending"]):
  112. return STATUS_PEND
  113. elif match(cats["unsubmitted"]):
  114. return STATUS_NONE
  115. elif match(cats["declined"]):
  116. return STATUS_DECLINE
  117. return STATUS_NONE
  118. def get_date_counts(self, date):
  119. query = "SELECT COUNT(*) FROM page WHERE page_date = ? AND page_status = ?"
  120. statuses = [STATUS_PEND, STATUS_DECLINE, STATUS_ACCEPT]
  121. counts = {}
  122. with self.conn.cursor() as cursor:
  123. for status in statuses:
  124. cursor.execute(query, (date, status))
  125. count = cursor.fetchall()[0][0]
  126. counts[status] = count
  127. return counts