A Python robot that edits Wikipedia and interacts with people over IRC https://en.wikipedia.org/wiki/User:EarwigBot
Du kannst nicht mehr als 25 Themen auswählen. Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

158 Zeilen
5.5 KiB

  1. # -*- coding: utf-8 -*-
  2. from datetime import datetime, timedelta
  3. from os.path import expanduser
  4. from threading import Lock
  5. from time import sleep
  6. import oursql
  7. from classes import BaseTask
  8. import config
  9. import wiki
  10. # Valid submission statuses:
  11. STATUS_NONE = 0
  12. STATUS_PEND = 1
  13. STATUS_DECLINE = 2
  14. STATUS_ACCEPT = 3
  15. class Task(BaseTask):
  16. """A task to generate charts about AfC submissions over time.
  17. The main function of the task is to work through the "AfC submissions by
  18. date" categories (e.g. [[Category:AfC submissions by date/12 July 2011]])
  19. and determine the number of declined, accepted, and currently pending
  20. submissions every day.
  21. This information is saved to a MySQL database ("u_earwig_afc_history") and
  22. used to generate attractive graphs showing the number of AfC submissions
  23. over time.
  24. """
  25. name = "afc_history"
  26. def __init__(self):
  27. cfg = config.tasks.get(self.name, {})
  28. self.destination = cfg.get("destination", "afc_history.png")
  29. self.categories = cfg.get("categories", {})
  30. # Connection data for our SQL database:
  31. kwargs = cfg.get("sql", {})
  32. kwargs["read_default_file"] = expanduser("~/.my.cnf")
  33. self.conn_data = kwargs
  34. self.db_access_lock = Lock()
  35. def run(self, **kwargs):
  36. self.site = wiki.get_site()
  37. with self.db_access_lock:
  38. self.conn = oursql.connect(**self.conn_data)
  39. action = kwargs.get("action")
  40. try:
  41. num_days = int(kwargs.get("days", 90))
  42. if action == "update":
  43. self.update(num_days)
  44. elif action == "generate":
  45. self.generate(num_days)
  46. finally:
  47. self.conn.close()
  48. def update(self, num_days):
  49. self.logger.info("Updating past {0} days".format(num_days))
  50. generator = self.backwards_cat_iterator()
  51. for d in xrange(num_days):
  52. category = generator.next()
  53. date = category.title().split("/")[-1]
  54. self.update_date(date, category)
  55. sleep(15)
  56. self.logger.info("Update complete")
  57. def generate(self, num_days):
  58. self.logger.info("Generating chart for past {0} days".format(num_days))
  59. data = {}
  60. generator = self.backwards_cat_iterator()
  61. for d in xrange(num_days):
  62. category = generator.next()
  63. date = category.title().split("/")[-1]
  64. data[date] = self.get_date_counts(date)
  65. dest = expanduser(self.destination)
  66. with open(dest, "wb") as fp:
  67. fp.write(data)
  68. self.logger.info("Chart saved to {0}".format(dest))
  69. def backwards_cat_iterator(self):
  70. date_base = self.categories["dateBase"]
  71. current = datetime.utcnow()
  72. while 1:
  73. subcat = current.strftime("%d %B %Y")
  74. title = "/".join((date_base, subcat))
  75. yield self.site.get_category(title)
  76. current -= timedelta(1) # Subtract one day from date
  77. def update_date(self, date, category):
  78. msg = "Updating {0} ([[{1}]])".format(date, category.title())
  79. self.logger.debug(msg)
  80. q_select = "SELECT page_date, page_status FROM page WHERE page_id = ?"
  81. q_delete = "DELETE FROM page WHERE page_id = ?"
  82. q_update = "UPDATE page SET page_date = ?, page_status = ? WHERE page_id = ?"
  83. q_insert = "INSERT INTO page VALUES (?, ?, ?)"
  84. members = category.members(use_sql=True)
  85. with self.conn.cursor() as cursor:
  86. for title, pageid in members:
  87. cursor.execute(q_select, (pageid,))
  88. stored = cursor.fetchall()
  89. status = self.get_status(title, pageid)
  90. if status == STATUS_NONE:
  91. if stored:
  92. cursor.execute(q_delete, (pageid,))
  93. continue
  94. if stored:
  95. stored_date, stored_status = list(stored)[0]
  96. if date != stored_date or status != stored_status:
  97. cursor.execute(q_update, (date, status, pageid))
  98. else:
  99. cursor.execute(q_insert, (pageid, date, status))
  100. def get_status(self, title, pageid):
  101. page = self.site.get_page(title)
  102. ns = page.namespace()
  103. if ns == wiki.NS_FILE_TALK: # Ignore accepted FFU requests
  104. return STATUS_NONE
  105. if ns == wiki.NS_TALK:
  106. new_page = page.toggle_talk()
  107. if new_page.is_redirect():
  108. return STATUS_NONE # Ignore accepted AFC/R requests
  109. return STATUS_ACCEPT
  110. cats = self.categories
  111. sq = self.site.sql_query
  112. query = "SELECT 1 FROM categorylinks WHERE cl_to = ? AND cl_from = ?"
  113. match = lambda cat: list(sq(query, (cat.replace(" ", "_"), pageid)))
  114. if match(cats["pending"]):
  115. return STATUS_PEND
  116. elif match(cats["unsubmitted"]):
  117. return STATUS_NONE
  118. elif match(cats["declined"]):
  119. return STATUS_DECLINE
  120. return STATUS_NONE
  121. def get_date_counts(self, date):
  122. query = "SELECT COUNT(*) FROM page WHERE page_date = ? AND page_status = ?"
  123. statuses = [STATUS_PEND, STATUS_DECLINE, STATUS_ACCEPT]
  124. counts = {}
  125. with self.conn.cursor() as cursor:
  126. for status in statuses:
  127. cursor.execute(query, (date, status))
  128. count = cursor.fetchall()[0][0]
  129. counts[status] = count
  130. return counts