A Python robot that edits Wikipedia and interacts with people over IRC https://en.wikipedia.org/wiki/User:EarwigBot
25'ten fazla konu seçemezsiniz. Konular bir harf veya rakamla başlamalı, kısa çizgiler ('-') içerebilir ve en fazla 35 karakter uzunluğunda olabilir.

155 satır
5.4 KiB

  1. # -*- coding: utf-8 -*-
  2. from datetime import datetime, timedelta
  3. from os.path import expanduser
  4. from threading import Lock
  5. import oursql
  6. from classes import BaseTask
  7. import config
  8. import wiki
  9. # Valid submission statuses:
  10. STATUS_NONE = 0
  11. STATUS_PEND = 1
  12. STATUS_DECLINE = 2
  13. STATUS_ACCEPT = 3
  14. class Task(BaseTask):
  15. """A task to generate charts about AfC submissions over time.
  16. The main function of the task is to work through the "AfC submissions by
  17. date" categories (e.g. [[Category:AfC submissions by date/12 July 2011]])
  18. and determine the number of declined, accepted, and currently pending
  19. submissions every day.
  20. This information is saved to a MySQL database ("u_earwig_afc_history") and
  21. used to generate attractive graphs showing the number of AfC submissions
  22. over time.
  23. """
  24. name = "afc_history"
  25. def __init__(self):
  26. cfg = config.tasks.get(self.name, {})
  27. self.destination = cfg.get("destination", "afc_history.png")
  28. self.categories = cfg.get("categories", {})
  29. # Connection data for our SQL database:
  30. kwargs = cfg.get("sql", {})
  31. kwargs["read_default_file"] = expanduser("~/.my.cnf")
  32. self.conn_data = kwargs
  33. self.db_access_lock = Lock()
  34. def run(self, **kwargs):
  35. self.site = wiki.get_site()
  36. with self.db_access_lock:
  37. self.conn = oursql.connect(**self.conn_data)
  38. action = kwargs.get("action")
  39. try:
  40. if action == "update":
  41. self.update(kwargs.get("days", 90))
  42. elif action == "generate":
  43. self.generate(kwargs.get("days", 90))
  44. finally:
  45. self.conn.close()
  46. def update(self, num_days):
  47. self.logger.info("Updating past {0} days".format(num_days))
  48. generator = self.backwards_cat_iterator()
  49. for d in xrange(num_days):
  50. category = generator.next()
  51. date = category.title().split("/")[-1]
  52. self.update_date(date, category)
  53. self.logger.info("Update complete")
  54. def generate(self, data):
  55. self.logger.info("Generating chart for past {0} days".format(num_days))
  56. data = {}
  57. generator = self.backwards_cat_iterator()
  58. for d in xrange(num_days):
  59. category = generator.next()
  60. date = category.title().split("/")[-1]
  61. data[date] = self.get_date_counts(date)
  62. dest = expanduser(self.destination)
  63. with open(dest, "wb") as fp:
  64. fp.write(data)
  65. self.logger.info("Chart saved to {0}".format(dest))
  66. def backwards_cat_iterator(self):
  67. date_base = self.categories["dateBase"]
  68. current = datetime.utcnow()
  69. while 1:
  70. subcat = current.stftime("%d %B %Y")
  71. title = "/".join((date_base, subcat))
  72. yield self.site.get_category(title)
  73. current -= timedelta(1) # Subtract one day from date
  74. def update_date(self, date, category):
  75. msg = "Updating {0} ([[{1}]])".format(date, category.title())
  76. self.logger.debug(msg)
  77. q_select = "SELECT page_id, page_status FROM page WHERE page_date = ?"
  78. q_delete = "DELETE FROM page WHERE page_id = ?"
  79. q_update = "UPDATE page SET page_status = ? WHERE page_id = ?"
  80. q_insert = "INSERT INTO page VALUES (?, ?, ?)"
  81. members = category.members(use_sql=True)
  82. tracked = []
  83. statuses = {}
  84. with self.conn.cursor() as cursor:
  85. cursor.execute(q_select, (date,))
  86. for pageid, status in cursor:
  87. tracked.append(pageid)
  88. statuses[pageid] = status
  89. for title, pageid in members:
  90. status = self.get_status(title, pageid)
  91. if status == STATUS_NONE:
  92. if pageid in tracked:
  93. cursor.execute(q_delete, (pageid,))
  94. continue
  95. if pageid in tracked:
  96. if status != statuses[pageid]:
  97. cursor.execute(q_update, (status, pageid))
  98. else:
  99. cursor.execute(q_insert, (pageid, date, status))
  100. def get_status(self, title, pageid):
  101. page = self.site.get_page(title)
  102. ns = page.namespace()
  103. if ns == wiki.NS_FILE_TALK: # Ignore accepted FFU requests
  104. return CHART_NONE
  105. if ns == wiki.NS_TALK:
  106. new_page = page.toggle_talk()
  107. if new_page.is_redirect():
  108. return CHART_NONE # Ignore accepted AFC/R requests
  109. return CHART_ACCEPT
  110. cats = self.categories
  111. query = "SELECT 1 FROM categorylinks WHERE cl_from = ? AND cl_to = ?"
  112. match = lambda cat: list(self.site.sql_query(query, (cat, pageid)))
  113. if match(cats["pending"]):
  114. return STATUS_PEND
  115. elif match(cats["unsubmitted"]):
  116. return STATUS_NONE
  117. elif match(cats["declined"]):
  118. return STATUS_DECLINE
  119. return STATUS_NONE
  120. def get_date_counts(self, date):
  121. query = "SELECT COUNT(*) FROM page WHERE page_date = ? AND page_status = ?"
  122. statuses = [STATUS_PEND, STATUS_DECLINE, STATUS_ACCEPT]
  123. counts = {}
  124. with self.conn.cursor() as cursor:
  125. for status in statuses:
  126. cursor.execute(query, (date, status))
  127. count = cursor.fetchall()[0][0]
  128. counts[status] = count
  129. return counts