From d94926994491a2f2445da4ea89b6253c5ff532b0 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 19 Nov 2011 15:57:12 -0500 Subject: [PATCH] Some SQL updates, starting work on afc_history task. * get() -> return a Task instance by name (tasks) * Using SQL to save API queries. (commands.{afc_report,afc_status}) * ignore_list -> ignoreList in config. (tasks.afc_statistics) --- bot/commands/afc_report.py | 73 +++++++++++---------- bot/commands/afc_status.py | 5 +- bot/tasks/__init__.py | 11 +++- bot/tasks/afc_history.py | 151 ++++++++++++++++++++++++++++++++++++++++++++ bot/tasks/afc_statistics.py | 2 +- 5 files changed, 202 insertions(+), 40 deletions(-) create mode 100644 bot/tasks/afc_history.py diff --git a/bot/commands/afc_report.py b/bot/commands/afc_report.py index 22429cb..855f810 100644 --- a/bot/commands/afc_report.py +++ b/bot/commands/afc_report.py @@ -3,6 +3,7 @@ import re from classes import BaseCommand +import tasks import wiki class Command(BaseCommand): @@ -14,41 +15,50 @@ class Command(BaseCommand): self.site._maxlag = None self.data = data + try: + self.statistics = tasks.get("afc_statistics") + except KeyError: + e = "Cannot run command: requires afc_statistics task." + self.logger.error(e) + return + if not data.args: msg = "what submission do you want me to give information about?" self.connection.reply(data, msg) return - title = ' '.join(data.args) + title = " ".join(data.args) title = title.replace("http://en.wikipedia.org/wiki/", "") title = title.replace("http://enwp.org/", "").strip() # Given '!report Foo', first try [[Foo]]: - if self.report(title): - return + page = self.get_page(title) + if page: + return self.report(page) # Then try [[Wikipedia:Articles for creation/Foo]]: - title2 = "".join(("Wikipedia:Articles for creation/", title)) - if self.report(title2): - return + newtitle = "/".join(("Wikipedia:Articles for creation", title)) + page = self.get_page(newtitle) + if page: + return self.report(page) # Then try [[Wikipedia talk:Articles for creation/Foo]]: - title3 = "".join(("Wikipedia talk:Articles for creation/", title)) - if self.report(title3): - return + newtitle = "/".join(("Wikipedia talk:Articles for creation", title)) + page = self.get_page(newtitle) + if page: + return self.report(page) msg = "submission \x0302{0}\x0301 not found.".format(title) self.connection.reply(data, msg) - def report(self, title): - data = self.data + def get_page(self, title): page = self.site.get_page(title, follow_redirects=False) - if not page.exists()[0]: - return + if page.exists()[0]: + return page + def report(self, page): url = page.url().replace("en.wikipedia.org/wiki", "enwp.org") - short = re.sub("wikipedia( talk)?\:articles for creation\/", "", title, - flags=re.IGNORECASE) + short = self.statistics.get_short_title(page.title()) status = self.get_status(page) user = self.site.get_user(page.creator()) user_name = user.name() @@ -60,31 +70,26 @@ class Command(BaseCommand): if status == "accepted": msg3 = "Reviewed by \x0302{0}\x0301 ({1})" - self.connection.reply(data, msg1.format(short, url)) - self.connection.say(data.chan, msg2.format(status)) - self.connection.say(data.chan, msg3.format(user_name, user_url)) - - return True + self.connection.reply(self.data, msg1.format(short, url)) + self.connection.say(self.data.chan, msg2.format(status)) + self.connection.say(self.data.chan, msg3.format(user_name, user_url)) def get_status(self, page): - content = page.get() - if page.is_redirect(): target = page.get_redirect_target() - if self.site.get_page(target).namespace() == 0: + if self.site.get_page(target).namespace() == wiki.NS_MAIN: return "accepted" return "redirect" - elif re.search("\{\{afc submission\|r\|(.*?)\}\}", content, re.I): + + statuses = self.statistics.get_statuses(page.get()) + if "R" in statuses: return "being reviewed" - elif re.search("\{\{afc submission\|h?\|(.*?)\}\}", content, re.I): - return "pending" - elif re.search("\{\{afc submission\|t\|(.*?)\}\}", content, re.I): + elif "H" in statuses: + return "pending draft" + elif "P" in statuses: + return "pending submission" + elif "T" in statuses: return "unsubmitted draft" - elif re.search("\{\{afc submission\|d\|(.*?)\}\}", content, re.I): - regex = "\{\{afc submission\|d\|(.*?)(\||\}\})" - try: - reason = re.findall(regex, content, re.I)[0][0] - except IndexError: - return "declined" - return "declined with reason \"{0}\"".format(reason) + elif "D" in statuses: + return "declined" return "unkown" diff --git a/bot/commands/afc_status.py b/bot/commands/afc_status.py index d05ba17..c2e219f 100644 --- a/bot/commands/afc_status.py +++ b/bot/commands/afc_status.py @@ -92,12 +92,11 @@ class Command(BaseCommand): def count_submissions(self): """Returns the number of open AFC submissions (count of CAT:PEND).""" cat = self.site.get_category("Pending AfC submissions") - subs = len(cat.members(limit=500)) + subs = len(cat.members(limit=2500, use_sql=True)) # Remove [[Wikipedia:Articles for creation/Redirects]] and # [[Wikipedia:Files for upload]], which aren't real submissions: - subs -= 2 - return subs + return subs - 2 def count_redirects(self): """Returns the number of open redirect submissions. Calculated as the diff --git a/bot/tasks/__init__.py b/bot/tasks/__init__.py index f1a8b17..da91cc1 100644 --- a/bot/tasks/__init__.py +++ b/bot/tasks/__init__.py @@ -16,7 +16,7 @@ import time from classes import BaseTask import config -__all__ = ["load", "schedule", "start", "get_all"] +__all__ = ["load", "schedule", "start", "get", "get_all"] # Base directory when searching for tasks: base_dir = os.path.join(config.root_dir, "bot", "tasks") @@ -77,7 +77,7 @@ def schedule(now=time.gmtime()): """Start all tasks that are supposed to be run at a given time.""" # Get list of tasks to run this turn: tasks = config.schedule(now.tm_min, now.tm_hour, now.tm_mday, now.tm_mon, - now.tm_wday) + now.tm_wday) for task in tasks: if isinstance(task, list): # they've specified kwargs @@ -106,6 +106,13 @@ def start(task_name, **kwargs): task_thread.start() +def get(task_name): + """Return the class instance associated with a certain task name. + + Will raise KeyError if the task is not found. + """ + return _tasks[task_name] + def get_all(): """Return our dict of all loaded tasks.""" return _tasks diff --git a/bot/tasks/afc_history.py b/bot/tasks/afc_history.py new file mode 100644 index 0000000..5d6de1a --- /dev/null +++ b/bot/tasks/afc_history.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- + +from datetime import datetime, timedelta +from os.path import expanduser +from threading import Lock + +from classes import BaseTask +import wiki + +# Valid submission statuses: +STATUS_NONE = 0 +STATUS_PEND = 1 +STATUS_DECLINE = 2 +STATUS_ACCEPT = 3 + +class Task(BaseTask): + """A task to generate charts about AfC submissions over time. + + The main function of the task is to work through the "AfC submissions by + date" categories (e.g. [[Category:AfC submissions by date/12 July 2011]]) + and determine the number of declined, accepted, and currently pending + submissions every day. + + This information is saved to a MySQL database ("u_earwig_afc_history") and + used to generate attractive graphs showing the number of AfC submissions + over time. + """ + name = "afc_history" + + def __init__(self): + cfg = config.tasks.get(self.name, {}) + self.destination = cfg.get("destination", "afc_history.png") + self.categories = cfg.get("categories", {}) + + # Connection data for our SQL database: + kwargs = cfg.get("sql", {}) + kwargs["read_default_file"] = expanduser("~/.my.cnf") + self.conn_data = kwargs + self.db_access_lock = Lock() + + def run(self, **kwargs): + self.site = wiki.get_site() + with self.db_access_lock: + self.conn = oursql.connect(**self.conn_data) + + action = kwargs.get("action") + try: + if action == "update": + self.update(kwargs.get("days", 90)) + elif action == "generate": + self.generate(kwargs.get("days", 90)) + finally: + self.conn.close() + + def update(self, num_days): + self.logger.info("Updating past {0} days".format(num_days)) + generator = self.backwards_cat_iterator() + for d in xrange(num_days): + category = generator.next() + date = category.title().split("/")[-1] + self.update_date(date, category) + self.logger.info("Update complete") + + def generate(self, data): + self.logger.info("Generating chart for past {0} days".format(num_days)) + data = {} + generator = self.backwards_cat_iterator() + for d in xrange(num_days): + category = generator.next() + date = category.title().split("/")[-1] + data[date] = self.get_date_counts(date) + + dest = expanduser(self.destination) + with open(dest, "wb") as fp: + fp.write(data) + self.logger.info("Chart saved to {0}".format(dest)) + + def backwards_cat_iterator(self): + date_base = self.categories["dateBase"] + current = datetime.utcnow() + while 1: + subcat = current.stftime("%d %B %Y") + title = "/".join((date_base, subcat)) + yield self.site.get_category(title) + current -= timedelta(1) # Subtract one day from date + + def update_date(self, date, category): + msg = "Updating {0} ([[{1}]])".format(date, category.title()) + self.logger.debug(msg) + + q_select = "SELECT page_id, page_status FROM page WHERE page_date = ?" + q_delete = "DELETE FROM page WHERE page_id = ?" + q_update = "UPDATE page SET page_status = ? WHERE page_id = ?" + q_insert = "INSERT INTO page VALUES (?, ?, ?)" + members = category.members(use_sql=True) + tracked = [] + statuses = {} + + with self.conn.cursor() as cursor: + cursor.execute(q_select, (date,)) + for pageid, status in cursor: + tracked.append(pageid) + statuses[pageid] = status + + for title, pageid in members: + status = self.get_status(title, pageid) + if status == STATUS_NONE: + if pageid in tracked: + cursor.execute(q_delete, (pageid,)) + continue + if pageid in tracked: + if status != statuses[pageid]: + cursor.execute(q_update, (status, pageid)) + else: + cursor.execute(q_insert, (pageid, date, status)) + + def get_status(self, title, pageid): + page = self.site.get_page(title) + ns = page.namespace() + + if ns == wiki.NS_FILE_TALK: # Ignore accepted FFU requests + return CHART_NONE + + if ns == wiki.NS_TALK: + new_page = page.toggle_talk() + if new_page.is_redirect(): + return CHART_NONE # Ignore accepted AFC/R requests + return CHART_ACCEPT + + cats = self.categories + query = "SELECT 1 FROM categorylinks WHERE cl_from = ? AND cl_to = ?" + match = lambda cat: list(self.site.sql_query(query, (cat, pageid))) + + if match(cats["pending"]): + return STATUS_PEND + elif match(cats["unsubmitted"]): + return STATUS_NONE + elif match(cats["declined"]): + return STATUS_DECLINE + return STATUS_NONE + + def get_date_counts(self, date): + query = "SELECT COUNT(*) FROM page WHERE page_date = ? AND page_status = ?" + statuses = [STATUS_PEND, STATUS_DECLINE, STATUS_ACCEPT] + counts = {} + with self.conn.cursor() as cursor: + for status in statuses: + cursor.execute(query, (date, status)) + count = cursor.fetchall()[0][0] + counts[status] = count + return counts diff --git a/bot/tasks/afc_statistics.py b/bot/tasks/afc_statistics.py index 48e1fc1..abb3582 100644 --- a/bot/tasks/afc_statistics.py +++ b/bot/tasks/afc_statistics.py @@ -39,7 +39,7 @@ class Task(BaseTask): # Set some wiki-related attributes: self.pagename = cfg.get("page", "Template:AFC statistics") self.pending_cat = cfg.get("pending", "Pending AfC submissions") - self.ignore_list = cfg.get("ignore_list", []) + self.ignore_list = cfg.get("ignoreList", []) default_summary = "Updating statistics for [[WP:WPAFC|WikiProject Articles for creation]]." self.summary = self.make_summary(cfg.get("summary", default_summary))