* get() -> return a Task instance by name (tasks) * Using SQL to save API queries. (commands.{afc_report,afc_status}) * ignore_list -> ignoreList in config. (tasks.afc_statistics)tags/v0.1^2
@@ -3,6 +3,7 @@ | |||||
import re | import re | ||||
from classes import BaseCommand | from classes import BaseCommand | ||||
import tasks | |||||
import wiki | import wiki | ||||
class Command(BaseCommand): | class Command(BaseCommand): | ||||
@@ -14,41 +15,50 @@ class Command(BaseCommand): | |||||
self.site._maxlag = None | self.site._maxlag = None | ||||
self.data = data | self.data = data | ||||
try: | |||||
self.statistics = tasks.get("afc_statistics") | |||||
except KeyError: | |||||
e = "Cannot run command: requires afc_statistics task." | |||||
self.logger.error(e) | |||||
return | |||||
if not data.args: | if not data.args: | ||||
msg = "what submission do you want me to give information about?" | msg = "what submission do you want me to give information about?" | ||||
self.connection.reply(data, msg) | self.connection.reply(data, msg) | ||||
return | return | ||||
title = ' '.join(data.args) | |||||
title = " ".join(data.args) | |||||
title = title.replace("http://en.wikipedia.org/wiki/", "") | title = title.replace("http://en.wikipedia.org/wiki/", "") | ||||
title = title.replace("http://enwp.org/", "").strip() | title = title.replace("http://enwp.org/", "").strip() | ||||
# Given '!report Foo', first try [[Foo]]: | # Given '!report Foo', first try [[Foo]]: | ||||
if self.report(title): | |||||
return | |||||
page = self.get_page(title) | |||||
if page: | |||||
return self.report(page) | |||||
# Then try [[Wikipedia:Articles for creation/Foo]]: | # Then try [[Wikipedia:Articles for creation/Foo]]: | ||||
title2 = "".join(("Wikipedia:Articles for creation/", title)) | |||||
if self.report(title2): | |||||
return | |||||
newtitle = "/".join(("Wikipedia:Articles for creation", title)) | |||||
page = self.get_page(newtitle) | |||||
if page: | |||||
return self.report(page) | |||||
# Then try [[Wikipedia talk:Articles for creation/Foo]]: | # Then try [[Wikipedia talk:Articles for creation/Foo]]: | ||||
title3 = "".join(("Wikipedia talk:Articles for creation/", title)) | |||||
if self.report(title3): | |||||
return | |||||
newtitle = "/".join(("Wikipedia talk:Articles for creation", title)) | |||||
page = self.get_page(newtitle) | |||||
if page: | |||||
return self.report(page) | |||||
msg = "submission \x0302{0}\x0301 not found.".format(title) | msg = "submission \x0302{0}\x0301 not found.".format(title) | ||||
self.connection.reply(data, msg) | self.connection.reply(data, msg) | ||||
def report(self, title): | |||||
data = self.data | |||||
def get_page(self, title):
    """Return the page named *title* if it exists, otherwise None.

    The lookup is made with ``follow_redirects=False``, so a redirect page
    is returned as itself rather than as its target.
    """
    page = self.site.get_page(title, follow_redirects=False)
    # Only the first element of exists()'s result is consulted.
    if page.exists()[0]:
        return page
    return None
def report(self, page): | |||||
url = page.url().replace("en.wikipedia.org/wiki", "enwp.org") | url = page.url().replace("en.wikipedia.org/wiki", "enwp.org") | ||||
short = re.sub("wikipedia( talk)?\:articles for creation\/", "", title, | |||||
flags=re.IGNORECASE) | |||||
short = self.statistics.get_short_title(page.title()) | |||||
status = self.get_status(page) | status = self.get_status(page) | ||||
user = self.site.get_user(page.creator()) | user = self.site.get_user(page.creator()) | ||||
user_name = user.name() | user_name = user.name() | ||||
@@ -60,31 +70,26 @@ class Command(BaseCommand): | |||||
if status == "accepted": | if status == "accepted": | ||||
msg3 = "Reviewed by \x0302{0}\x0301 ({1})" | msg3 = "Reviewed by \x0302{0}\x0301 ({1})" | ||||
self.connection.reply(data, msg1.format(short, url)) | |||||
self.connection.say(data.chan, msg2.format(status)) | |||||
self.connection.say(data.chan, msg3.format(user_name, user_url)) | |||||
return True | |||||
self.connection.reply(self.data, msg1.format(short, url)) | |||||
self.connection.say(self.data.chan, msg2.format(status)) | |||||
self.connection.say(self.data.chan, msg3.format(user_name, user_url)) | |||||
def get_status(self, page):
    """Return a short human-readable description of a submission's status.

    Redirects are handled first: a redirect whose target is in the main
    namespace is reported as "accepted", any other redirect as "redirect".
    Otherwise the page text is handed to the afc_statistics task, and the
    first matching status code (checked in the order R, H, P, T, D) decides
    the result. "unknown" is returned when no code matches.
    """
    if page.is_redirect():
        target = page.get_redirect_target()
        if self.site.get_page(target).namespace() == wiki.NS_MAIN:
            return "accepted"
        return "redirect"

    # The page text is only fetched once we know this is not a redirect.
    statuses = self.statistics.get_statuses(page.get())
    if "R" in statuses:
        return "being reviewed"
    elif "H" in statuses:
        return "pending draft"
    elif "P" in statuses:
        return "pending submission"
    elif "T" in statuses:
        return "unsubmitted draft"
    elif "D" in statuses:
        return "declined"
    return "unknown"
@@ -92,12 +92,11 @@ class Command(BaseCommand): | |||||
def count_submissions(self):
    """Returns the number of open AFC submissions (count of CAT:PEND)."""
    cat = self.site.get_category("Pending AfC submissions")
    # One SQL-backed lookup of the category's members (up to 2500).
    subs = len(cat.members(limit=2500, use_sql=True))

    # Remove [[Wikipedia:Articles for creation/Redirects]] and
    # [[Wikipedia:Files for upload]], which aren't real submissions:
    return subs - 2
def count_redirects(self): | def count_redirects(self): | ||||
"""Returns the number of open redirect submissions. Calculated as the | """Returns the number of open redirect submissions. Calculated as the | ||||
@@ -16,7 +16,7 @@ import time | |||||
from classes import BaseTask | from classes import BaseTask | ||||
import config | import config | ||||
__all__ = ["load", "schedule", "start", "get_all"] | |||||
__all__ = ["load", "schedule", "start", "get", "get_all"] | |||||
# Base directory when searching for tasks: | # Base directory when searching for tasks: | ||||
base_dir = os.path.join(config.root_dir, "bot", "tasks") | base_dir = os.path.join(config.root_dir, "bot", "tasks") | ||||
@@ -77,7 +77,7 @@ def schedule(now=time.gmtime()): | |||||
"""Start all tasks that are supposed to be run at a given time.""" | """Start all tasks that are supposed to be run at a given time.""" | ||||
# Get list of tasks to run this turn: | # Get list of tasks to run this turn: | ||||
tasks = config.schedule(now.tm_min, now.tm_hour, now.tm_mday, now.tm_mon, | tasks = config.schedule(now.tm_min, now.tm_hour, now.tm_mday, now.tm_mon, | ||||
now.tm_wday) | |||||
now.tm_wday) | |||||
for task in tasks: | for task in tasks: | ||||
if isinstance(task, list): # they've specified kwargs | if isinstance(task, list): # they've specified kwargs | ||||
@@ -106,6 +106,13 @@ def start(task_name, **kwargs): | |||||
task_thread.start() | task_thread.start() | ||||
def get(task_name):
    """Look up a loaded task by name and return its class instance.

    A KeyError propagates to the caller when no task is stored under
    *task_name*.
    """
    task = _tasks[task_name]
    return task
def get_all():
    """Return the dict that holds every loaded task."""
    loaded = _tasks
    return loaded
@@ -0,0 +1,151 @@ | |||||
# -*- coding: utf-8 -*- | |||||
from datetime import datetime, timedelta
from os.path import expanduser
from threading import Lock

import oursql

from classes import BaseTask
import config
import wiki
# Valid submission statuses: | |||||
STATUS_NONE = 0 | |||||
STATUS_PEND = 1 | |||||
STATUS_DECLINE = 2 | |||||
STATUS_ACCEPT = 3 | |||||
class Task(BaseTask):
    """A task to generate charts about AfC submissions over time.

    The main function of the task is to work through the "AfC submissions by
    date" categories (e.g. [[Category:AfC submissions by date/12 July 2011]])
    and determine the number of declined, accepted, and currently pending
    submissions every day.

    This information is saved to a MySQL database ("u_earwig_afc_history") and
    used to generate attractive graphs showing the number of AfC submissions
    over time.
    """
    name = "afc_history"

    def __init__(self):
        cfg = config.tasks.get(self.name, {})
        self.destination = cfg.get("destination", "afc_history.png")
        self.categories = cfg.get("categories", {})

        # Connection data for our SQL database. Copy the configured dict so
        # that adding read_default_file does not modify the shared config:
        kwargs = dict(cfg.get("sql", {}))
        kwargs["read_default_file"] = expanduser("~/.my.cnf")
        self.conn_data = kwargs
        self.db_access_lock = Lock()

    def run(self, **kwargs):
        """Run the task; ``action`` is "update" or "generate".

        ``days`` (default 90) is how many days back to process. Any other
        ``action`` opens and closes the connection without doing work.
        """
        self.site = wiki.get_site()
        # self.conn is instance state, so the lock is held for as long as
        # the connection is in use, not just while it is being opened.
        with self.db_access_lock:
            self.conn = oursql.connect(**self.conn_data)
            try:
                action = kwargs.get("action")
                # Coerce in case the value was supplied as a string.
                num_days = int(kwargs.get("days", 90))
                if action == "update":
                    self.update(num_days)
                elif action == "generate":
                    self.generate(num_days)
            finally:
                self.conn.close()

    def update(self, num_days):
        """Refresh the stored statuses for each of the past num_days days."""
        self.logger.info("Updating past {0} days".format(num_days))
        for date, category in self._recent_categories(num_days):
            self.update_date(date, category)
        self.logger.info("Update complete")

    def generate(self, num_days):
        """Collect per-date counts for the past num_days days and save them.

        The counts are read from the database via get_date_counts() and the
        result is written to the configured destination path.
        """
        self.logger.info("Generating chart for past {0} days".format(num_days))
        data = {}
        for date, _ in self._recent_categories(num_days):
            data[date] = self.get_date_counts(date)

        dest = expanduser(self.destination)
        with open(dest, "wb") as fp:
            # NOTE(review): `data` is a dict of counts, not rendered chart
            # bytes, so this write will raise TypeError. The chart rendering
            # step is not present in this class and still needs to be added.
            fp.write(data)
        self.logger.info("Chart saved to {0}".format(dest))

    def _recent_categories(self, num_days):
        """Yield (date, category) pairs for num_days days, newest first."""
        generator = self.backwards_cat_iterator()
        for _ in range(num_days):
            category = next(generator)
            # The date is the part of the category title after the last "/".
            date = category.title().split("/")[-1]
            yield date, category

    def backwards_cat_iterator(self):
        """Yield by-date category objects endlessly, from today backwards.

        Titles are built as "<dateBase>/<DD Month YYYY>" from the current
        UTC date; "dateBase" must be present in the configured categories.
        """
        date_base = self.categories["dateBase"]
        current = datetime.utcnow()
        while True:
            subcat = current.strftime("%d %B %Y")
            title = "/".join((date_base, subcat))
            yield self.site.get_category(title)
            current -= timedelta(1)  # Subtract one day from date

    def update_date(self, date, category):
        """Sync the `page` table rows for one date with its category.

        For every current member of the category: rows whose status is now
        STATUS_NONE are deleted if tracked, tracked rows with a changed
        status are updated, and untracked pages are inserted.
        """
        msg = "Updating {0} ([[{1}]])".format(date, category.title())
        self.logger.debug(msg)

        q_select = "SELECT page_id, page_status FROM page WHERE page_date = ?"
        q_delete = "DELETE FROM page WHERE page_id = ?"
        q_update = "UPDATE page SET page_status = ? WHERE page_id = ?"
        q_insert = "INSERT INTO page VALUES (?, ?, ?)"
        members = category.members(use_sql=True)

        with self.conn.cursor() as cursor:
            # Map of already-tracked page IDs to their stored status; the
            # dict also serves as the "is this page tracked?" lookup.
            statuses = {}
            cursor.execute(q_select, (date,))
            for pageid, status in cursor:
                statuses[pageid] = status

            for title, pageid in members:
                status = self.get_status(title, pageid)
                if status == STATUS_NONE:
                    if pageid in statuses:
                        cursor.execute(q_delete, (pageid,))
                    continue

                if pageid in statuses:
                    if status != statuses[pageid]:
                        cursor.execute(q_update, (status, pageid))
                else:
                    cursor.execute(q_insert, (pageid, date, status))

    def get_status(self, title, pageid):
        """Return the STATUS_* constant describing the given page."""
        page = self.site.get_page(title)
        ns = page.namespace()

        if ns == wiki.NS_FILE_TALK:  # Ignore accepted FFU requests
            return STATUS_NONE

        if ns == wiki.NS_TALK:
            new_page = page.toggle_talk()
            if new_page.is_redirect():
                return STATUS_NONE  # Ignore accepted AFC/R requests
            return STATUS_ACCEPT

        cats = self.categories
        query = "SELECT 1 FROM categorylinks WHERE cl_from = ? AND cl_to = ?"
        # Parameters follow the placeholder order: page ID, then category.
        # NOTE(review): cl_to is presumably stored with underscores and no
        # namespace prefix; confirm the configured names use that form.
        match = lambda cat: list(self.site.sql_query(query, (pageid, cat)))

        if match(cats["pending"]):
            return STATUS_PEND
        elif match(cats["unsubmitted"]):
            return STATUS_NONE
        elif match(cats["declined"]):
            return STATUS_DECLINE
        return STATUS_NONE

    def get_date_counts(self, date):
        """Return a dict mapping each tracked status to its count on *date*.

        Only STATUS_PEND, STATUS_DECLINE and STATUS_ACCEPT are counted.
        """
        query = "SELECT COUNT(*) FROM page WHERE page_date = ? AND page_status = ?"
        statuses = [STATUS_PEND, STATUS_DECLINE, STATUS_ACCEPT]
        counts = {}
        with self.conn.cursor() as cursor:
            for status in statuses:
                cursor.execute(query, (date, status))
                count = cursor.fetchall()[0][0]
                counts[status] = count
        return counts
@@ -39,7 +39,7 @@ class Task(BaseTask): | |||||
# Set some wiki-related attributes: | # Set some wiki-related attributes: | ||||
self.pagename = cfg.get("page", "Template:AFC statistics") | self.pagename = cfg.get("page", "Template:AFC statistics") | ||||
self.pending_cat = cfg.get("pending", "Pending AfC submissions") | self.pending_cat = cfg.get("pending", "Pending AfC submissions") | ||||
self.ignore_list = cfg.get("ignore_list", []) | |||||
self.ignore_list = cfg.get("ignoreList", []) | |||||
default_summary = "Updating statistics for [[WP:WPAFC|WikiProject Articles for creation]]." | default_summary = "Updating statistics for [[WP:WPAFC|WikiProject Articles for creation]]." | ||||
self.summary = self.make_summary(cfg.get("summary", default_summary)) | self.summary = self.make_summary(cfg.get("summary", default_summary)) | ||||