* get() -> return a Task instance by name (tasks) * Use SQL to save API queries. (commands.{afc_report,afc_status}) * ignore_list -> ignoreList in config. (tasks.afc_statistics) tags/v0.1^2
@@ -3,6 +3,7 @@ | |||
import re | |||
from classes import BaseCommand | |||
import tasks | |||
import wiki | |||
class Command(BaseCommand): | |||
@@ -14,41 +15,50 @@ class Command(BaseCommand): | |||
self.site._maxlag = None | |||
self.data = data | |||
try: | |||
self.statistics = tasks.get("afc_statistics") | |||
except KeyError: | |||
e = "Cannot run command: requires afc_statistics task." | |||
self.logger.error(e) | |||
return | |||
if not data.args: | |||
msg = "what submission do you want me to give information about?" | |||
self.connection.reply(data, msg) | |||
return | |||
title = ' '.join(data.args) | |||
title = " ".join(data.args) | |||
title = title.replace("http://en.wikipedia.org/wiki/", "") | |||
title = title.replace("http://enwp.org/", "").strip() | |||
# Given '!report Foo', first try [[Foo]]: | |||
if self.report(title): | |||
return | |||
page = self.get_page(title) | |||
if page: | |||
return self.report(page) | |||
# Then try [[Wikipedia:Articles for creation/Foo]]: | |||
title2 = "".join(("Wikipedia:Articles for creation/", title)) | |||
if self.report(title2): | |||
return | |||
newtitle = "/".join(("Wikipedia:Articles for creation", title)) | |||
page = self.get_page(newtitle) | |||
if page: | |||
return self.report(page) | |||
# Then try [[Wikipedia talk:Articles for creation/Foo]]: | |||
title3 = "".join(("Wikipedia talk:Articles for creation/", title)) | |||
if self.report(title3): | |||
return | |||
newtitle = "/".join(("Wikipedia talk:Articles for creation", title)) | |||
page = self.get_page(newtitle) | |||
if page: | |||
return self.report(page) | |||
msg = "submission \x0302{0}\x0301 not found.".format(title) | |||
self.connection.reply(data, msg) | |||
def report(self, title): | |||
data = self.data | |||
def get_page(self, title):
    """Return the page called *title* if it exists, otherwise None.

    The page is fetched with follow_redirects=False, so a redirect page is
    returned as itself and never replaced by its target.
    """
    page = self.site.get_page(title, follow_redirects=False)
    # exists() returns a sequence whose first item is the existence flag;
    # check it once, not with two complementary calls.
    if page.exists()[0]:
        return page
    return None
def report(self, page): | |||
url = page.url().replace("en.wikipedia.org/wiki", "enwp.org") | |||
short = re.sub("wikipedia( talk)?\:articles for creation\/", "", title, | |||
flags=re.IGNORECASE) | |||
short = self.statistics.get_short_title(page.title()) | |||
status = self.get_status(page) | |||
user = self.site.get_user(page.creator()) | |||
user_name = user.name() | |||
@@ -60,31 +70,26 @@ class Command(BaseCommand): | |||
if status == "accepted": | |||
msg3 = "Reviewed by \x0302{0}\x0301 ({1})" | |||
self.connection.reply(data, msg1.format(short, url)) | |||
self.connection.say(data.chan, msg2.format(status)) | |||
self.connection.say(data.chan, msg3.format(user_name, user_url)) | |||
return True | |||
self.connection.reply(self.data, msg1.format(short, url)) | |||
self.connection.say(self.data.chan, msg2.format(status)) | |||
self.connection.say(self.data.chan, msg3.format(user_name, user_url)) | |||
def get_status(self, page):
    """Return a short human-readable status string for a submission page.

    A redirect is reported as "accepted" when its target is in the main
    namespace and as "redirect" otherwise. For any other page, the status
    codes returned by the afc_statistics task's get_statuses() are checked
    in priority order (R, H, P, T, D); "unknown" is returned when none of
    them is present.
    """
    if page.is_redirect():
        target = page.get_redirect_target()
        if self.site.get_page(target).namespace() == wiki.NS_MAIN:
            return "accepted"
        return "redirect"

    # The page text is only needed for non-redirects, so fetch it here
    # and not unconditionally at the top of the method.
    statuses = self.statistics.get_statuses(page.get())
    if "R" in statuses:
        return "being reviewed"
    elif "H" in statuses:
        return "pending draft"
    elif "P" in statuses:
        return "pending submission"
    elif "T" in statuses:
        return "unsubmitted draft"
    elif "D" in statuses:
        return "declined"
    return "unknown"
@@ -92,12 +92,11 @@ class Command(BaseCommand): | |||
def count_submissions(self):
    """Returns the number of open AFC submissions (count of CAT:PEND)."""
    cat = self.site.get_category("Pending AfC submissions")
    # One SQL-backed listing is enough; a second, smaller API listing
    # would only be thrown away.
    subs = len(cat.members(limit=2500, use_sql=True))

    # Remove [[Wikipedia:Articles for creation/Redirects]] and
    # [[Wikipedia:Files for upload]], which aren't real submissions:
    return subs - 2
def count_redirects(self): | |||
"""Returns the number of open redirect submissions. Calculated as the | |||
@@ -16,7 +16,7 @@ import time | |||
from classes import BaseTask | |||
import config | |||
__all__ = ["load", "schedule", "start", "get_all"] | |||
__all__ = ["load", "schedule", "start", "get", "get_all"] | |||
# Base directory when searching for tasks: | |||
base_dir = os.path.join(config.root_dir, "bot", "tasks") | |||
@@ -77,7 +77,7 @@ def schedule(now=time.gmtime()): | |||
"""Start all tasks that are supposed to be run at a given time.""" | |||
# Get list of tasks to run this turn: | |||
tasks = config.schedule(now.tm_min, now.tm_hour, now.tm_mday, now.tm_mon, | |||
now.tm_wday) | |||
now.tm_wday) | |||
for task in tasks: | |||
if isinstance(task, list): # they've specified kwargs | |||
@@ -106,6 +106,13 @@ def start(task_name, **kwargs): | |||
task_thread.start() | |||
def get(task_name):
    """Look up a loaded task by name and return its class instance.

    A KeyError propagates to the caller when no task with that name has
    been loaded.
    """
    task = _tasks[task_name]
    return task
def get_all():
    """Return the dict that holds every loaded task."""
    loaded = _tasks
    return loaded
@@ -0,0 +1,151 @@ | |||
# -*- coding: utf-8 -*- | |||
from datetime import datetime, timedelta
from os.path import expanduser
from threading import Lock

import oursql

from classes import BaseTask
import config
import wiki
# Valid submission statuses:
STATUS_NONE = 0     # not tracked: ignored or unsubmitted pages
STATUS_PEND = 1     # page is in the configured "pending" category
STATUS_DECLINE = 2  # page is in the configured "declined" category
STATUS_ACCEPT = 3   # submission was accepted
class Task(BaseTask):
    """A task to generate charts about AfC submissions over time.

    The main function of the task is to work through the "AfC submissions by
    date" categories (e.g. [[Category:AfC submissions by date/12 July 2011]])
    and determine the number of declined, accepted, and currently pending
    submissions every day.

    This information is saved to a MySQL database ("u_earwig_afc_history") and
    used to generate attractive graphs showing the number of AfC submissions
    over time.
    """
    name = "afc_history"

    def __init__(self):
        """Load this task's settings from the "afc_history" config section."""
        cfg = config.tasks.get(self.name, {})
        self.destination = cfg.get("destination", "afc_history.png")
        self.categories = cfg.get("categories", {})

        # Connection data for our SQL database. Work on a copy so that adding
        # "read_default_file" does not modify the shared config dict.
        kwargs = dict(cfg.get("sql", {}))
        kwargs["read_default_file"] = expanduser("~/.my.cnf")
        self.conn_data = kwargs
        self.db_access_lock = Lock()

    def run(self, **kwargs):
        """Run the task.

        Keyword arguments:
        action -- "update" to refresh the database or "generate" to build
                  the chart; any other value does nothing
        days   -- how many days back from today to process (default 90)
        """
        self.site = wiki.get_site()
        # self.conn lives on the instance, so the lock is held for the whole
        # run, not just while connecting.
        with self.db_access_lock:
            self.conn = oursql.connect(**self.conn_data)
            try:
                action = kwargs.get("action")
                num_days = kwargs.get("days", 90)
                if action == "update":
                    self.update(num_days)
                elif action == "generate":
                    self.generate(num_days)
            finally:
                self.conn.close()

    def update(self, num_days):
        """Refresh the stored statuses for each of the past num_days days."""
        self.logger.info("Updating past {0} days".format(num_days))
        for date, category in self._recent_categories(num_days):
            self.update_date(date, category)
        self.logger.info("Update complete")

    def generate(self, num_days):
        """Collect per-day status counts for the past num_days days and save
        them to self.destination."""
        self.logger.info("Generating chart for past {0} days".format(num_days))
        data = {}
        for date, _category in self._recent_categories(num_days):
            data[date] = self.get_date_counts(date)

        dest = expanduser(self.destination)
        # NOTE(review): data is a dict of {date: {status: count}} and cannot
        # be passed to write() as-is; chart rendering is not implemented here
        # yet, so this write needs a real serializer/plotter.
        with open(dest, "wb") as fp:
            fp.write(data)
        self.logger.info("Chart saved to {0}".format(dest))

    def _recent_categories(self, num_days):
        """Yield (date, category) for today and the preceding days, newest
        first, num_days pairs in total."""
        generator = self.backwards_cat_iterator()
        for _ in xrange(num_days):
            category = next(generator)
            # The date is the part of the category title after the last "/".
            yield category.title().split("/")[-1], category

    def backwards_cat_iterator(self):
        """Endlessly yield per-day category objects, starting with today
        (UTC) and stepping back one day at a time."""
        date_base = self.categories["dateBase"]
        current = datetime.utcnow()
        while True:
            subcat = current.strftime("%d %B %Y")
            title = "/".join((date_base, subcat))
            yield self.site.get_category(title)
            current -= timedelta(1)  # Subtract one day from date

    def update_date(self, date, category):
        """Sync the "page" table with the current members of category.

        Members whose status is STATUS_NONE are dropped from the table if
        present, tracked members with a changed status are updated, and
        untracked members are inserted.
        """
        msg = "Updating {0} ([[{1}]])".format(date, category.title())
        self.logger.debug(msg)

        q_select = "SELECT page_id, page_status FROM page WHERE page_date = ?"
        q_delete = "DELETE FROM page WHERE page_id = ?"
        q_update = "UPDATE page SET page_status = ? WHERE page_id = ?"
        q_insert = "INSERT INTO page VALUES (?, ?, ?)"
        members = category.members(use_sql=True)

        with self.conn.cursor() as cursor:
            cursor.execute(q_select, (date,))
            # page_id -> stored status for every page already tracked on this
            # date; a dict gives constant-time membership tests.
            statuses = dict((pageid, status) for pageid, status in cursor)

            for title, pageid in members:
                status = self.get_status(title, pageid)
                if pageid not in statuses:
                    if status != STATUS_NONE:
                        cursor.execute(q_insert, (pageid, date, status))
                elif status == STATUS_NONE:
                    cursor.execute(q_delete, (pageid,))
                elif status != statuses[pageid]:
                    cursor.execute(q_update, (status, pageid))

    def get_status(self, title, pageid):
        """Return the STATUS_* constant describing the page's current state."""
        page = self.site.get_page(title)
        ns = page.namespace()

        if ns == wiki.NS_FILE_TALK:  # Ignore accepted FFU requests
            return STATUS_NONE

        if ns == wiki.NS_TALK:
            new_page = page.toggle_talk()
            if new_page.is_redirect():
                return STATUS_NONE  # Ignore accepted AFC/R requests
            return STATUS_ACCEPT

        cats = self.categories
        query = "SELECT 1 FROM categorylinks WHERE cl_from = ? AND cl_to = ?"

        def in_category(cat):
            # Parameters follow placeholder order: cl_from takes the page ID,
            # cl_to the category name.
            # NOTE(review): presumably the configured names must already be
            # in database form (underscores, no namespace) -- confirm.
            return bool(list(self.site.sql_query(query, (pageid, cat))))

        if in_category(cats["pending"]):
            return STATUS_PEND
        elif in_category(cats["unsubmitted"]):
            return STATUS_NONE
        elif in_category(cats["declined"]):
            return STATUS_DECLINE
        return STATUS_NONE

    def get_date_counts(self, date):
        """Return {status: count} for the pending, declined and accepted
        pages stored for date."""
        query = "SELECT COUNT(*) FROM page WHERE page_date = ? AND page_status = ?"
        statuses = [STATUS_PEND, STATUS_DECLINE, STATUS_ACCEPT]
        counts = {}
        with self.conn.cursor() as cursor:
            for status in statuses:
                cursor.execute(query, (date, status))
                # COUNT(*) always yields exactly one row with one column.
                counts[status] = cursor.fetchall()[0][0]
        return counts
@@ -39,7 +39,7 @@ class Task(BaseTask): | |||
# Set some wiki-related attributes: | |||
self.pagename = cfg.get("page", "Template:AFC statistics") | |||
self.pending_cat = cfg.get("pending", "Pending AfC submissions") | |||
self.ignore_list = cfg.get("ignore_list", []) | |||
self.ignore_list = cfg.get("ignoreList", []) | |||
default_summary = "Updating statistics for [[WP:WPAFC|WikiProject Articles for creation]]." | |||
self.summary = self.make_summary(cfg.get("summary", default_summary)) | |||