Browse Source

Some SQL updates, starting work on afc_history task.

* get() -> return a Task instance by name (tasks)
* Using SQL to save API queries. (commands.{afc_report,afc_status})
* ignore_list -> ignoreList in config. (tasks.afc_statistics)
tags/v0.1^2
Ben Kurtovic 13 years ago
parent
commit
d949269944
5 changed files with 202 additions and 40 deletions
  1. +39
    -34
      bot/commands/afc_report.py
  2. +2
    -3
      bot/commands/afc_status.py
  3. +9
    -2
      bot/tasks/__init__.py
  4. +151
    -0
      bot/tasks/afc_history.py
  5. +1
    -1
      bot/tasks/afc_statistics.py

+ 39
- 34
bot/commands/afc_report.py View File

@@ -3,6 +3,7 @@
import re

from classes import BaseCommand
import tasks
import wiki

class Command(BaseCommand):
@@ -14,41 +15,50 @@ class Command(BaseCommand):
self.site._maxlag = None
self.data = data

try:
self.statistics = tasks.get("afc_statistics")
except KeyError:
e = "Cannot run command: requires afc_statistics task."
self.logger.error(e)
return

if not data.args:
msg = "what submission do you want me to give information about?"
self.connection.reply(data, msg)
return

title = ' '.join(data.args)
title = " ".join(data.args)
title = title.replace("http://en.wikipedia.org/wiki/", "")
title = title.replace("http://enwp.org/", "").strip()

# Given '!report Foo', first try [[Foo]]:
if self.report(title):
return
page = self.get_page(title)
if page:
return self.report(page)

# Then try [[Wikipedia:Articles for creation/Foo]]:
title2 = "".join(("Wikipedia:Articles for creation/", title))
if self.report(title2):
return
newtitle = "/".join(("Wikipedia:Articles for creation", title))
page = self.get_page(newtitle)
if page:
return self.report(page)

# Then try [[Wikipedia talk:Articles for creation/Foo]]:
title3 = "".join(("Wikipedia talk:Articles for creation/", title))
if self.report(title3):
return
newtitle = "/".join(("Wikipedia talk:Articles for creation", title))
page = self.get_page(newtitle)
if page:
return self.report(page)

msg = "submission \x0302{0}\x0301 not found.".format(title)
self.connection.reply(data, msg)

def report(self, title):
data = self.data
def get_page(self, title):
page = self.site.get_page(title, follow_redirects=False)
if not page.exists()[0]:
return
if page.exists()[0]:
return page

def report(self, page):
url = page.url().replace("en.wikipedia.org/wiki", "enwp.org")
short = re.sub("wikipedia( talk)?\:articles for creation\/", "", title,
flags=re.IGNORECASE)
short = self.statistics.get_short_title(page.title())
status = self.get_status(page)
user = self.site.get_user(page.creator())
user_name = user.name()
@@ -60,31 +70,26 @@ class Command(BaseCommand):
if status == "accepted":
msg3 = "Reviewed by \x0302{0}\x0301 ({1})"

self.connection.reply(data, msg1.format(short, url))
self.connection.say(data.chan, msg2.format(status))
self.connection.say(data.chan, msg3.format(user_name, user_url))

return True
self.connection.reply(self.data, msg1.format(short, url))
self.connection.say(self.data.chan, msg2.format(status))
self.connection.say(self.data.chan, msg3.format(user_name, user_url))

def get_status(self, page):
content = page.get()

if page.is_redirect():
target = page.get_redirect_target()
if self.site.get_page(target).namespace() == 0:
if self.site.get_page(target).namespace() == wiki.NS_MAIN:
return "accepted"
return "redirect"
elif re.search("\{\{afc submission\|r\|(.*?)\}\}", content, re.I):

statuses = self.statistics.get_statuses(page.get())
if "R" in statuses:
return "being reviewed"
elif re.search("\{\{afc submission\|h?\|(.*?)\}\}", content, re.I):
return "pending"
elif re.search("\{\{afc submission\|t\|(.*?)\}\}", content, re.I):
elif "H" in statuses:
return "pending draft"
elif "P" in statuses:
return "pending submission"
elif "T" in statuses:
return "unsubmitted draft"
elif re.search("\{\{afc submission\|d\|(.*?)\}\}", content, re.I):
regex = "\{\{afc submission\|d\|(.*?)(\||\}\})"
try:
reason = re.findall(regex, content, re.I)[0][0]
except IndexError:
return "declined"
return "declined with reason \"{0}\"".format(reason)
elif "D" in statuses:
return "declined"
return "unkown"

+ 2
- 3
bot/commands/afc_status.py View File

@@ -92,12 +92,11 @@ class Command(BaseCommand):
def count_submissions(self):
"""Returns the number of open AFC submissions (count of CAT:PEND)."""
cat = self.site.get_category("Pending AfC submissions")
subs = len(cat.members(limit=500))
subs = len(cat.members(limit=2500, use_sql=True))

# Remove [[Wikipedia:Articles for creation/Redirects]] and
# [[Wikipedia:Files for upload]], which aren't real submissions:
subs -= 2
return subs
return subs - 2

def count_redirects(self):
"""Returns the number of open redirect submissions. Calculated as the


+ 9
- 2
bot/tasks/__init__.py View File

@@ -16,7 +16,7 @@ import time
from classes import BaseTask
import config

__all__ = ["load", "schedule", "start", "get_all"]
__all__ = ["load", "schedule", "start", "get", "get_all"]

# Base directory when searching for tasks:
base_dir = os.path.join(config.root_dir, "bot", "tasks")
@@ -77,7 +77,7 @@ def schedule(now=time.gmtime()):
"""Start all tasks that are supposed to be run at a given time."""
# Get list of tasks to run this turn:
tasks = config.schedule(now.tm_min, now.tm_hour, now.tm_mday, now.tm_mon,
now.tm_wday)
now.tm_wday)

for task in tasks:
if isinstance(task, list): # they've specified kwargs
@@ -106,6 +106,13 @@ def start(task_name, **kwargs):

task_thread.start()

def get(task_name):
    """Look up a task by name and return its class instance.

    A KeyError propagates to the caller if no task is registered under
    `task_name`.
    """
    task = _tasks[task_name]
    return task

def get_all():
    """Return the dict that holds every loaded task."""
    loaded = _tasks
    return loaded

+ 151
- 0
bot/tasks/afc_history.py View File

@@ -0,0 +1,151 @@
# -*- coding: utf-8 -*-

from datetime import datetime, timedelta
from os.path import expanduser
from threading import Lock

import oursql

from classes import BaseTask
import config
import wiki

# Valid submission statuses:
STATUS_NONE = 0
STATUS_PEND = 1
STATUS_DECLINE = 2
STATUS_ACCEPT = 3

class Task(BaseTask):
    """A task to generate charts about AfC submissions over time.

    The main function of the task is to work through the "AfC submissions by
    date" categories (e.g. [[Category:AfC submissions by date/12 July 2011]])
    and determine the number of declined, accepted, and currently pending
    submissions every day.

    This information is saved to a MySQL database ("u_earwig_afc_history") and
    used to generate attractive graphs showing the number of AfC submissions
    over time.
    """
    name = "afc_history"

    def __init__(self):
        """Read this task's settings from the bot config.

        Reads the "destination", "categories" and "sql" keys of the config
        section named after the task, falling back to defaults when absent.
        """
        cfg = config.tasks.get(self.name, {})
        self.destination = cfg.get("destination", "afc_history.png")
        self.categories = cfg.get("categories", {})

        # Connection data for our SQL database. Copy the dict so that adding
        # our own key does not modify the shared config object in place:
        kwargs = dict(cfg.get("sql", {}))
        kwargs["read_default_file"] = expanduser("~/.my.cnf")
        self.conn_data = kwargs
        self.db_access_lock = Lock()

    def run(self, **kwargs):
        """Run the task.

        Recognized keyword arguments:
        * action -- "update" to refresh the database, "generate" to build the
          chart; any other value opens and closes the connection and does
          nothing else.
        * days -- how many days back to work through (default 90); coerced
          with int() so a numeric string is accepted as well.
        """
        self.site = wiki.get_site()
        action = kwargs.get("action")
        num_days = int(kwargs.get("days", 90))

        # self.conn is shared instance state, so hold the lock for as long as
        # the connection is open rather than only while connecting:
        with self.db_access_lock:
            self.conn = oursql.connect(**self.conn_data)
            try:
                if action == "update":
                    self.update(num_days)
                elif action == "generate":
                    self.generate(num_days)
            finally:
                self.conn.close()

    def update(self, num_days):
        """Update the database rows for each of the past `num_days` days."""
        self.logger.info("Updating past {0} days".format(num_days))
        for date, category in self._recent_categories(num_days):
            self.update_date(date, category)
        self.logger.info("Update complete")

    def generate(self, num_days):
        """Collect per-day status counts for the past `num_days` days and
        save the result to the configured destination.
        """
        self.logger.info("Generating chart for past {0} days".format(num_days))
        data = {}
        for date, category in self._recent_categories(num_days):
            data[date] = self.get_date_counts(date)

        dest = expanduser(self.destination)
        # NOTE(review): `data` is a dict of {date: {status: count}}; writing
        # it straight to a binary file will fail. Rendering it into an actual
        # chart image still has to be implemented here.
        with open(dest, "wb") as fp:
            fp.write(data)
        self.logger.info("Chart saved to {0}".format(dest))

    def _recent_categories(self, num_days):
        """Yield (date, category) for the past `num_days` days, newest first.

        `date` is the last "/"-separated component of the category's title.
        """
        categories = self.backwards_cat_iterator()
        for _ in xrange(num_days):
            category = next(categories)
            yield category.title().split("/")[-1], category

    def backwards_cat_iterator(self):
        """Yield by-date category objects forever, starting with today (UTC)
        and moving back one day at a time.

        Category titles are built as "<dateBase>/<DD Month YYYY>", where
        dateBase comes from the "categories" config; a missing "dateBase"
        raises KeyError on first iteration.
        """
        date_base = self.categories["dateBase"]
        current = datetime.utcnow()
        one_day = timedelta(days=1)
        while True:
            subcat = current.strftime("%d %B %Y")
            title = "/".join((date_base, subcat))
            yield self.site.get_category(title)
            current -= one_day

    def update_date(self, date, category):
        """Synchronize the `page` table with the members of one date category.

        Members with no meaningful status are deleted if they were tracked,
        tracked members whose status changed are updated, and untracked
        members with a status are inserted.
        """
        msg = "Updating {0} ([[{1}]])".format(date, category.title())
        self.logger.debug(msg)

        q_select = "SELECT page_id, page_status FROM page WHERE page_date = ?"
        q_delete = "DELETE FROM page WHERE page_id = ?"
        q_update = "UPDATE page SET page_status = ? WHERE page_id = ?"
        # Assumes the table's columns are ordered (id, date, status) -- TODO
        # confirm against the schema:
        q_insert = "INSERT INTO page VALUES (?, ?, ?)"
        members = category.members(use_sql=True)

        with self.conn.cursor() as cursor:
            # Map of page ID -> stored status for pages already tracked:
            tracked = {}
            cursor.execute(q_select, (date,))
            for pageid, status in cursor:
                tracked[pageid] = status

            for title, pageid in members:
                status = self.get_status(title, pageid)
                if pageid not in tracked:
                    if status != STATUS_NONE:
                        cursor.execute(q_insert, (pageid, date, status))
                elif status == STATUS_NONE:
                    cursor.execute(q_delete, (pageid,))
                elif status != tracked[pageid]:
                    cursor.execute(q_update, (status, pageid))

    def get_status(self, title, pageid):
        """Return the STATUS_* constant describing a submission.

        Talk-namespace pages count as accepted unless their subject page is a
        redirect; other pages are classified by membership in the configured
        "pending", "unsubmitted" and "declined" categories.
        """
        page = self.site.get_page(title)
        ns = page.namespace()

        if ns == wiki.NS_FILE_TALK:  # Ignore accepted FFU requests
            return STATUS_NONE

        if ns == wiki.NS_TALK:
            new_page = page.toggle_talk()
            if new_page.is_redirect():
                return STATUS_NONE  # Ignore accepted AFC/R requests
            return STATUS_ACCEPT

        cats = self.categories
        query = "SELECT 1 FROM categorylinks WHERE cl_from = ? AND cl_to = ?"

        def in_category(cat):
            # cl_from is the member's page ID and cl_to the category title,
            # which the database stores with underscores instead of spaces:
            params = (pageid, cat.replace(" ", "_"))
            return list(self.site.sql_query(query, params))

        if in_category(cats["pending"]):
            return STATUS_PEND
        if in_category(cats["unsubmitted"]):
            return STATUS_NONE
        if in_category(cats["declined"]):
            return STATUS_DECLINE
        return STATUS_NONE

    def get_date_counts(self, date):
        """Return a dict mapping each of STATUS_PEND, STATUS_DECLINE and
        STATUS_ACCEPT to the number of rows stored for `date` with it.
        """
        query = "SELECT COUNT(*) FROM page WHERE page_date = ? AND page_status = ?"
        counts = {}
        with self.conn.cursor() as cursor:
            for status in (STATUS_PEND, STATUS_DECLINE, STATUS_ACCEPT):
                cursor.execute(query, (date, status))
                counts[status] = cursor.fetchall()[0][0]
        return counts

+ 1
- 1
bot/tasks/afc_statistics.py View File

@@ -39,7 +39,7 @@ class Task(BaseTask):
# Set some wiki-related attributes:
self.pagename = cfg.get("page", "Template:AFC statistics")
self.pending_cat = cfg.get("pending", "Pending AfC submissions")
self.ignore_list = cfg.get("ignore_list", [])
self.ignore_list = cfg.get("ignoreList", [])
default_summary = "Updating statistics for [[WP:WPAFC|WikiProject Articles for creation]]."
self.summary = self.make_summary(cfg.get("summary", default_summary))



Loading…
Cancel
Save