From d57f2623d23009bdedfce441abed6266fdf8f095 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 22 Jun 2011 20:11:29 -0400 Subject: [PATCH] parse remaining parts of config.xml; getting rid of config.watcher and moving to irc/watcher_logic.py; convert irc/watcher.py and wiki/task_manager.py to new config system; other changes/fixes/whatever --- core/config.py | 53 +++++++++++++++++++++++++++++-------- core/main.py | 6 ++--- irc/frontend.py | 2 +- irc/watcher.py | 66 ++++++++++++++++++++++++++++++---------------- irc/watcher_logic.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++ wiki/task_manager.py | 70 ++++++++++++++++++++++++++++++------------------- 6 files changed, 206 insertions(+), 65 deletions(-) create mode 100644 irc/watcher_logic.py diff --git a/core/config.py b/core/config.py index 486367a..b1331ce 100644 --- a/core/config.py +++ b/core/config.py @@ -8,13 +8,12 @@ including encrypting and decrypting passwords and making a new config file from scratch at the inital bot run. Usually you'll just want to do "from core import config" and access config data -from within config's five global variables: +from within config's four global variables: * config.components * config.wiki * config.irc * config.schedule -* config.watcher """ from collections import defaultdict @@ -170,10 +169,10 @@ def parse_config(key): exit(1) def _parse_config(key): - """Parse config data from a DOM object into the five global variables that + """Parse config data from a DOM object into the four global variables that store our config info. The key is used to unencrypt passwords stored in the XML config file.""" - global components, wiki, irc, schedule, watcher + global components, wiki, irc, schedule _load_config() # we might be re-loading unnecessarily here, but no harm in # that! 
@@ -183,7 +182,6 @@ def _parse_config(key): wiki = parse_wiki(data, key) irc = parse_irc(data, key) schedule = parse_schedule(data) - watcher = parse_watcher(data) def parse_components(data): """Parse everything within the XML tag of our config file. @@ -270,11 +268,44 @@ def parse_irc(data, key): irc.permissions[group_name].append(hostname) return irc - + def parse_schedule(data): - """Parse everything within the tag of our XML config file.""" - pass + """Store the element in schedule.data and the _schedule() + function as schedule.check().""" + schedule = Container() + schedule.check = _schedule + schedule.data = get_first_element(data, "schedule") + return schedule -def parse_watcher(data): - """Parse everything within the tag of our XML config file.""" - pass +def _schedule(minute, hour, month_day, month, week_day): + """Return a list of tasks that are scheduled to run at the time specified + by the function args. The schedule data comes from our config file's + tag, which is stored as schedule.data. 
Call this function with + config.schedule.check(args).""" + tasks = [] # tasks to run this turn, each as a tuple of (task_name, + # kwargs), or just task_name + + now = {"minute": minute, "hour": hour, "month_day": month_day, + "month": month, "week_day": week_day} + + for when in schedule.data.getElementsByTagName("when"): + do = True + for key, value in now.items(): + if when.hasAttribute(key): + req = when.getAttribute(key) + if attribute_to_int(req, when, key) != value: + do = False + break + if do: + for task in when.getElementsByTagName("task"): + name = get_required_attribute(task, "name") + args = dict() + for key in task.attributes.keys(): + args[key] = task.getAttribute(key) + del args["name"] + if args: + tasks.append((name, args)) + else: + tasks.append(name) + + return tasks diff --git a/core/main.py b/core/main.py index 31d6149..649e1f6 100644 --- a/core/main.py +++ b/core/main.py @@ -42,8 +42,8 @@ root_dir = os.path.split(script_dir)[0] # the bot's "root" directory relative sys.path.append(root_dir) # make sure we look in the root dir for modules from core import config -from irc import frontend#, watcher -#from wiki import task_manager +from irc import frontend, watcher +from wiki import task_manager f_conn = None w_conn = None @@ -126,7 +126,7 @@ def run(): task_manager.load_tasks() # watcher on another thread iff it if components["irc_watcher"]: # is enabled print "\nStarting IRC watcher..." - t_watcher = threading.Thread(target=irc_watcher, args=(f_conn,)) + t_watcher = threading.Thread(target=irc_watcher, args=()) t_watcher.name = "irc-watcher" t_watcher.daemon = True t_watcher.start() diff --git a/irc/frontend.py b/irc/frontend.py index d8d4991..38d326a 100644 --- a/irc/frontend.py +++ b/irc/frontend.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """ -EarwigBot's Front-end IRC Component +EarwigBot's IRC Front-end Component The IRC frontend runs on a normal IRC server and expects users to interact with it and give it commands. 
Commands are stored as "command classes", subclasses diff --git a/irc/watcher.py b/irc/watcher.py index 2aff8dd..bbc17c9 100644 --- a/irc/watcher.py +++ b/irc/watcher.py @@ -1,20 +1,34 @@ # -*- coding: utf-8 -*- -## Imports -from config.irc import * -from config.main import * -from config.watcher import * +""" +EarwigBot's IRC Watcher Component +The IRC watcher runs on a wiki recent-changes server and listens for edits. +Users cannot interact with this part of the bot. When an event occurs, run it +through irc/watcher_logic.py's process() function, which can result in either +wiki bot tasks being started (listed in wiki/tasks/) or messages being sent to +channels in the IRC frontend. +""" + +from core import config from irc.connection import * from irc.rc import RC +from irc import watcher_logic -global frontend_conn +frontend_conn = None def get_connection(): - connection = Connection(WATCHER_HOST, WATCHER_PORT, NICK, IDENT, REALNAME) + """Return a new Connection() instance with information about our server + connection, but don't actually connect yet.""" + cf = config.irc.watcher + connection = Connection(cf.host, cf.port, cf.nick, cf.nick, cf.realname) return connection -def main(connection, f_conn): +def main(connection, f_conn=None): + """Main loop for the Watcher IRC Bot component. get_connection() should + have already been called and the connection should have been started with + connection.connect(). 
Also accept the frontend connection, as an optional + parameter, in order to send messages directly to frontend IRC channels.""" global frontend_conn frontend_conn = f_conn read_buffer = str() @@ -33,26 +47,32 @@ def main(connection, f_conn): if line[1] == "PRIVMSG": chan = line[2] - if chan != WATCHER_CHAN: # if we're getting a msg from another channel, ignore it + + # ignore messages originating from channels not in our list, to + # prevent someone PMing us false data + if chan not in config.irc.watcher.channels: continue msg = ' '.join(line[3:])[1:] - rc = RC(msg) # create a new RC object to store this change's data - rc.parse() - check(rc) + rc = RC(msg) # new RC object to store this event's data + rc.parse() # parse a message into pagenames, usernames, etc. + process(rc) # report to frontend channels or start tasks - if line[0] == "PING": # If we are pinged, pong back to the server + if line[0] == "PING": # if we are pinged, pong back to the server connection.send("PONG %s" % line[1]) - if line[1] == "376": # Join the recent changes channel when we've finished starting up - connection.join(WATCHER_CHAN) - -def check(rc): - """check if we're supposed to report this message anywhere""" - results = process(rc) # process the message in config/watcher.py, and get a list of channels to send it to - if not results: - return - pretty = rc.get_pretty() - if enable_irc_frontend: - for chan in results: + # when we've finished starting up, join all watcher channels + if line[1] == "376": + for chan in config.irc.watcher.channels: + connection.join(chan) + +def process(rc): + """Process a message from IRC (technically, an RC object). The actual + processing is configurable, so we don't have that hard-coded here. 
We + simply call irc/watcher_logic.py's process() function and expect a list of + channels back, which we report the event data to.""" + chans = watcher_logic.process(rc) + if chans and frontend_conn: + pretty = rc.get_pretty() + for chan in chans: frontend_conn.say(chan, pretty) diff --git a/irc/watcher_logic.py b/irc/watcher_logic.py new file mode 100644 index 0000000..bcce171 --- /dev/null +++ b/irc/watcher_logic.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- + +""" +EarwigBot's IRC Watcher Logic + +This file contains (configurable!) rules that EarwigBot's watcher uses after it +receives an event from IRC. + +This should, ideally, be in config.xml somehow, but Python code makes more +sense for this sort of thing... so... +""" + +import re + +from wiki import task_manager as tasks + +afc_prefix = "wikipedia( talk)?:(wikiproject )?articles for creation" + +# compile some regexps used when finding specific events +r_page = re.compile(afc_prefix) +r_ffu = re.compile("wikipedia( talk)?:files for upload") +r_move1 = re.compile("moved \[\[{}".format(afc_prefix)) +r_move2 = re.compile("moved \[\[(.*?)\]\] to \[\[{}".format(afc_prefix)) +r_moved_pages = re.compile("^moved \[\[(.*?)\]\] to \[\[(.*?)\]\]") +r_delete = re.compile("deleted \"\[\[{}".format(afc_prefix)) +r_deleted_page = re.compile("^deleted \"\[\[(.*?)\]\]") +r_restore = re.compile("restored \"\[\[{}".format(afc_prefix)) +r_restored_page = re.compile("^restored \"\[\[(.*?)\]\]") +r_protect = re.compile("protected \"\[\[{}".format(afc_prefix)) + +def process(rc): + """Given an RC() object, return a list of channels to report this event to. 
+ Also, start any wiki bot tasks within this function if necessary.""" + chans = set() # channels to report this message to + page_name = rc.page.lower() + comment = rc.comment.lower() + + if "!earwigbot" in rc.msg.lower(): + chans.update(("##earwigbot", "#wikipedia-en-afc")) + + if r_page.search(page_name): + tasks.start_task("afc_statistics", action="process_edit", page=rc.page) + tasks.start_task("afc_copyvios", action="process_edit", page=rc.page) + chans.add("#wikipedia-en-afc") + + elif r_ffu.match(page_name): + chans.add("#wikipedia-en-afc") + + elif page_name.startswith("template:afc submission"): + chans.add("#wikipedia-en-afc") + + elif rc.flags == "move" and (r_move1.match(comment) or + r_move2.match(comment)): + p = r_moved_pages.findall(rc.comment)[0] + tasks.start_task("afc_statistics", action="process_move", pages=p) + tasks.start_task("afc_copyvios", action="process_move", pages=p) + chans.add("#wikipedia-en-afc") + + elif rc.flags == "delete" and r_delete.match(comment): + p = r_deleted_page.findall(rc.comment)[0][0] + tasks.start_task("afc_statistics", action="process_delete", page=p) + tasks.start_task("afc_copyvios", action="process_delete", page=p) + chans.add("#wikipedia-en-afc") + + elif rc.flags == "restore" and r_restore.match(comment): + p = r_restored_page.findall(rc.comment)[0][0] + tasks.start_task("afc_statistics", action="process_restore", page=p) + tasks.start_task("afc_copyvios", action="process_restore", page=p) + chans.add("#wikipedia-en-afc") + + elif rc.flags == "protect" and r_protect.match(comment): + chans.add("#wikipedia-en-afc") + + return chans diff --git a/wiki/task_manager.py b/wiki/task_manager.py index b17cbbb..3c37a63 100644 --- a/wiki/task_manager.py +++ b/wiki/task_manager.py @@ -1,25 +1,32 @@ # -*- coding: utf-8 -*- -# A module to manage bot tasks. +""" +EarwigBot's Wiki Bot Task Manager + +This module provides some functions to run and load bot tasks from wiki/tasks/. 
+""" import time import traceback import threading import os -from config import schedule +from core import config -task_list = dict() # the key is the task's name, the value is the task's class instance +# store loaded tasks as a dict where the key is the task name and the value is +# an instance of the task class (wiki.tasks.task_file.Task()) +task_list = dict() def load_tasks(): - """Load all valid task classes from wiki/tasks/, and add them to the task_list.""" - files = os.listdir(os.path.join("wiki", "tasks")) # get all files in wiki/tasks/ - files.sort() # alphabetically sort list of files + """Load all valid task classes from wiki/tasks/, and add them to the + task_list variable.""" + files = os.listdir(os.path.join("wiki", "tasks")) + files.sort() # alphabetically sort all files in wiki/tasks/ for f in files: - if not os.path.isfile(os.path.join("wiki", "tasks", f)): # ignore non-files - continue - if f.startswith("_") or not f.endswith(".py"): # ignore non-python files or files beginning with "_" - continue + if not os.path.isfile(os.path.join("wiki", "tasks", f)): + continue # ignore non-files + if f.startswith("_") or not f.endswith(".py"): + continue # ignore non-python files or files beginning with an _ load_class_from_file(f) print "Found %s tasks: %s." % (len(task_list), ', '.join(task_list.keys())) @@ -27,10 +34,10 @@ def load_class_from_file(f): """Look in a given file for the task class.""" global task_list - module = f[:-3] # strip .py from end + module = f[:-3] # strip .py from end try: exec "from wiki.tasks import %s as m" % module - except: # importing the file failed for some reason... + except: # importing the file failed for some reason... 
print "Couldn't load task file %s:" % f traceback.print_exc() return @@ -46,26 +53,34 @@ def load_class_from_file(f): def start_tasks(now=time.gmtime()): """Start all tasks that are supposed to be run at a given time.""" - tasks = schedule.check(now.tm_min, now.tm_hour, now.tm_mday, now.tm_mon, now.tm_wday) # get list of tasks to run this turn + tasks = config.schedule.check(now.tm_min, now.tm_hour, now.tm_mday, + now.tm_mon, now.tm_wday) # get list of tasks to run this turn + for task in tasks: - if isinstance(task, tuple): # they've specified kwargs, so pass those to start_task - start_task(task[0], **task[1]) - else: # otherwise, just pass task_name + if isinstance(task, tuple): # they've specified kwargs + start_task(task[0], **task[1]) # so pass those to start_task + else: # otherwise, just pass task_name start_task(task) def start_task(task_name, **kwargs): - """Start a given task in a new thread. Pass args to the task's run function.""" - print "Starting task '{}' in a new thread...".format(task_name) - + """Start a given task in a new thread. Pass args to the task's run() + function.""" + print "Starting task '{0}' in a new thread...".format(task_name) + try: - task = task_list[task_name] # get the class for this task, a subclass of BaseTask + task = task_list[task_name] except KeyError: - print "Couldn't find task '{}': wiki/tasks/{}.py does not exist.".format(task_name, task_name) + print ("Couldn't find task '{0}': wiki/tasks/{1}.py does not " + + "exist.").format(task_name, task_name) return - - task_thread = threading.Thread(target=lambda: task_wrapper(task, **kwargs)) # Normally we'd do task_wrapper(task, **kwargs), but because of threading we'd have to do Thread(target=task_wrapper, args=(task, **kwargs)), which doesn't work because the **kwargs is inside a tuple, not inside function params. 
Use lambda to get around the args=tuple nonsense - task_thread.name = "{} ({})".format(task_name, time.strftime("%b %d %H:%M:%S")) - task_thread.daemon = True # stop bot task threads automagically if the main bot stops + + task_thread = threading.Thread(target=lambda: task_wrapper(task, **kwargs)) + task_thread.name = "{0} ({1})".format(task_name, time.strftime( + "%b %d %H:%M:%S")) + + # stop bot task threads automagically if the main bot stops + task_thread.daemon = True + task_thread.start() def task_wrapper(task, **kwargs): @@ -73,7 +88,8 @@ def task_wrapper(task, **kwargs): try: task.run(**kwargs) except: - print "Task '{}' raised an exception and had to stop:".format(task.task_name) + print "Task '{0}' raised an exception and had to stop:".format( + task.task_name) traceback.print_exc() else: - print "Task '{}' finished without error.".format(task.task_name) + print "Task '{0}' finished without error.".format(task.task_name)