Browse Source

parse remaining parts of config.xml; getting rid of config.watcher and moving to irc/watcher_logic.py; convert irc/watcher.py and wiki/task_manager.py to new config system; other changes/fixes/whatever

tags/v0.1^2
Ben Kurtovic 13 years ago
parent
commit
d57f2623d2
6 changed files with 206 additions and 65 deletions
  1. +42
    -11
      core/config.py
  2. +3
    -3
      core/main.py
  3. +1
    -1
      irc/frontend.py
  4. +43
    -23
      irc/watcher.py
  5. +74
    -0
      irc/watcher_logic.py
  6. +43
    -27
      wiki/task_manager.py

+ 42
- 11
core/config.py View File

@@ -8,13 +8,12 @@ including encrypting and decrypting passwords and making a new config file from
scratch at the initial bot run. scratch at the initial bot run.


Usually you'll just want to do "from core import config" and access config data Usually you'll just want to do "from core import config" and access config data
from within config's five global variables:
from within config's four global variables:


* config.components * config.components
* config.wiki * config.wiki
* config.irc * config.irc
* config.schedule * config.schedule
* config.watcher
""" """


from collections import defaultdict from collections import defaultdict
@@ -170,10 +169,10 @@ def parse_config(key):
exit(1) exit(1)


def _parse_config(key): def _parse_config(key):
"""Parse config data from a DOM object into the five global variables that
"""Parse config data from a DOM object into the four global variables that
store our config info. The key is used to unencrypt passwords stored in the store our config info. The key is used to unencrypt passwords stored in the
XML config file.""" XML config file."""
global components, wiki, irc, schedule, watcher
global components, wiki, irc, schedule


_load_config() # we might be re-loading unnecessarily here, but no harm in _load_config() # we might be re-loading unnecessarily here, but no harm in
# that! # that!
@@ -183,7 +182,6 @@ def _parse_config(key):
wiki = parse_wiki(data, key) wiki = parse_wiki(data, key)
irc = parse_irc(data, key) irc = parse_irc(data, key)
schedule = parse_schedule(data) schedule = parse_schedule(data)
watcher = parse_watcher(data)


def parse_components(data): def parse_components(data):
"""Parse everything within the <components> XML tag of our config file. """Parse everything within the <components> XML tag of our config file.
@@ -270,11 +268,44 @@ def parse_irc(data, key):
irc.permissions[group_name].append(hostname) irc.permissions[group_name].append(hostname)


return irc return irc
def parse_schedule(data): def parse_schedule(data):
"""Parse everything within the <schedule> tag of our XML config file."""
pass
"""Store the <schedule> element in schedule.data and the _schedule()
function as schedule.check()."""
schedule = Container()
schedule.check = _schedule
schedule.data = get_first_element(data, "schedule")
return schedule
def parse_watcher(data):
"""Parse everything within the <watcher> tag of our XML config file."""
pass
def _schedule(minute, hour, month_day, month, week_day):
    """Return the tasks scheduled to run at the given time.

    The schedule comes from the <schedule> element of the config file, which
    is kept in schedule.data; callers reach this function as
    config.schedule.check(minute, hour, month_day, month, week_day).

    Each entry of the returned list is either a bare task name or, when the
    <task> element carries attributes besides "name", a (task_name, kwargs)
    tuple.
    """
    current = {"minute": minute, "hour": hour, "month_day": month_day,
               "month": month, "week_day": week_day}
    due = []

    for when in schedule.data.getElementsByTagName("when"):
        for field, actual in current.items():
            if not when.hasAttribute(field):
                continue  # an absent attribute places no constraint
            required = when.getAttribute(field)
            # attribute_to_int is defined elsewhere; presumably it converts
            # the attribute text to an int for comparison -- confirm there
            if attribute_to_int(required, when, field) != actual:
                break  # this <when> does not apply right now
        else:
            # no constraint failed, so collect every <task> under this <when>
            for task in when.getElementsByTagName("task"):
                name = get_required_attribute(task, "name")
                kwargs = dict((attr, task.getAttribute(attr))
                              for attr in task.attributes.keys())
                del kwargs["name"]
                due.append((name, kwargs) if kwargs else name)

    return due

+ 3
- 3
core/main.py View File

@@ -42,8 +42,8 @@ root_dir = os.path.split(script_dir)[0] # the bot's "root" directory relative
sys.path.append(root_dir) # make sure we look in the root dir for modules sys.path.append(root_dir) # make sure we look in the root dir for modules


from core import config from core import config
from irc import frontend#, watcher
#from wiki import task_manager
from irc import frontend, watcher
from wiki import task_manager


f_conn = None f_conn = None
w_conn = None w_conn = None
@@ -126,7 +126,7 @@ def run():
task_manager.load_tasks() # watcher on another thread iff it task_manager.load_tasks() # watcher on another thread iff it
if components["irc_watcher"]: # is enabled if components["irc_watcher"]: # is enabled
print "\nStarting IRC watcher..." print "\nStarting IRC watcher..."
t_watcher = threading.Thread(target=irc_watcher, args=(f_conn,))
t_watcher = threading.Thread(target=irc_watcher, args=())
t_watcher.name = "irc-watcher" t_watcher.name = "irc-watcher"
t_watcher.daemon = True t_watcher.daemon = True
t_watcher.start() t_watcher.start()


+ 1
- 1
irc/frontend.py View File

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-


""" """
EarwigBot's Front-end IRC Component
EarwigBot's IRC Front-end Component


The IRC frontend runs on a normal IRC server and expects users to interact with The IRC frontend runs on a normal IRC server and expects users to interact with
it and give it commands. Commands are stored as "command classes", subclasses it and give it commands. Commands are stored as "command classes", subclasses


+ 43
- 23
irc/watcher.py View File

@@ -1,20 +1,34 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-


## Imports
from config.irc import *
from config.main import *
from config.watcher import *
"""
EarwigBot's IRC Watcher Component


The IRC watcher runs on a wiki recent-changes server and listens for edits.
Users cannot interact with this part of the bot. When an event occurs, run it
through irc/watcher_logic.py's process() function, which can result in either
wiki bot tasks being started (listed in wiki/tasks/) or messages being sent to
channels in the IRC frontend.
"""

from core import config
from irc.connection import * from irc.connection import *
from irc.rc import RC from irc.rc import RC
from irc import watcher_logic


global frontend_conn
frontend_conn = None


def get_connection(): def get_connection():
connection = Connection(WATCHER_HOST, WATCHER_PORT, NICK, IDENT, REALNAME)
"""Return a new Connection() instance with information about our server
connection, but don't actually connect yet."""
cf = config.irc.watcher
connection = Connection(cf.host, cf.port, cf.nick, cf.nick, cf.realname)
return connection return connection


def main(connection, f_conn):
def main(connection, f_conn=None):
"""Main loop for the Watcher IRC Bot component. get_connection() should
have already been called and the connection should have been started with
connection.connect(). Accept the frontend connection as well as an optional
parameter in order to send messages directly to frontend IRC channels."""
global frontend_conn global frontend_conn
frontend_conn = f_conn frontend_conn = f_conn
read_buffer = str() read_buffer = str()
@@ -33,26 +47,32 @@ def main(connection, f_conn):


if line[1] == "PRIVMSG": if line[1] == "PRIVMSG":
chan = line[2] chan = line[2]
if chan != WATCHER_CHAN: # if we're getting a msg from another channel, ignore it

# ignore messages originating from channels not in our list, to
# prevent someone PMing us false data
if chan not in config.irc.watcher.channels:
continue continue


msg = ' '.join(line[3:])[1:] msg = ' '.join(line[3:])[1:]
rc = RC(msg) # create a new RC object to store this change's data
rc.parse()
check(rc)
rc = RC(msg) # new RC object to store this event's data
rc.parse() # parse a message into pagenames, usernames, etc.
process(rc) # report to frontend channels or start tasks


if line[0] == "PING": # If we are pinged, pong back to the server
if line[0] == "PING": # if we are pinged, pong back to the server
connection.send("PONG %s" % line[1]) connection.send("PONG %s" % line[1])


if line[1] == "376": # Join the recent changes channel when we've finished starting up
connection.join(WATCHER_CHAN)

def check(rc):
"""check if we're supposed to report this message anywhere"""
results = process(rc) # process the message in config/watcher.py, and get a list of channels to send it to
if not results:
return
pretty = rc.get_pretty()
if enable_irc_frontend:
for chan in results:
# when we've finished starting up, join all watcher channels
if line[1] == "376":
for chan in config.irc.watcher.channels:
connection.join(chan)

def process(rc):
"""Process a message from IRC (technically, an RC object). The actual
processing is configurable, so we don't have that hard-coded here. We
simply call irc/watcher_logic.py's process() function and expect a list of
channels back, which we report the event data to."""
chans = watcher_logic.process(rc)
if chans and frontend_conn:
pretty = rc.get_pretty()
for chan in chans:
frontend_conn.say(chan, pretty) frontend_conn.say(chan, pretty)

+ 74
- 0
irc/watcher_logic.py View File

@@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-

"""
EarwigBot's IRC Watcher Logic

This file contains (configurable!) rules that EarwigBot's watcher uses after it
receives an event from IRC.

This should, ideally, be in config.xml somehow, but Python code makes more
sense for this sort of thing... so...
"""

import re

from wiki import task_manager as tasks

# Pattern fragment shared by every Articles for Creation page title; it is
# matched against lower-cased text.
afc_prefix = r"wikipedia( talk)?:(wikiproject )?articles for creation"

# Compile the regexps used when finding specific events. Raw strings keep the
# regex escapes (\[ and \]) away from Python's own string-escape handling, and
# explicit {0} fields match the format style used elsewhere in the project.
r_page = re.compile(afc_prefix)
r_ffu = re.compile(r"wikipedia( talk)?:files for upload")
r_move1 = re.compile(r"moved \[\[{0}".format(afc_prefix))
r_move2 = re.compile(r"moved \[\[(.*?)\]\] to \[\[{0}".format(afc_prefix))
r_moved_pages = re.compile(r"^moved \[\[(.*?)\]\] to \[\[(.*?)\]\]")
r_delete = re.compile(r'deleted "\[\[{0}'.format(afc_prefix))
r_deleted_page = re.compile(r'^deleted "\[\[(.*?)\]\]')
r_restore = re.compile(r'restored "\[\[{0}'.format(afc_prefix))
r_restored_page = re.compile(r'^restored "\[\[(.*?)\]\]')
r_protect = re.compile(r'protected "\[\[{0}'.format(afc_prefix))

def process(rc):
    """Given an RC() object, return the set of channels to report it to.

    Wiki bot tasks are also started from here (via tasks.start_task) when the
    event concerns an Articles for Creation page: an edit, move, deletion or
    restoration. Page-name and comment matching is done on lower-cased text,
    while the page titles handed to tasks are taken from the original,
    case-preserved rc.page / rc.comment.
    """
    chans = set()  # channels to report this message to
    page_name = rc.page.lower()
    comment = rc.comment.lower()

    if "!earwigbot" in rc.msg.lower():
        chans.update(("##earwigbot", "#wikipedia-en-afc"))

    if r_page.search(page_name):
        tasks.start_task("afc_statistics", action="process_edit", page=rc.page)
        tasks.start_task("afc_copyvios", action="process_edit", page=rc.page)
        chans.add("#wikipedia-en-afc")

    elif r_ffu.match(page_name):
        chans.add("#wikipedia-en-afc")

    elif page_name.startswith("template:afc submission"):
        chans.add("#wikipedia-en-afc")

    elif rc.flags == "move" and (r_move1.match(comment) or
                                 r_move2.match(comment)):
        # two capture groups, so findall() yields (old_title, new_title)
        p = r_moved_pages.findall(rc.comment)[0]
        tasks.start_task("afc_statistics", action="process_move", pages=p)
        tasks.start_task("afc_copyvios", action="process_move", pages=p)
        chans.add("#wikipedia-en-afc")

    elif rc.flags == "delete" and r_delete.match(comment):
        # r_deleted_page has a single capture group, so findall() yields
        # plain strings: [0] is the whole page title (indexing it again
        # would keep only the title's first character)
        p = r_deleted_page.findall(rc.comment)[0]
        tasks.start_task("afc_statistics", action="process_delete", page=p)
        tasks.start_task("afc_copyvios", action="process_delete", page=p)
        chans.add("#wikipedia-en-afc")

    elif rc.flags == "restore" and r_restore.match(comment):
        # single capture group again: [0] is the full restored page title
        p = r_restored_page.findall(rc.comment)[0]
        tasks.start_task("afc_statistics", action="process_restore", page=p)
        tasks.start_task("afc_copyvios", action="process_restore", page=p)
        chans.add("#wikipedia-en-afc")

    elif rc.flags == "protect" and r_protect.match(comment):
        chans.add("#wikipedia-en-afc")

    return chans

+ 43
- 27
wiki/task_manager.py View File

@@ -1,25 +1,32 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-


# A module to manage bot tasks.
"""
EarwigBot's Wiki Bot Task Manager

This module provides some functions to run and load bot tasks from wiki/tasks/.
"""


import time import time
import traceback import traceback
import threading import threading
import os import os


from config import schedule
from core import config


task_list = dict() # the key is the task's name, the value is the task's class instance
# store loaded tasks as a dict where the key is the task name and the value is
# an instance of the task class (wiki.tasks.task_file.Task())
task_list = dict()


def load_tasks(): def load_tasks():
"""Load all valid task classes from wiki/tasks/, and add them to the task_list."""
files = os.listdir(os.path.join("wiki", "tasks")) # get all files in wiki/tasks/
files.sort() # alphabetically sort list of files
"""Load all valid task classes from wiki/tasks/, and add them to the
task_list variable."""
files = os.listdir(os.path.join("wiki", "tasks"))
files.sort() # alphabetically sort all files in wiki/tasks/
for f in files: for f in files:
if not os.path.isfile(os.path.join("wiki", "tasks", f)): # ignore non-files
continue
if f.startswith("_") or not f.endswith(".py"): # ignore non-python files or files beginning with "_"
continue
if not os.path.isfile(os.path.join("wiki", "tasks", f)):
continue # ignore non-files
if f.startswith("_") or not f.endswith(".py"):
continue # ignore non-python files or files beginning with an _
load_class_from_file(f) load_class_from_file(f)
print "Found %s tasks: %s." % (len(task_list), ', '.join(task_list.keys())) print "Found %s tasks: %s." % (len(task_list), ', '.join(task_list.keys()))


@@ -27,10 +34,10 @@ def load_class_from_file(f):
"""Look in a given file for the task class.""" """Look in a given file for the task class."""
global task_list global task_list
module = f[:-3] # strip .py from end
module = f[:-3] # strip .py from end
try: try:
exec "from wiki.tasks import %s as m" % module exec "from wiki.tasks import %s as m" % module
except: # importing the file failed for some reason...
except: # importing the file failed for some reason...
print "Couldn't load task file %s:" % f print "Couldn't load task file %s:" % f
traceback.print_exc() traceback.print_exc()
return return
@@ -46,26 +53,34 @@ def load_class_from_file(f):


def start_tasks(now=time.gmtime()): def start_tasks(now=time.gmtime()):
"""Start all tasks that are supposed to be run at a given time.""" """Start all tasks that are supposed to be run at a given time."""
tasks = schedule.check(now.tm_min, now.tm_hour, now.tm_mday, now.tm_mon, now.tm_wday) # get list of tasks to run this turn
tasks = config.schedule.check(now.tm_min, now.tm_hour, now.tm_mday,
now.tm_mon, now.tm_wday) # get list of tasks to run this turn

for task in tasks: for task in tasks:
if isinstance(task, tuple): # they've specified kwargs, so pass those to start_task
start_task(task[0], **task[1])
else: # otherwise, just pass task_name
if isinstance(task, tuple): # they've specified kwargs
start_task(task[0], **task[1]) # so pass those to start_task
else: # otherwise, just pass task_name
start_task(task) start_task(task)


def start_task(task_name, **kwargs): def start_task(task_name, **kwargs):
"""Start a given task in a new thread. Pass args to the task's run function."""
print "Starting task '{}' in a new thread...".format(task_name)
"""Start a given task in a new thread. Pass args to the task's run()
function."""
print "Starting task '{0}' in a new thread...".format(task_name)

try: try:
task = task_list[task_name] # get the class for this task, a subclass of BaseTask
task = task_list[task_name]
except KeyError: except KeyError:
print "Couldn't find task '{}': wiki/tasks/{}.py does not exist.".format(task_name, task_name)
print ("Couldn't find task '{0}': wiki/tasks/{1}.py does not " +
"exist.").format(task_name, task_name)
return return
task_thread = threading.Thread(target=lambda: task_wrapper(task, **kwargs)) # Normally we'd do task_wrapper(task, **kwargs), but because of threading we'd have to do Thread(target=task_wrapper, args=(task, **kwargs)), which doesn't work because the **kwargs is inside a tuple, not inside function params. Use lambda to get around the args=tuple nonsense
task_thread.name = "{} ({})".format(task_name, time.strftime("%b %d %H:%M:%S"))
task_thread.daemon = True # stop bot task threads automagically if the main bot stops

task_thread = threading.Thread(target=lambda: task_wrapper(task, **kwargs))
task_thread.name = "{0} ({1})".format(task_name, time.strftime(
"%b %d %H:%M:%S"))

# stop bot task threads automagically if the main bot stops
task_thread.daemon = True

task_thread.start() task_thread.start()


def task_wrapper(task, **kwargs): def task_wrapper(task, **kwargs):
@@ -73,7 +88,8 @@ def task_wrapper(task, **kwargs):
try: try:
task.run(**kwargs) task.run(**kwargs)
except: except:
print "Task '{}' raised an exception and had to stop:".format(task.task_name)
print "Task '{0}' raised an exception and had to stop:".format(
task.task_name)
traceback.print_exc() traceback.print_exc()
else: else:
print "Task '{}' finished without error.".format(task.task_name)
print "Task '{0}' finished without error.".format(task.task_name)

Loading…
Cancel
Save