@@ -8,13 +8,12 @@ including encrypting and decrypting passwords and making a new config file from | |||
scratch at the initial bot run. | |||
Usually you'll just want to do "from core import config" and access config data | |||
from within config's five global variables: | |||
from within config's four global variables: | |||
* config.components | |||
* config.wiki | |||
* config.irc | |||
* config.schedule | |||
* config.watcher | |||
""" | |||
from collections import defaultdict | |||
@@ -170,10 +169,10 @@ def parse_config(key): | |||
exit(1) | |||
def _parse_config(key): | |||
"""Parse config data from a DOM object into the five global variables that | |||
"""Parse config data from a DOM object into the four global variables that | |||
store our config info. The key is used to unencrypt passwords stored in the | |||
XML config file.""" | |||
global components, wiki, irc, schedule, watcher | |||
global components, wiki, irc, schedule | |||
_load_config() # we might be re-loading unnecessarily here, but no harm in | |||
# that! | |||
@@ -183,7 +182,6 @@ def _parse_config(key): | |||
wiki = parse_wiki(data, key) | |||
irc = parse_irc(data, key) | |||
schedule = parse_schedule(data) | |||
watcher = parse_watcher(data) | |||
def parse_components(data): | |||
"""Parse everything within the <components> XML tag of our config file. | |||
@@ -270,11 +268,44 @@ def parse_irc(data, key): | |||
irc.permissions[group_name].append(hostname) | |||
return irc | |||
def parse_schedule(data):
    """Build the schedule container from the <schedule> tag of the config.

    The element returned by get_first_element(data, "schedule") is stored as
    schedule.data, and the _schedule() function is exposed as
    schedule.check(). The populated Container is returned.
    """
    schedule = Container()
    schedule.check = _schedule
    schedule.data = get_first_element(data, "schedule")
    return schedule
def parse_watcher(data):
    """Placeholder for parsing the <watcher> tag of our XML config file.

    Nothing is parsed yet: data is ignored and None is returned.
    """
    return None
def _schedule(minute, hour, month_day, month, week_day):
    """Collect the tasks scheduled to run at the time given by the arguments.

    The schedule comes from the config file's <schedule> tag, stored as
    schedule.data; call this function as config.schedule.check(args). Each
    <when> element is selected only if every time attribute it carries equals
    the corresponding argument. Every <task> inside a selected <when> is
    returned either as a (task_name, kwargs) tuple, when it has attributes
    other than "name", or as the bare task_name.
    """
    current = {"minute": minute, "hour": hour, "month_day": month_day,
               "month": month, "week_day": week_day}
    due = []

    for when in schedule.data.getElementsByTagName("when"):
        # any() stops at the first attribute that does not match, so later
        # attributes of a rejected <when> are never converted
        mismatch = any(
            attribute_to_int(when.getAttribute(field), when, field) != wanted
            for field, wanted in current.items() if when.hasAttribute(field))
        if mismatch:
            continue

        for task in when.getElementsByTagName("task"):
            name = get_required_attribute(task, "name")
            params = dict((attr, task.getAttribute(attr))
                          for attr in task.attributes.keys())
            del params["name"]  # the name is not a keyword argument
            due.append((name, params) if params else name)

    return due
@@ -42,8 +42,8 @@ root_dir = os.path.split(script_dir)[0] # the bot's "root" directory relative | |||
sys.path.append(root_dir) # make sure we look in the root dir for modules | |||
from core import config | |||
from irc import frontend#, watcher | |||
#from wiki import task_manager | |||
from irc import frontend, watcher | |||
from wiki import task_manager | |||
f_conn = None | |||
w_conn = None | |||
@@ -126,7 +126,7 @@ def run(): | |||
task_manager.load_tasks() # watcher on another thread iff it | |||
if components["irc_watcher"]: # is enabled | |||
print "\nStarting IRC watcher..." | |||
t_watcher = threading.Thread(target=irc_watcher, args=(f_conn,)) | |||
t_watcher = threading.Thread(target=irc_watcher, args=()) | |||
t_watcher.name = "irc-watcher" | |||
t_watcher.daemon = True | |||
t_watcher.start() | |||
@@ -1,7 +1,7 @@ | |||
# -*- coding: utf-8 -*- | |||
""" | |||
EarwigBot's Front-end IRC Component | |||
EarwigBot's IRC Front-end Component | |||
The IRC frontend runs on a normal IRC server and expects users to interact with | |||
it and give it commands. Commands are stored as "command classes", subclasses | |||
@@ -1,20 +1,34 @@ | |||
# -*- coding: utf-8 -*- | |||
## Imports | |||
from config.irc import * | |||
from config.main import * | |||
from config.watcher import * | |||
""" | |||
EarwigBot's IRC Watcher Component | |||
The IRC watcher runs on a wiki recent-changes server and listens for edits. | |||
Users cannot interact with this part of the bot. When an event occurs, run it | |||
through irc/watcher_logic.py's process() function, which can result in either | |||
wiki bot tasks being started (listed in wiki/tasks/) or messages being sent to | |||
channels in the IRC frontend. | |||
""" | |||
from core import config | |||
from irc.connection import * | |||
from irc.rc import RC | |||
from irc import watcher_logic | |||
global frontend_conn | |||
frontend_conn = None | |||
def get_connection():
    """Return a new Connection() instance with information about our server
    connection, but don't actually connect yet.

    Host, port, nick and real name are read from config.irc.watcher.
    """
    cf = config.irc.watcher
    # NOTE(review): the nick is also passed in the slot that previously
    # received a separate IDENT value -- confirm this is intended
    return Connection(cf.host, cf.port, cf.nick, cf.nick, cf.realname)
def main(connection, f_conn): | |||
def main(connection, f_conn=None): | |||
"""Main loop for the Watcher IRC Bot component. get_connection() should | |||
have already been called and the connection should have been started with | |||
connection.connect(). Accept the frontend connection as well as an optional | |||
parameter in order to send messages directly to frontend IRC channels.""" | |||
global frontend_conn | |||
frontend_conn = f_conn | |||
read_buffer = str() | |||
@@ -33,26 +47,32 @@ def main(connection, f_conn): | |||
if line[1] == "PRIVMSG": | |||
chan = line[2] | |||
if chan != WATCHER_CHAN: # if we're getting a msg from another channel, ignore it | |||
# ignore messages originating from channels not in our list, to | |||
# prevent someone PMing us false data | |||
if chan not in config.irc.watcher.channels: | |||
continue | |||
msg = ' '.join(line[3:])[1:] | |||
rc = RC(msg) # create a new RC object to store this change's data | |||
rc.parse() | |||
check(rc) | |||
rc = RC(msg) # new RC object to store this event's data | |||
rc.parse() # parse a message into pagenames, usernames, etc. | |||
process(rc) # report to frontend channels or start tasks | |||
if line[0] == "PING": # If we are pinged, pong back to the server | |||
if line[0] == "PING": # if we are pinged, pong back to the server | |||
connection.send("PONG %s" % line[1]) | |||
if line[1] == "376": # Join the recent changes channel when we've finished starting up | |||
connection.join(WATCHER_CHAN) | |||
def check(rc): | |||
"""check if we're supposed to report this message anywhere""" | |||
results = process(rc) # process the message in config/watcher.py, and get a list of channels to send it to | |||
if not results: | |||
return | |||
pretty = rc.get_pretty() | |||
if enable_irc_frontend: | |||
for chan in results: | |||
# when we've finished starting up, join all watcher channels | |||
if line[1] == "376": | |||
for chan in config.irc.watcher.channels: | |||
connection.join(chan) | |||
def process(rc):
    """Hand an RC object to the configurable watcher logic and relay it.

    irc/watcher_logic.py's process() decides what to do with the event and
    gives back the channels to report it to. If it names any channels and a
    frontend connection is available, the event's pretty-printed form is
    said in each of those channels.
    """
    targets = watcher_logic.process(rc)
    if not targets or not frontend_conn:
        return

    message = rc.get_pretty()
    for chan in targets:
        frontend_conn.say(chan, message)
@@ -0,0 +1,74 @@ | |||
# -*- coding: utf-8 -*- | |||
""" | |||
EarwigBot's IRC Watcher Logic | |||
This file contains (configurable!) rules that EarwigBot's watcher uses after it | |||
receives an event from IRC. | |||
This should, ideally, be in config.xml somehow, but Python code makes more | |||
sense for this sort of thing... so... | |||
""" | |||
import re | |||
from wiki import task_manager as tasks | |||
# Page-title prefix (lowercase) shared by all Articles for Creation pages.
afc_prefix = r"wikipedia( talk)?:(wikiproject )?articles for creation"

# compile some regexps used when finding specific events; raw strings keep the
# regex escapes intact, and explicit {0} fields match the rest of the codebase
r_page = re.compile(afc_prefix)
r_ffu = re.compile(r"wikipedia( talk)?:files for upload")
r_move1 = re.compile(r"moved \[\[{0}".format(afc_prefix))
r_move2 = re.compile(r"moved \[\[(.*?)\]\] to \[\[{0}".format(afc_prefix))
r_moved_pages = re.compile(r"^moved \[\[(.*?)\]\] to \[\[(.*?)\]\]")
r_delete = re.compile(r'deleted "\[\[{0}'.format(afc_prefix))
r_deleted_page = re.compile(r'^deleted "\[\[(.*?)\]\]')
r_restore = re.compile(r'restored "\[\[{0}'.format(afc_prefix))
r_restored_page = re.compile(r'^restored "\[\[(.*?)\]\]')
r_protect = re.compile(r'protected "\[\[{0}'.format(afc_prefix))
def process(rc):
    """Given an RC() object, return the set of channels to report this event
    to. Also, start any wiki bot tasks within this function if necessary.

    Edits, moves, deletions and restorations that touch Articles for Creation
    pages start the "afc_statistics" and "afc_copyvios" tasks. Rules are
    matched against lowercased text, but page titles handed to the tasks are
    taken from the original comment so that their case is preserved.
    """
    chans = set()  # channels to report this message to
    page_name = rc.page.lower()
    comment = rc.comment.lower()

    if "!earwigbot" in rc.msg.lower():
        chans.update(("##earwigbot", "#wikipedia-en-afc"))

    if r_page.search(page_name):
        tasks.start_task("afc_statistics", action="process_edit", page=rc.page)
        tasks.start_task("afc_copyvios", action="process_edit", page=rc.page)
        chans.add("#wikipedia-en-afc")

    elif r_ffu.match(page_name):
        chans.add("#wikipedia-en-afc")

    elif page_name.startswith("template:afc submission"):
        chans.add("#wikipedia-en-afc")

    elif rc.flags == "move" and (r_move1.match(comment) or
                                 r_move2.match(comment)):
        # only start tasks when both titles can be extracted; the event is
        # still reported either way
        found = r_moved_pages.match(rc.comment)
        if found:
            p = found.groups()  # (old_title, new_title)
            tasks.start_task("afc_statistics", action="process_move", pages=p)
            tasks.start_task("afc_copyvios", action="process_move", pages=p)
        chans.add("#wikipedia-en-afc")

    elif rc.flags == "delete" and r_delete.match(comment):
        found = r_deleted_page.match(rc.comment)
        if found:
            # group(1) is the whole title; indexing the findall() result
            # twice would only give its first character
            p = found.group(1)
            tasks.start_task("afc_statistics", action="process_delete", page=p)
            tasks.start_task("afc_copyvios", action="process_delete", page=p)
        chans.add("#wikipedia-en-afc")

    elif rc.flags == "restore" and r_restore.match(comment):
        found = r_restored_page.match(rc.comment)
        if found:
            p = found.group(1)
            tasks.start_task("afc_statistics", action="process_restore", page=p)
            tasks.start_task("afc_copyvios", action="process_restore", page=p)
        chans.add("#wikipedia-en-afc")

    elif rc.flags == "protect" and r_protect.match(comment):
        chans.add("#wikipedia-en-afc")

    return chans
@@ -1,25 +1,32 @@ | |||
# -*- coding: utf-8 -*- | |||
# A module to manage bot tasks. | |||
""" | |||
EarwigBot's Wiki Bot Task Manager | |||
This module provides some functions to run and load bot tasks from wiki/tasks/. | |||
""" | |||
import time | |||
import traceback | |||
import threading | |||
import os | |||
from config import schedule | |||
from core import config | |||
task_list = dict() # the key is the task's name, the value is the task's class instance | |||
# store loaded tasks as a dict where the key is the task name and the value is | |||
# an instance of the task class (wiki.tasks.task_file.Task()) | |||
task_list = dict() | |||
def load_tasks():
    """Load all valid task classes from wiki/tasks/, and add them to the
    task_list variable.

    Entries are visited in alphabetical order. Anything that is not a regular
    file, does not end in ".py", or begins with an underscore is ignored;
    every other file is handed to load_class_from_file(). A summary of the
    loaded tasks is printed at the end.
    """
    # the path is relative, so it is resolved against the working directory
    task_dir = os.path.join("wiki", "tasks")
    for f in sorted(os.listdir(task_dir)):
        if not os.path.isfile(os.path.join(task_dir, f)):
            continue  # ignore non-files
        if f.startswith("_") or not f.endswith(".py"):
            continue  # ignore non-python files or files beginning with an _
        load_class_from_file(f)

    print("Found %s tasks: %s." % (len(task_list),
                                   ', '.join(task_list.keys())))
@@ -27,10 +34,10 @@ def load_class_from_file(f): | |||
"""Look in a given file for the task class.""" | |||
global task_list | |||
module = f[:-3] # strip .py from end | |||
module = f[:-3] # strip .py from end | |||
try: | |||
exec "from wiki.tasks import %s as m" % module | |||
except: # importing the file failed for some reason... | |||
except: # importing the file failed for some reason... | |||
print "Couldn't load task file %s:" % f | |||
traceback.print_exc() | |||
return | |||
@@ -46,26 +53,34 @@ def load_class_from_file(f): | |||
def start_tasks(now=None):
    """Start all tasks that are supposed to be run at a given time.

    now is a time.struct_time; when omitted, the current UTC time is used. It
    is looked up on every call rather than in the signature, because a default
    of time.gmtime() would be evaluated only once, when the module is
    imported.
    """
    if now is None:
        now = time.gmtime()

    # get list of tasks to run this turn
    tasks = config.schedule.check(now.tm_min, now.tm_hour, now.tm_mday,
                                  now.tm_mon, now.tm_wday)

    for task in tasks:
        if isinstance(task, tuple):  # they've specified kwargs
            start_task(task[0], **task[1])  # so pass those to start_task
        else:  # otherwise, just pass task_name
            start_task(task)
def start_task(task_name, **kwargs):
    """Start a given task in a new thread. Pass args to the task's run()
    function.

    The task is looked up by name in task_list; if it is not there, a message
    is printed and nothing is started. Otherwise task_wrapper(task, **kwargs)
    runs in a daemon thread named after the task and the current time.
    """
    print("Starting task '{0}' in a new thread...".format(task_name))

    try:
        task = task_list[task_name]
    except KeyError:
        print("Couldn't find task '{0}': wiki/tasks/{0}.py does not "
              "exist.".format(task_name))
        return

    # Thread forwards args and kwargs to the target itself, so no lambda is
    # needed to pass the keyword arguments through
    task_thread = threading.Thread(target=task_wrapper, args=(task,),
                                   kwargs=kwargs)
    task_thread.name = "{0} ({1})".format(task_name, time.strftime(
        "%b %d %H:%M:%S"))

    # stop bot task threads automagically if the main bot stops
    task_thread.daemon = True
    task_thread.start()
def task_wrapper(task, **kwargs): | |||
@@ -73,7 +88,8 @@ def task_wrapper(task, **kwargs): | |||
try: | |||
task.run(**kwargs) | |||
except: | |||
print "Task '{}' raised an exception and had to stop:".format(task.task_name) | |||
print "Task '{0}' raised an exception and had to stop:".format( | |||
task.task_name) | |||
traceback.print_exc() | |||
else: | |||
print "Task '{}' finished without error.".format(task.task_name) | |||
print "Task '{0}' finished without error.".format(task.task_name) |