@@ -8,13 +8,12 @@ including encrypting and decrypting passwords and making a new config file from | |||||
scratch at the initial bot run. | scratch at the initial bot run. | ||||
Usually you'll just want to do "from core import config" and access config data | Usually you'll just want to do "from core import config" and access config data | ||||
from within config's five global variables: | |||||
from within config's four global variables: | |||||
* config.components | * config.components | ||||
* config.wiki | * config.wiki | ||||
* config.irc | * config.irc | ||||
* config.schedule | * config.schedule | ||||
* config.watcher | |||||
""" | """ | ||||
from collections import defaultdict | from collections import defaultdict | ||||
@@ -170,10 +169,10 @@ def parse_config(key): | |||||
exit(1) | exit(1) | ||||
def _parse_config(key): | def _parse_config(key): | ||||
"""Parse config data from a DOM object into the five global variables that | |||||
"""Parse config data from a DOM object into the four global variables that | |||||
store our config info. The key is used to unencrypt passwords stored in the | store our config info. The key is used to unencrypt passwords stored in the | ||||
XML config file.""" | XML config file.""" | ||||
global components, wiki, irc, schedule, watcher | |||||
global components, wiki, irc, schedule | |||||
_load_config() # we might be re-loading unnecessarily here, but no harm in | _load_config() # we might be re-loading unnecessarily here, but no harm in | ||||
# that! | # that! | ||||
@@ -183,7 +182,6 @@ def _parse_config(key): | |||||
wiki = parse_wiki(data, key) | wiki = parse_wiki(data, key) | ||||
irc = parse_irc(data, key) | irc = parse_irc(data, key) | ||||
schedule = parse_schedule(data) | schedule = parse_schedule(data) | ||||
watcher = parse_watcher(data) | |||||
def parse_components(data): | def parse_components(data): | ||||
"""Parse everything within the <components> XML tag of our config file. | """Parse everything within the <components> XML tag of our config file. | ||||
@@ -270,11 +268,44 @@ def parse_irc(data, key): | |||||
irc.permissions[group_name].append(hostname) | irc.permissions[group_name].append(hostname) | ||||
return irc | return irc | ||||
def parse_schedule(data):
    """Build the container that backs config.schedule.

    The returned Container carries two attributes:
      * data  -- the first <schedule> element found under `data`, as returned
                 by get_first_element()
      * check -- the module-level _schedule() function, so callers can ask
                 config.schedule.check(...) which tasks are due
    """
    container = Container()
    container.data = get_first_element(data, "schedule")
    container.check = _schedule
    return container
def parse_watcher(data):
    """Placeholder for parsing the <watcher> tag of our XML config file.

    Nothing is parsed: `data` is ignored and None is returned.
    """
    return None
def _schedule(minute, hour, month_day, month, week_day):
    """Return the tasks scheduled to run at the given time.

    The schedule comes from the <schedule> element stored in schedule.data.
    Each <when> element may constrain any of the attributes "minute", "hour",
    "month_day", "month" and "week_day"; a <when> applies if every attribute
    it carries equals the matching argument (attributes it omits are not
    checked). Each <task> inside an applicable <when> contributes one entry:
    a (name, kwargs) tuple if the task has attributes besides "name",
    otherwise just the name.

    Call this function as config.schedule.check(args).
    """
    current = {"minute": minute, "hour": hour, "month_day": month_day,
               "month": month, "week_day": week_day}
    due = []

    for when in schedule.data.getElementsByTagName("when"):
        # any() stops at the first mismatching attribute, like a break would
        mismatch = any(
            attribute_to_int(when.getAttribute(field), when, field) != wanted
            for field, wanted in current.items()
            if when.hasAttribute(field))
        if mismatch:
            continue

        for task in when.getElementsByTagName("task"):
            name = get_required_attribute(task, "name")
            params = dict((attr, task.getAttribute(attr))
                          for attr in task.attributes.keys())
            del params["name"]  # the name is not a keyword argument
            due.append((name, params) if params else name)

    return due
@@ -42,8 +42,8 @@ root_dir = os.path.split(script_dir)[0] # the bot's "root" directory relative | |||||
sys.path.append(root_dir) # make sure we look in the root dir for modules | sys.path.append(root_dir) # make sure we look in the root dir for modules | ||||
from core import config | from core import config | ||||
from irc import frontend#, watcher | |||||
#from wiki import task_manager | |||||
from irc import frontend, watcher | |||||
from wiki import task_manager | |||||
f_conn = None | f_conn = None | ||||
w_conn = None | w_conn = None | ||||
@@ -126,7 +126,7 @@ def run(): | |||||
task_manager.load_tasks() # watcher on another thread iff it | task_manager.load_tasks() # watcher on another thread iff it | ||||
if components["irc_watcher"]: # is enabled | if components["irc_watcher"]: # is enabled | ||||
print "\nStarting IRC watcher..." | print "\nStarting IRC watcher..." | ||||
t_watcher = threading.Thread(target=irc_watcher, args=(f_conn,)) | |||||
t_watcher = threading.Thread(target=irc_watcher, args=()) | |||||
t_watcher.name = "irc-watcher" | t_watcher.name = "irc-watcher" | ||||
t_watcher.daemon = True | t_watcher.daemon = True | ||||
t_watcher.start() | t_watcher.start() | ||||
@@ -1,7 +1,7 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
""" | """ | ||||
EarwigBot's Front-end IRC Component | |||||
EarwigBot's IRC Front-end Component | |||||
The IRC frontend runs on a normal IRC server and expects users to interact with | The IRC frontend runs on a normal IRC server and expects users to interact with | ||||
it and give it commands. Commands are stored as "command classes", subclasses | it and give it commands. Commands are stored as "command classes", subclasses | ||||
@@ -1,20 +1,34 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
## Imports | |||||
from config.irc import * | |||||
from config.main import * | |||||
from config.watcher import * | |||||
""" | |||||
EarwigBot's IRC Watcher Component | |||||
The IRC watcher runs on a wiki recent-changes server and listens for edits. | |||||
Users cannot interact with this part of the bot. When an event occurs, run it | |||||
through irc/watcher_logic.py's process() function, which can result in either | |||||
wiki bot tasks being started (listed in wiki/tasks/) or messages being sent to | |||||
channels in the IRC frontend. | |||||
""" | |||||
from core import config | |||||
from irc.connection import * | from irc.connection import * | ||||
from irc.rc import RC | from irc.rc import RC | ||||
from irc import watcher_logic | |||||
global frontend_conn | |||||
frontend_conn = None | |||||
def get_connection():
    """Construct the watcher's Connection() from config.irc.watcher.

    The host, port, nick and realname come from the config; the nick is
    passed twice, so it is also used for the second identity argument. This
    only builds the object -- main() expects the caller to have started it
    with connection.connect().
    """
    watcher_cf = config.irc.watcher
    return Connection(watcher_cf.host, watcher_cf.port, watcher_cf.nick,
                      watcher_cf.nick, watcher_cf.realname)
def main(connection, f_conn): | |||||
def main(connection, f_conn=None): | |||||
"""Main loop for the Watcher IRC Bot component. get_connection() should | |||||
have already been called and the connection should have been started with | |||||
connection.connect(). Accept the frontend connection as well as an optional | |||||
parameter in order to send messages directly to frontend IRC channels.""" | |||||
global frontend_conn | global frontend_conn | ||||
frontend_conn = f_conn | frontend_conn = f_conn | ||||
read_buffer = str() | read_buffer = str() | ||||
@@ -33,26 +47,32 @@ def main(connection, f_conn): | |||||
if line[1] == "PRIVMSG": | if line[1] == "PRIVMSG": | ||||
chan = line[2] | chan = line[2] | ||||
if chan != WATCHER_CHAN: # if we're getting a msg from another channel, ignore it | |||||
# ignore messages originating from channels not in our list, to | |||||
# prevent someone PMing us false data | |||||
if chan not in config.irc.watcher.channels: | |||||
continue | continue | ||||
msg = ' '.join(line[3:])[1:] | msg = ' '.join(line[3:])[1:] | ||||
rc = RC(msg) # create a new RC object to store this change's data | |||||
rc.parse() | |||||
check(rc) | |||||
rc = RC(msg) # new RC object to store this event's data | |||||
rc.parse() # parse a message into pagenames, usernames, etc. | |||||
process(rc) # report to frontend channels or start tasks | |||||
if line[0] == "PING": # If we are pinged, pong back to the server | |||||
if line[0] == "PING": # if we are pinged, pong back to the server | |||||
connection.send("PONG %s" % line[1]) | connection.send("PONG %s" % line[1]) | ||||
if line[1] == "376": # Join the recent changes channel when we've finished starting up | |||||
connection.join(WATCHER_CHAN) | |||||
def check(rc): | |||||
"""check if we're supposed to report this message anywhere""" | |||||
results = process(rc) # process the message in config/watcher.py, and get a list of channels to send it to | |||||
if not results: | |||||
return | |||||
pretty = rc.get_pretty() | |||||
if enable_irc_frontend: | |||||
for chan in results: | |||||
# when we've finished starting up, join all watcher channels | |||||
if line[1] == "376": | |||||
for chan in config.irc.watcher.channels: | |||||
connection.join(chan) | |||||
def process(rc):
    """Hand an RC object to the configurable watcher rules and relay it.

    The decision logic lives in irc/watcher_logic.py: its process() function
    returns the channels this event should be reported to. If it returns any
    and a frontend connection is available, the event's pretty-printed form
    is said in each of those channels.
    """
    chans = watcher_logic.process(rc)
    if not chans or not frontend_conn:
        return  # nothing to report, or nowhere to report it

    pretty = rc.get_pretty()
    for chan in chans:
        frontend_conn.say(chan, pretty)
@@ -0,0 +1,74 @@ | |||||
# -*- coding: utf-8 -*- | |||||
""" | |||||
EarwigBot's IRC Watcher Logic | |||||
This file contains (configurable!) rules that EarwigBot's watcher uses after it | |||||
receives an event from IRC. | |||||
This should, ideally, be in config.xml somehow, but Python code makes more | |||||
sense for this sort of thing... so... | |||||
""" | |||||
import re | |||||
from wiki import task_manager as tasks | |||||
# Lower-case title prefix shared by the Articles for Creation patterns below.
afc_prefix = "wikipedia( talk)?:(wikiproject )?articles for creation"

# compile some regexps used when finding specific events
# Raw strings pass the regex escapes (\[ and \]) through untouched, and the
# explicit {0} field index keeps str.format() working on Python 2.6, which
# rejects the auto-numbered "{}" form.
r_page = re.compile(afc_prefix)
r_ffu = re.compile(r"wikipedia( talk)?:files for upload")
r_move1 = re.compile(r"moved \[\[{0}".format(afc_prefix))
r_move2 = re.compile(r"moved \[\[(.*?)\]\] to \[\[{0}".format(afc_prefix))
r_moved_pages = re.compile(r"^moved \[\[(.*?)\]\] to \[\[(.*?)\]\]")
r_delete = re.compile(r'deleted "\[\[{0}'.format(afc_prefix))
r_deleted_page = re.compile(r'^deleted "\[\[(.*?)\]\]')
r_restore = re.compile(r'restored "\[\[{0}'.format(afc_prefix))
r_restored_page = re.compile(r'^restored "\[\[(.*?)\]\]')
r_protect = re.compile(r'protected "\[\[{0}'.format(afc_prefix))
def process(rc):
    """Given an RC() object, return the channels to report this event to.
    Also, start any wiki bot tasks within this function if necessary.

    Reads rc.page, rc.comment, rc.msg and rc.flags. The result is a set of
    channel names, and is empty if the event should not be reported anywhere.

    Page titles handed to tasks are taken from the original-case rc.comment.
    If the title cannot be extracted from the comment, the event is still
    reported but no task is started for it.
    """
    chans = set()  # channels to report this message to
    page_name = rc.page.lower()
    comment = rc.comment.lower()

    if "!earwigbot" in rc.msg.lower():
        chans.update(("##earwigbot", "#wikipedia-en-afc"))

    if r_page.search(page_name):
        tasks.start_task("afc_statistics", action="process_edit", page=rc.page)
        tasks.start_task("afc_copyvios", action="process_edit", page=rc.page)
        chans.add("#wikipedia-en-afc")

    elif r_ffu.match(page_name):
        chans.add("#wikipedia-en-afc")

    elif page_name.startswith("template:afc submission"):
        chans.add("#wikipedia-en-afc")

    elif rc.flags == "move" and (r_move1.match(comment) or
                                 r_move2.match(comment)):
        moved = r_moved_pages.search(rc.comment)
        if moved:
            p = moved.groups()  # (old title, new title)
            tasks.start_task("afc_statistics", action="process_move", pages=p)
            tasks.start_task("afc_copyvios", action="process_move", pages=p)
        chans.add("#wikipedia-en-afc")

    elif rc.flags == "delete" and r_delete.match(comment):
        deleted = r_deleted_page.search(rc.comment)
        if deleted:
            # group(1) is the whole title; indexing findall()'s result twice
            # would yield only the title's first character
            p = deleted.group(1)
            tasks.start_task("afc_statistics", action="process_delete", page=p)
            tasks.start_task("afc_copyvios", action="process_delete", page=p)
        chans.add("#wikipedia-en-afc")

    elif rc.flags == "restore" and r_restore.match(comment):
        restored = r_restored_page.search(rc.comment)
        if restored:
            p = restored.group(1)
            tasks.start_task("afc_statistics", action="process_restore", page=p)
            tasks.start_task("afc_copyvios", action="process_restore", page=p)
        chans.add("#wikipedia-en-afc")

    elif rc.flags == "protect" and r_protect.match(comment):
        chans.add("#wikipedia-en-afc")

    return chans
@@ -1,25 +1,32 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# A module to manage bot tasks. | |||||
""" | |||||
EarwigBot's Wiki Bot Task Manager | |||||
This module provides some functions to run and load bot tasks from wiki/tasks/. | |||||
""" | |||||
import time | import time | ||||
import traceback | import traceback | ||||
import threading | import threading | ||||
import os | import os | ||||
from config import schedule | |||||
from core import config | |||||
task_list = dict() # the key is the task's name, the value is the task's class instance | |||||
# store loaded tasks as a dict where the key is the task name and the value is | |||||
# an instance of the task class (wiki.tasks.task_file.Task()) | |||||
task_list = dict() | |||||
def load_tasks():
    """Load every valid task class found in wiki/tasks/ into task_list.

    Directory entries are visited in alphabetical order. Anything that is not
    a regular file, does not end in ".py", or starts with an underscore is
    ignored; the rest are handed to load_class_from_file(). A summary of the
    loaded tasks is printed afterwards.
    """
    tasks_dir = os.path.join("wiki", "tasks")
    for filename in sorted(os.listdir(tasks_dir)):
        if (not os.path.isfile(os.path.join(tasks_dir, filename))
                or filename.startswith("_")
                or not filename.endswith(".py")):
            continue  # not a loadable task module
        load_class_from_file(filename)

    print("Found %s tasks: %s." % (len(task_list), ', '.join(task_list.keys())))
@@ -27,10 +34,10 @@ def load_class_from_file(f): | |||||
"""Look in a given file for the task class.""" | """Look in a given file for the task class.""" | ||||
global task_list | global task_list | ||||
module = f[:-3] # strip .py from end | |||||
module = f[:-3] # strip .py from end | |||||
try: | try: | ||||
exec "from wiki.tasks import %s as m" % module | exec "from wiki.tasks import %s as m" % module | ||||
except: # importing the file failed for some reason... | |||||
except: # importing the file failed for some reason... | |||||
print "Couldn't load task file %s:" % f | print "Couldn't load task file %s:" % f | ||||
traceback.print_exc() | traceback.print_exc() | ||||
return | return | ||||
@@ -46,26 +53,34 @@ def load_class_from_file(f): | |||||
def start_tasks(now=None):
    """Start all tasks that are supposed to be run at a given time.

    `now` is a time.struct_time (as returned by time.gmtime()). When it is
    omitted, the current UTC time is read at call time. A default of
    time.gmtime() in the signature would be evaluated only once, when the
    function is defined, and every later call would reuse that stale
    timestamp.
    """
    if now is None:
        now = time.gmtime()

    # get list of tasks to run this turn
    tasks = config.schedule.check(now.tm_min, now.tm_hour, now.tm_mday,
                                  now.tm_mon, now.tm_wday)

    for task in tasks:
        if isinstance(task, tuple):  # they've specified kwargs
            start_task(task[0], **task[1])  # so pass those to start_task
        else:  # otherwise, just pass task_name
            start_task(task)
def start_task(task_name, **kwargs):
    """Run the named task in its own daemon thread.

    The task instance is looked up in task_list; if no task of that name was
    loaded, a message is printed and nothing is started. Keyword arguments
    are forwarded through task_wrapper() to the task's run() function.
    """
    print("Starting task '{0}' in a new thread...".format(task_name))

    if task_name not in task_list:
        print("Couldn't find task '{0}': wiki/tasks/{1}.py does not exist.".format(
            task_name, task_name))
        return
    task = task_list[task_name]

    thread_name = "{0} ({1})".format(task_name,
                                     time.strftime("%b %d %H:%M:%S"))
    task_thread = threading.Thread(target=task_wrapper, args=(task,),
                                   kwargs=kwargs, name=thread_name)
    # stop bot task threads automagically if the main bot stops
    task_thread.daemon = True
    task_thread.start()
def task_wrapper(task, **kwargs): | def task_wrapper(task, **kwargs): | ||||
@@ -73,7 +88,8 @@ def task_wrapper(task, **kwargs): | |||||
try: | try: | ||||
task.run(**kwargs) | task.run(**kwargs) | ||||
except: | except: | ||||
print "Task '{}' raised an exception and had to stop:".format(task.task_name) | |||||
print "Task '{0}' raised an exception and had to stop:".format( | |||||
task.task_name) | |||||
traceback.print_exc() | traceback.print_exc() | ||||
else: | else: | ||||
print "Task '{}' finished without error.".format(task.task_name) | |||||
print "Task '{0}' finished without error.".format(task.task_name) |