Sfoglia il codice sorgente

parse remaining parts of config.xml; getting rid of config.watcher and moving to irc/watcher_logic.py; convert irc/watcher.py and wiki/task_manager.py to new config system; other changes/fixes/whatever

tags/v0.1^2
Ben Kurtovic 13 anni fa
parent
commit
d57f2623d2
6 ha cambiato i file con 206 aggiunte e 65 eliminazioni
  1. +42
    -11
      core/config.py
  2. +3
    -3
      core/main.py
  3. +1
    -1
      irc/frontend.py
  4. +43
    -23
      irc/watcher.py
  5. +74
    -0
      irc/watcher_logic.py
  6. +43
    -27
      wiki/task_manager.py

+ 42
- 11
core/config.py Vedi File

@@ -8,13 +8,12 @@ including encrypting and decrypting passwords and making a new config file from
scratch at the initial bot run.

Usually you'll just want to do "from core import config" and access config data
from within config's five global variables:
from within config's four global variables:

* config.components
* config.wiki
* config.irc
* config.schedule
* config.watcher
"""

from collections import defaultdict
@@ -170,10 +169,10 @@ def parse_config(key):
exit(1)

def _parse_config(key):
"""Parse config data from a DOM object into the five global variables that
"""Parse config data from a DOM object into the four global variables that
store our config info. The key is used to unencrypt passwords stored in the
XML config file."""
global components, wiki, irc, schedule, watcher
global components, wiki, irc, schedule

_load_config() # we might be re-loading unnecessarily here, but no harm in
# that!
@@ -183,7 +182,6 @@ def _parse_config(key):
wiki = parse_wiki(data, key)
irc = parse_irc(data, key)
schedule = parse_schedule(data)
watcher = parse_watcher(data)

def parse_components(data):
"""Parse everything within the <components> XML tag of our config file.
@@ -270,11 +268,44 @@ def parse_irc(data, key):
irc.permissions[group_name].append(hostname)

return irc
def parse_schedule(data):
"""Parse everything within the <schedule> tag of our XML config file."""
pass
"""Store the <schedule> element in schedule.data and the _schedule()
function as schedule.check()."""
schedule = Container()
schedule.check = _schedule
schedule.data = get_first_element(data, "schedule")
return schedule
def parse_watcher(data):
"""Parse everything within the <watcher> tag of our XML config file."""
pass
def _schedule(minute, hour, month_day, month, week_day):
    """Return the tasks that are scheduled to run at the given time.

    The schedule comes from the <schedule> element of our config file, which
    is stored as schedule.data. Every <when> element inside it is examined:
    each time attribute it carries (minute, hour, month_day, month, week_day)
    must equal the matching argument, while attributes it omits place no
    restriction. The <task> children of every matching <when> are collected.

    Returns a list whose entries are either a bare task name, or a
    (task_name, kwargs) tuple when the <task> element carries attributes
    other than "name". Call this with config.schedule.check(args).
    """
    current = {"minute": minute, "hour": hour, "month_day": month_day,
               "month": month, "week_day": week_day}

    def conflicts(when):
        # True as soon as one attribute present on <when> disagrees with the
        # current time. attribute_to_int() lives elsewhere in this module;
        # presumably it converts the attribute text to an int -- TODO confirm.
        return any(
            attribute_to_int(when.getAttribute(field), when, field) != wanted
            for field, wanted in current.items()
            if when.hasAttribute(field))

    scheduled = []
    for when in schedule.data.getElementsByTagName("when"):
        if conflicts(when):
            continue
        for task in when.getElementsByTagName("task"):
            name = get_required_attribute(task, "name")
            # every attribute except the task's own name becomes a kwarg
            kwargs = dict((attr, task.getAttribute(attr))
                          for attr in task.attributes.keys())
            del kwargs["name"]
            scheduled.append((name, kwargs) if kwargs else name)

    return scheduled

+ 3
- 3
core/main.py Vedi File

@@ -42,8 +42,8 @@ root_dir = os.path.split(script_dir)[0] # the bot's "root" directory relative
sys.path.append(root_dir) # make sure we look in the root dir for modules

from core import config
from irc import frontend#, watcher
#from wiki import task_manager
from irc import frontend, watcher
from wiki import task_manager

f_conn = None
w_conn = None
@@ -126,7 +126,7 @@ def run():
task_manager.load_tasks() # watcher on another thread iff it
if components["irc_watcher"]: # is enabled
print "\nStarting IRC watcher..."
t_watcher = threading.Thread(target=irc_watcher, args=(f_conn,))
t_watcher = threading.Thread(target=irc_watcher, args=())
t_watcher.name = "irc-watcher"
t_watcher.daemon = True
t_watcher.start()


+ 1
- 1
irc/frontend.py Vedi File

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

"""
EarwigBot's Front-end IRC Component
EarwigBot's IRC Front-end Component

The IRC frontend runs on a normal IRC server and expects users to interact with
it and give it commands. Commands are stored as "command classes", subclasses


+ 43
- 23
irc/watcher.py Vedi File

@@ -1,20 +1,34 @@
# -*- coding: utf-8 -*-

## Imports
from config.irc import *
from config.main import *
from config.watcher import *
"""
EarwigBot's IRC Watcher Component

The IRC watcher runs on a wiki recent-changes server and listens for edits.
Users cannot interact with this part of the bot. When an event occurs, run it
through irc/watcher_logic.py's process() function, which can result in either
wiki bot tasks being started (listed in wiki/tasks/) or messages being sent to
channels in the IRC frontend.
"""

from core import config
from irc.connection import *
from irc.rc import RC
from irc import watcher_logic

global frontend_conn
frontend_conn = None

def get_connection():
connection = Connection(WATCHER_HOST, WATCHER_PORT, NICK, IDENT, REALNAME)
"""Return a new Connection() instance with information about our server
connection, but don't actually connect yet."""
cf = config.irc.watcher
connection = Connection(cf.host, cf.port, cf.nick, cf.nick, cf.realname)
return connection

def main(connection, f_conn):
def main(connection, f_conn=None):
"""Main loop for the Watcher IRC Bot component. get_connection() should
have already been called and the connection should have been started with
connection.connect(). Accept the frontend connection as well as an optional
parameter in order to send messages directly to frontend IRC channels."""
global frontend_conn
frontend_conn = f_conn
read_buffer = str()
@@ -33,26 +47,32 @@ def main(connection, f_conn):

if line[1] == "PRIVMSG":
chan = line[2]
if chan != WATCHER_CHAN: # if we're getting a msg from another channel, ignore it

# ignore messages originating from channels not in our list, to
# prevent someone PMing us false data
if chan not in config.irc.watcher.channels:
continue

msg = ' '.join(line[3:])[1:]
rc = RC(msg) # create a new RC object to store this change's data
rc.parse()
check(rc)
rc = RC(msg) # new RC object to store this event's data
rc.parse() # parse a message into pagenames, usernames, etc.
process(rc) # report to frontend channels or start tasks

if line[0] == "PING": # If we are pinged, pong back to the server
if line[0] == "PING": # if we are pinged, pong back to the server
connection.send("PONG %s" % line[1])

if line[1] == "376": # Join the recent changes channel when we've finished starting up
connection.join(WATCHER_CHAN)

def check(rc):
"""check if we're supposed to report this message anywhere"""
results = process(rc) # process the message in config/watcher.py, and get a list of channels to send it to
if not results:
return
pretty = rc.get_pretty()
if enable_irc_frontend:
for chan in results:
# when we've finished starting up, join all watcher channels
if line[1] == "376":
for chan in config.irc.watcher.channels:
connection.join(chan)

def process(rc):
    """Handle one event from the recent-changes feed (an RC object).

    The rules deciding what to do with an event are configurable, so they are
    not hard-coded here: irc/watcher_logic.py's process() is called and hands
    back the channels that should hear about the event. If it names any
    channels and a frontend connection has been set, the event's pretty form
    is said in each of those channels.
    """
    targets = watcher_logic.process(rc)
    if not targets or not frontend_conn:
        return  # nothing to report, or no frontend to report it through

    announcement = rc.get_pretty()
    for target in targets:
        frontend_conn.say(target, announcement)

+ 74
- 0
irc/watcher_logic.py Vedi File

@@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-

"""
EarwigBot's IRC Watcher Logic

This file contains (configurable!) rules that EarwigBot's watcher uses after it
receives an event from IRC.

This should, ideally, be in config.xml somehow, but Python code makes more
sense for this sort of thing... so...
"""

import re

from wiki import task_manager as tasks

# Lower-case regex fragment matching the start of an Articles for Creation
# page title, with an optional " talk" namespace suffix and an optional
# "wikiproject " prefix.
afc_prefix = r"wikipedia( talk)?:(wikiproject )?articles for creation"

# Compile the regexps used when finding specific events. Raw strings are used
# so that backslashes reach the regex engine untouched instead of relying on
# Python passing unknown string escapes such as "\[" through unchanged.
r_page = re.compile(afc_prefix)
r_ffu = re.compile(r"wikipedia( talk)?:files for upload")

# Detect log comments that involve an AfC page...
r_move1 = re.compile(r"moved \[\[{}".format(afc_prefix))
r_move2 = re.compile(r"moved \[\[(.*?)\]\] to \[\[{}".format(afc_prefix))
r_delete = re.compile(r'deleted "\[\[{}'.format(afc_prefix))
r_restore = re.compile(r'restored "\[\[{}'.format(afc_prefix))
r_protect = re.compile(r'protected "\[\[{}'.format(afc_prefix))

# ...and extract the page title(s) from the comment.
r_moved_pages = re.compile(r"^moved \[\[(.*?)\]\] to \[\[(.*?)\]\]")
r_deleted_page = re.compile(r'^deleted "\[\[(.*?)\]\]')
r_restored_page = re.compile(r'^restored "\[\[(.*?)\]\]')

def process(rc):
    """Given an RC() object, return the channels to report this event to.

    Also start any wiki bot tasks from within this function if necessary.
    The event's page name and comment are lower-cased before being compared
    against the module-level regexps; the original-case rc.page / rc.comment
    are what get handed on to the tasks.

    Returns a set of channel names, which is empty when the event should not
    be reported anywhere.
    """
    chans = set()  # channels to report this message to
    page_name = rc.page.lower()
    comment = rc.comment.lower()

    # anything that mentions the bot is reported regardless of the page
    if "!earwigbot" in rc.msg.lower():
        chans.update(("##earwigbot", "#wikipedia-en-afc"))

    if r_page.search(page_name):
        tasks.start_task("afc_statistics", action="process_edit", page=rc.page)
        tasks.start_task("afc_copyvios", action="process_edit", page=rc.page)
        chans.add("#wikipedia-en-afc")

    elif r_ffu.match(page_name):
        chans.add("#wikipedia-en-afc")

    elif page_name.startswith("template:afc submission"):
        chans.add("#wikipedia-en-afc")

    elif rc.flags == "move" and (r_move1.match(comment) or
                                 r_move2.match(comment)):
        # two groups, so findall() yields (source, destination) tuples
        p = r_moved_pages.findall(rc.comment)[0]
        tasks.start_task("afc_statistics", action="process_move", pages=p)
        tasks.start_task("afc_copyvios", action="process_move", pages=p)
        chans.add("#wikipedia-en-afc")

    elif rc.flags == "delete" and r_delete.match(comment):
        # one group, so findall() yields plain strings: [0] is the whole
        # page title (indexing it again would give only its first character)
        p = r_deleted_page.findall(rc.comment)[0]
        tasks.start_task("afc_statistics", action="process_delete", page=p)
        tasks.start_task("afc_copyvios", action="process_delete", page=p)
        chans.add("#wikipedia-en-afc")

    elif rc.flags == "restore" and r_restore.match(comment):
        # same single-group pattern shape as the delete case above
        p = r_restored_page.findall(rc.comment)[0]
        tasks.start_task("afc_statistics", action="process_restore", page=p)
        tasks.start_task("afc_copyvios", action="process_restore", page=p)
        chans.add("#wikipedia-en-afc")

    elif rc.flags == "protect" and r_protect.match(comment):
        chans.add("#wikipedia-en-afc")

    return chans

+ 43
- 27
wiki/task_manager.py Vedi File

@@ -1,25 +1,32 @@
# -*- coding: utf-8 -*-

# A module to manage bot tasks.
"""
EarwigBot's Wiki Bot Task Manager

This module provides some functions to run and load bot tasks from wiki/tasks/.
"""

import time
import traceback
import threading
import os

from config import schedule
from core import config

task_list = dict() # the key is the task's name, the value is the task's class instance
# store loaded tasks as a dict where the key is the task name and the value is
# an instance of the task class (wiki.tasks.task_file.Task())
task_list = dict()

def load_tasks():
"""Load all valid task classes from wiki/tasks/, and add them to the task_list."""
files = os.listdir(os.path.join("wiki", "tasks")) # get all files in wiki/tasks/
files.sort() # alphabetically sort list of files
"""Load all valid task classes from wiki/tasks/, and add them to the
task_list variable."""
files = os.listdir(os.path.join("wiki", "tasks"))
files.sort() # alphabetically sort all files in wiki/tasks/
for f in files:
if not os.path.isfile(os.path.join("wiki", "tasks", f)): # ignore non-files
continue
if f.startswith("_") or not f.endswith(".py"): # ignore non-python files or files beginning with "_"
continue
if not os.path.isfile(os.path.join("wiki", "tasks", f)):
continue # ignore non-files
if f.startswith("_") or not f.endswith(".py"):
continue # ignore non-python files or files beginning with an _
load_class_from_file(f)
print "Found %s tasks: %s." % (len(task_list), ', '.join(task_list.keys()))

@@ -27,10 +34,10 @@ def load_class_from_file(f):
"""Look in a given file for the task class."""
global task_list
module = f[:-3] # strip .py from end
module = f[:-3] # strip .py from end
try:
exec "from wiki.tasks import %s as m" % module
except: # importing the file failed for some reason...
except: # importing the file failed for some reason...
print "Couldn't load task file %s:" % f
traceback.print_exc()
return
@@ -46,26 +53,34 @@ def load_class_from_file(f):

def start_tasks(now=time.gmtime()):
"""Start all tasks that are supposed to be run at a given time."""
tasks = schedule.check(now.tm_min, now.tm_hour, now.tm_mday, now.tm_mon, now.tm_wday) # get list of tasks to run this turn
tasks = config.schedule.check(now.tm_min, now.tm_hour, now.tm_mday,
now.tm_mon, now.tm_wday) # get list of tasks to run this turn

for task in tasks:
if isinstance(task, tuple): # they've specified kwargs, so pass those to start_task
start_task(task[0], **task[1])
else: # otherwise, just pass task_name
if isinstance(task, tuple): # they've specified kwargs
start_task(task[0], **task[1]) # so pass those to start_task
else: # otherwise, just pass task_name
start_task(task)

def start_task(task_name, **kwargs):
"""Start a given task in a new thread. Pass args to the task's run function."""
print "Starting task '{}' in a new thread...".format(task_name)
"""Start a given task in a new thread. Pass args to the task's run()
function."""
print "Starting task '{0}' in a new thread...".format(task_name)

try:
task = task_list[task_name] # get the class for this task, a subclass of BaseTask
task = task_list[task_name]
except KeyError:
print "Couldn't find task '{}': wiki/tasks/{}.py does not exist.".format(task_name, task_name)
print ("Couldn't find task '{0}': wiki/tasks/{1}.py does not " +
"exist.").format(task_name, task_name)
return
task_thread = threading.Thread(target=lambda: task_wrapper(task, **kwargs)) # Normally we'd do task_wrapper(task, **kwargs), but because of threading we'd have to do Thread(target=task_wrapper, args=(task, **kwargs)), which doesn't work because the **kwargs is inside a tuple, not inside function params. Use lambda to get around the args=tuple nonsense
task_thread.name = "{} ({})".format(task_name, time.strftime("%b %d %H:%M:%S"))
task_thread.daemon = True # stop bot task threads automagically if the main bot stops

task_thread = threading.Thread(target=lambda: task_wrapper(task, **kwargs))
task_thread.name = "{0} ({1})".format(task_name, time.strftime(
"%b %d %H:%M:%S"))

# stop bot task threads automagically if the main bot stops
task_thread.daemon = True

task_thread.start()

def task_wrapper(task, **kwargs):
@@ -73,7 +88,8 @@ def task_wrapper(task, **kwargs):
try:
task.run(**kwargs)
except:
print "Task '{}' raised an exception and had to stop:".format(task.task_name)
print "Task '{0}' raised an exception and had to stop:".format(
task.task_name)
traceback.print_exc()
else:
print "Task '{}' finished without error.".format(task.task_name)
print "Task '{0}' finished without error.".format(task.task_name)

Caricamento…
Annulla
Salva