@@ -0,0 +1,24 @@ | |||
# -*- coding: utf-8 -*- | |||
# EarwigBot Configuration File | |||
# This file tells the bot which of its components should be enabled. | |||
# The IRC frontend (configured in config/irc.py) sits on a public IRC network, | |||
# responds to commands given to it, and reports edits (if the IRC watcher | |||
# component is enabled). | |||
enable_irc_frontend = True | |||
# The IRC watcher (connection details configured in config/irc.py as well) sits
# on an IRC network that gives a recent changes feed, usually irc.wikimedia.org.
# It looks for edits matching certain (often regex) patterns (rules configured
# in config/watcher.py), and either reports them to the IRC frontend (if
# enabled), or activates a task on the WikiBot (if configured to do so).
enable_irc_watcher = True | |||
# EarwigBot doesn't have to edit a wiki, although this is its main purpose. If | |||
# the wiki schedule is disabled, it will not be able to handle scheduled tasks | |||
# that involve editing (such as creating a daily category every day at midnight | |||
# UTC), but it can still edit through rules given in the watcher, and bot tasks | |||
# can still be activated by the command line. The schedule is configured in | |||
# config/schedule.py. | |||
enable_wiki_schedule = True |
@@ -0,0 +1,28 @@ | |||
# -*- coding: utf-8 -*- | |||
# EarwigBot Configuration File | |||
# This file tells the bot when to run certain wiki-editing tasks. | |||
def check(minute, hour, month_day, month, week_day):
    """Return the tasks that should be started at the given time.

    Each entry of the returned list is either a bare task name or a
    ``(task_name, kwargs)`` tuple. ``month_day`` and ``month`` are accepted
    but not consulted by any of the current rules. The weekly rules treat
    ``week_day == 0`` as Sunday, ``1`` as Monday, and so on.
    """
    # Tasks that run once a week at midnight, keyed by the week_day they fire on.
    weekly = {
        0: "afc_undated",    # Sunday (the start of Sunday, not the end): clear [[Category:Undated AfC submissions]]
        1: "afc_catdelink",  # Monday: delink mainspace categories in declined AfC submissions
        2: "wrongmime",      # Tuesday: tag files whose extensions do not agree with their MIME type
        3: "blptag",         # Wednesday: add |blp=yes to {{WPB}} or {{WPBS}} when used along with {{WP Biography}}
    }

    if minute != 0:  # every rule below only fires on the hour
        return []

    # Every hour on the hour: save statistics to [[Template:AFC_statistics]].
    due = [("afc_statistics", {"action": "save"})]

    if hour == 0:  # every day at midnight
        due.append("afc_dailycats")   # create daily categories for WP:AFC
        due.append("feed_dailycats")  # create daily categories for WP:FEED
        if week_day in weekly:
            due.append(weekly[week_day])

    return due
@@ -5,30 +5,66 @@ | |||
import re | |||
from wiki import task_manager | |||
# Report channels on our front-end server. They /must/ be in CHANS in
# config/irc.py or the bot will not be able to send messages to them (unless
# they have -n set).
# Alternative AFC channel kept for reference: ["#wikipedia-en-afc"]
AFC_CHANS = ["##earwigbot"]  # report recent AfC changes/give AfC status messages upon join
BOT_CHANS = ["##earwigbot", "#wikipedia-en-afc"]  # report edits containing "!earwigbot"

# Commonly used pattern fragment; it is matched against lower-cased text.
afc_prefix = "wikipedia( talk)?:(wikiproject )?articles for creation"

# Compiled regexps used when finding certain edits. Raw strings are used so
# that "\[" reaches the regex engine as written instead of relying on Python
# passing unknown string escapes through unchanged.
r_page = re.compile(afc_prefix)
r_ffu = re.compile("wikipedia( talk)?:files for upload")
r_move1 = re.compile(r"moved \[\[{}".format(afc_prefix))  # an AFC page was either moved locally or out
r_move2 = re.compile(r"moved \[\[(.*?)\]\] to \[\[{}".format(afc_prefix))  # an outside page was moved into AFC
r_moved_pages = re.compile(r"^moved \[\[(.*?)\]\] to \[\[(.*?)\]\]")  # captures (source, destination)
r_delete = re.compile(r'deleted "\[\[{}'.format(afc_prefix))
r_deleted_page = re.compile(r'^deleted "\[\[(.*?)\]\]')  # captures the deleted page's title
r_restore = re.compile(r'restored "\[\[{}'.format(afc_prefix))
r_restored_page = re.compile(r'^restored "\[\[(.*?)\]\]')  # captures the restored page's title
r_protect = re.compile(r'protected "\[\[{}'.format(afc_prefix))
def process(rc):
    """Decide where a recent change should be reported and start related tasks.

    Reads ``rc.page``, ``rc.comment``, ``rc.msg`` and ``rc.flags``. Matching is
    done on lower-cased copies of the page name and comment, while the page
    titles handed to tasks are extracted from the comment in its original case.

    Returns the set of channel names the change should be reported to; the
    set is empty when no rule matches.
    """
    def _start_afc_tasks(action, **kwargs):
        # afc_statistics is always started before afc_copyvios.
        for task_name in ("afc_statistics", "afc_copyvios"):
            task_manager.start_task(task_name, action=action, **kwargs)

    chans = set()  # channels to report this message to
    page_name = rc.page.lower()
    comment = rc.comment.lower()

    if "!earwigbot" in rc.msg.lower():
        chans.update(BOT_CHANS)

    if r_page.search(page_name):
        _start_afc_tasks("process_edit", page=rc.page)
        chans.update(AFC_CHANS)

    elif r_ffu.match(page_name):
        chans.update(AFC_CHANS)

    elif page_name.startswith("template:afc submission"):
        chans.update(AFC_CHANS)

    elif rc.flags == "move" and (r_move1.match(comment) or r_move2.match(comment)):
        # Two capture groups, so findall yields (source, destination) tuples.
        p = r_moved_pages.findall(rc.comment)[0]
        _start_afc_tasks("process_move", pages=p)
        chans.update(AFC_CHANS)

    elif rc.flags == "delete" and r_delete.match(comment):
        # One capture group, so findall yields plain strings: [0] is the whole
        # title. (Indexing again with [0] would keep only its first character.)
        p = r_deleted_page.findall(rc.comment)[0]
        _start_afc_tasks("process_delete", page=p)
        chans.update(AFC_CHANS)

    # NOTE(review): restores are matched under the "delete" flag here; confirm
    # that this is the flag the recent-changes feed uses for restore entries.
    elif rc.flags == "delete" and r_restore.match(comment):
        p = r_restored_page.findall(rc.comment)[0]
        _start_afc_tasks("process_restore", page=p)
        chans.update(AFC_CHANS)

    elif rc.flags == "protect" and r_protect.match(comment):
        chans.update(AFC_CHANS)

    return chans
@@ -1,11 +1,23 @@ | |||
# -*- coding: utf-8 -*- | |||
## EarwigBot's Core | |||
## Basically, this creates threads for our IRC watcher component and Wikipedia component, and then runs the main IRC bot on the main thread. | |||
## The IRC bot component of EarwigBot has two parts: a front-end and a watcher. | |||
## The front-end runs on a normal IRC server and expects users to interact with it/give it commands. | |||
## The watcher runs on a wiki recent-changes server and listens for edits. Users cannot interact with this part of the bot. | |||
## EarwigBot has three components that can run independently of each other: an | |||
## IRC front-end, an IRC watcher, and a wiki scheduler. | |||
## * The IRC front-end runs on a normal IRC server and expects users to | |||
## interact with it/give it commands. | |||
## * The IRC watcher runs on a wiki recent-changes server and listens for | |||
## edits. Users cannot interact with this part of the bot. | |||
## * The wiki scheduler runs wiki-editing bot tasks in separate threads at | |||
## user-defined times through a cron-like interface. | |||
## There is a "priority" system here: | |||
## 1. If the IRC frontend is enabled, it will run on the main thread, and the | |||
## IRC watcher and wiki scheduler (if enabled) will run on separate threads. | |||
## 2. If the wiki scheduler is enabled, it will run on the main thread, and the | |||
## IRC watcher (if enabled) will run on a separate thread. | |||
## 3. If the IRC watcher is enabled, it will run on the main (and only) thread. | |||
## Else, the bot will stop, as no components are enabled. | |||
import threading | |||
import time | |||
@@ -16,35 +28,95 @@ import os | |||
parent_dir = os.path.split(sys.path[0])[0] | |||
sys.path.append(parent_dir) # make sure we look in the parent directory for modules | |||
from config.main import * | |||
from irc import frontend, watcher | |||
from wiki import task_manager | |||
# Module-wide connection handles, both unset until a component starts:
# f_conn is assigned by irc_frontend(), w_conn by irc_watcher().
f_conn = w_conn = None
def irc_watcher(f_conn=None):
    """Run the IRC watcher forever, reconnecting whenever it stops.

    Runs on its own thread when the frontend and/or scheduler is enabled,
    otherwise on the main thread. ``f_conn`` is the frontend connection that
    is handed to ``watcher.main``; it defaults to None so the watcher can be
    started when no frontend exists.
    """
    global w_conn
    print("\nStarting IRC watcher...")
    while 1:  # restart the watcher component if (just) it breaks
        w_conn = watcher.get_connection()
        w_conn.connect()
        print("")  # print a blank line here to signify that the bot has finished starting up
        try:
            watcher.main(w_conn, f_conn)
        except Exception:
            # Only ordinary errors are logged and retried; KeyboardInterrupt
            # and SystemExit must escape so the bot can be stopped when the
            # watcher is running on the main thread.
            traceback.print_exc()
        time.sleep(5)  # sleep a bit before restarting watcher
        print("\nWatcher has stopped; restarting component...")
def run(): | |||
def wiki_scheduler():
    """Start the scheduled wiki tasks once per minute, forever.

    Runs on another thread, or as the primary thread if the IRC frontend is
    not enabled. Each cycle passes the current UTC time to
    ``task_manager.start_tasks`` and then sleeps for whatever is left of the
    60-second period.
    """
    while 1:
        time_start = time.time()
        now = time.gmtime(time_start)
        task_manager.start_tasks(now)
        # Elapsed time is end minus start; the reverse is never positive and
        # would make every cycle sleep for 60 seconds *plus* the elapsed time.
        elapsed = time.time() - time_start
        if elapsed < 60:  # sleep out the remainder of this minute
            time.sleep(60 - elapsed)
def irc_frontend():
    """Run the IRC frontend on the calling (primary) thread.

    Before entering the frontend loop, the wiki scheduler and the IRC watcher
    are each started on a daemon thread if they are enabled. When
    ``frontend.main()`` returns, the watcher and frontend connections are
    closed.
    """
    global f_conn

    print("\nStarting IRC frontend...")
    f_conn = frontend.get_connection()
    frontend.startup(f_conn)

    if enable_wiki_schedule:
        print("\nStarting wiki scheduler...")
        task_manager.load_tasks()
        t_scheduler = threading.Thread(target=wiki_scheduler)
        t_scheduler.name = "wiki-scheduler"
        t_scheduler.daemon = True
        t_scheduler.start()

    if enable_irc_watcher:
        t_watcher = threading.Thread(target=irc_watcher, args=(f_conn,))
        t_watcher.name = "irc-watcher"
        t_watcher.daemon = True
        t_watcher.start()

    frontend.main()

    # w_conn is assigned by the watcher thread, so it is still None if the
    # frontend stops before the watcher has created its connection.
    if enable_irc_watcher and w_conn is not None:
        w_conn.close()
    f_conn.close()
def run():
    """Start whichever components are enabled, following the priority order.

    1. Frontend enabled: it runs on this thread and starts the others itself.
    2. Otherwise, scheduler enabled: it runs on this thread, with the IRC
       watcher (if enabled) on a daemon thread.
    3. Otherwise, watcher enabled: it runs on this thread alone.
    If nothing is enabled the process exits with a message.
    """
    if enable_irc_frontend:
        # The frontend function enables the additional components itself.
        irc_frontend()

    elif enable_wiki_schedule:
        print("\nStarting wiki scheduler...")
        task_manager.load_tasks()
        if enable_irc_watcher:
            t_watcher = threading.Thread(target=irc_watcher, args=(f_conn,))
            t_watcher.name = "irc-watcher"
            t_watcher.daemon = True
            t_watcher.start()
        wiki_scheduler()

    elif enable_irc_watcher:
        # The watcher is the only enabled component. It still needs its
        # frontend-connection argument, which is None here because no
        # frontend was started.
        irc_watcher(f_conn)

    else:  # nothing is enabled!
        sys.exit("\nNo bot parts are enabled; stopping...")
if __name__ == "__main__":
    # run() is invoked exactly once, inside the guard, so that Ctrl-C ends the
    # bot with a short message instead of a traceback.
    try:
        run()
    except KeyboardInterrupt:
        sys.exit("\nKeyboardInterrupt: stopping main bot loop.")
@@ -4,12 +4,19 @@ import time | |||
from subprocess import * | |||
try: | |||
from config import irc, secure, watcher | |||
from config import irc, main, schedule, secure, watcher | |||
except ImportError: | |||
print """Missing a config file! Make sure you have configured the bot. All *.py.default files in config/ | |||
should have their .default extension removed, and the info inside should be corrected.""" | |||
exit() | |||
while 1: | |||
call(['python', 'core/main.py']) | |||
time.sleep(5) # sleep for five seconds between bot runs | |||
def main():
    """Run the bot forever, launching ``core/main.py`` again each time it exits."""
    bot_command = ['python', 'core/main.py']
    pause = 5  # seconds to sleep between bot runs
    while True:
        call(bot_command)
        time.sleep(pause)
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Raise SystemExit directly rather than calling exit(): that helper is
        # injected by the site module for interactive use and is not
        # guaranteed to exist. The message is printed and the process exits
        # with a failure status, exactly as before.
        raise SystemExit("\nKeyboardInterrupt: stopping bot wrapper.")
@@ -7,7 +7,7 @@ import traceback | |||
commands = [] | |||
def init_commands(connection): | |||
def load_commands(connection): | |||
"""load all valid command classes from irc/commmands/ into the commands variable""" | |||
files = os.listdir(os.path.join("irc", "commands")) # get all files in irc/commands/ | |||
files.sort() # alphabetically sort list of files | |||
@@ -15,18 +15,14 @@ def init_commands(connection): | |||
for f in files: | |||
if f.startswith("_") or not f.endswith(".py"): # ignore non-python files or files beginning with "_" | |||
continue | |||
module = f[:-3] # strip .py from end | |||
try: | |||
exec "from irc.commands import %s" % module | |||
except: # importing the file failed for some reason... | |||
print "Couldn't load file %s:" % f | |||
traceback.print_exc() | |||
continue | |||
m = eval(module) # 'module' is a string, so get the actual object for processing | |||
process_module(connection, m) | |||
process_module(connection, eval(module)) # 'module' is a string, so get the actual object for processing by eval-ing it | |||
pretty_cmnds = map(lambda c: c.__class__.__name__, commands) | |||
print "Found %s command classes: %s." % (len(commands), ', '.join(pretty_cmnds)) | |||
@@ -146,7 +146,7 @@ class Git(BaseCommand): | |||
try: | |||
remote = self.exec_shell("git config --get branch.%s.remote" % branch) | |||
url = self.exec_shell("git config --get remote.%s.url" % remote) | |||
self.connection.reply(self.data, "done; %s. [from %s]" % (changes, url)) | |||
self.connection.reply(self.data, "done; %s [from %s]." % (changes, url)) | |||
except subprocess.CalledProcessError: # something in .git/config is not specified correctly, so we cannot get the remote's url | |||
self.connection.reply(self.data, "done; %s." % changes) | |||
@@ -5,6 +5,10 @@ | |||
import socket | |||
import threading | |||
class BrokenSocketException(Exception):
    """Raised when a socket returns no data and is therefore treated as dead."""
class Connection(object): | |||
def __init__(self, host=None, port=None, nick=None, ident=None, realname=None): | |||
"""a class to interface with IRC""" | |||
@@ -33,7 +37,7 @@ class Connection(object): | |||
"""receive (get) data from the server""" | |||
data = self.sock.recv(4096) | |||
if not data: # socket giving us no data, so it is dead/broken | |||
raise RuntimeError("socket is dead") | |||
raise BrokenSocketException() | |||
return data | |||
def send(self, msg): | |||
@@ -7,7 +7,7 @@ from config.irc import * | |||
from config.secure import * | |||
from irc import command_handler | |||
from irc.connection import Connection | |||
from irc.connection import * | |||
from irc.data import Data | |||
connection = None | |||
@@ -19,7 +19,7 @@ def get_connection(): | |||
def startup(conn):
    """Adopt *conn* as the module-wide frontend connection and bring it up.

    The IRC commands are loaded against the connection first, and only then
    is the connection itself opened.
    """
    global connection
    connection = conn
    command_handler.load_commands(conn)
    conn.connect()
def main(): | |||
@@ -28,8 +28,8 @@ def main(): | |||
while 1: | |||
try: | |||
read_buffer = read_buffer + connection.get() | |||
except RuntimeError: # socket broke | |||
print "socket has broken on front-end; restarting bot..." | |||
except BrokenSocketException: | |||
print "Socket has broken on front-end; restarting bot..." | |||
return | |||
lines = read_buffer.split("\n") | |||
@@ -61,7 +61,7 @@ def main(): | |||
if data.msg.startswith("!restart"): # hardcode the !restart command (we can't restart from within an ordinary command) | |||
if data.host in OWNERS: | |||
print "restarting bot per owner request..." | |||
print "Restarting bot per owner request..." | |||
return | |||
if line[0] == "PING": # If we are pinged, pong back to the server | |||
@@ -2,9 +2,10 @@ | |||
## Imports | |||
from config.irc import * | |||
from config.main import * | |||
from config.watcher import * | |||
from irc.connection import Connection | |||
from irc.connection import * | |||
from irc.rc import RC | |||
global frontend_conn | |||
@@ -16,13 +17,12 @@ def get_connection(): | |||
def main(connection, f_conn): | |||
global frontend_conn | |||
frontend_conn = f_conn | |||
connection.connect() | |||
read_buffer = str() | |||
while 1: | |||
try: | |||
read_buffer = read_buffer + connection.get() | |||
except RuntimeError: # socket broke | |||
except BrokenSocketException: | |||
return | |||
lines = read_buffer.split("\n") | |||
@@ -53,5 +53,6 @@ def check(rc): | |||
if not results: | |||
return | |||
pretty = rc.get_pretty() | |||
for chan in results: | |||
frontend_conn.say(chan, pretty) | |||
if enable_irc_frontend: | |||
for chan in results: | |||
frontend_conn.say(chan, pretty) |
@@ -0,0 +1,12 @@ | |||
# -*- coding: utf-8 -*- | |||
# A base class for bot tasks that edit Wikipedia. | |||
class BaseTask(object):
    """Common parent for bot tasks that edit Wikipedia.

    Subclasses are expected to set ``task_name`` and override ``run``.
    """

    def __init__(self):
        # No name until a subclass supplies one.
        self.task_name = None

    def run(self, **kwargs):
        """Run this task; the base implementation does nothing."""
        return None
@@ -0,0 +1,80 @@ | |||
# -*- coding: utf-8 -*- | |||
# A module to manage bot tasks. | |||
import time | |||
import traceback | |||
import threading | |||
import os | |||
from config import schedule | |||
# Registry of loaded tasks: maps each task's name to its task class instance.
task_list = {}
def load_tasks():
    """Load every valid task module found in wiki/tasks/ into task_list.

    Entries are visited in alphabetical order. Anything that is not a regular
    file, does not end in ".py", or starts with "_" is ignored. A summary of
    the loaded tasks is printed at the end.
    """
    for filename in sorted(os.listdir(os.path.join("wiki", "tasks"))):
        if not os.path.isfile(os.path.join("wiki", "tasks", filename)):
            continue  # ignore non-files
        if filename.startswith("_") or not filename.endswith(".py"):
            continue  # ignore non-python files or files beginning with "_"
        load_class_from_file(filename)

    summary = (len(task_list), ', '.join(task_list.keys()))
    print("Found %s tasks: %s." % summary)
def load_class_from_file(f):
    """Import one task module from wiki/tasks/ and register its task.

    ``f`` is the module's file name including the ".py" suffix. The module
    must define a ``Task`` class that can be instantiated without arguments;
    the instance is stored in ``task_list`` under its ``task_name``.

    Failures to import the module or to create the task are reported with a
    traceback and otherwise ignored, so one broken file does not prevent the
    remaining tasks from loading. Returns None in every case.
    """
    # Imported here so this function does not depend on the module's import
    # block; import_module replaces building an import statement as a string.
    import importlib

    module_name = f[:-3]  # strip .py from end
    try:
        module = importlib.import_module("wiki.tasks." + module_name)
    except Exception:  # importing the file failed for some reason...
        print("Couldn't load task file %s:" % f)
        traceback.print_exc()
        return

    try:
        task = module.Task()
    except Exception:
        print("Couldn't find or get task class in file %s:" % f)
        traceback.print_exc()
        return

    task_name = task.task_name
    task_list[task_name] = task
    print("Added task %s from wiki/tasks/%s..." % (task_name, f))
def start_tasks(now=None):
    """Start all tasks that are supposed to be run at a given time.

    ``now`` is a ``time.struct_time``; when omitted, the current UTC time is
    read at call time (a default written in the signature would be evaluated
    only once, when the module is imported).
    """
    if now is None:
        now = time.gmtime()

    # struct_time counts weekdays from Monday = 0, while the schedule's rules
    # are written with Sunday = 0, so shift the value by one day.
    week_day = (now.tm_wday + 1) % 7

    # Get the list of tasks to run this turn.
    tasks = schedule.check(now.tm_min, now.tm_hour, now.tm_mday, now.tm_mon, week_day)

    for task in tasks:
        if isinstance(task, tuple):  # they've specified kwargs, so pass those to start_task
            start_task(task[0], **task[1])
        else:  # otherwise, just pass task_name
            start_task(task)
def start_task(task_name, **kwargs):
    """Start a given task in a new daemon thread.

    Any keyword arguments are passed on to the task's ``run`` method. If no
    task called ``task_name`` has been loaded, a message is printed and
    nothing is started.
    """
    print("Starting task '{}' in a new thread...".format(task_name))
    try:
        task = task_list[task_name]  # the loaded task instance, a subclass of BaseTask
    except KeyError:
        print("Couldn't find task '{}': wiki/tasks/{}.py does not exist.".format(task_name, task_name))
        return

    # Thread takes keyword arguments for its target through ``kwargs``, so the
    # wrapper can be called as task_wrapper(task, **kwargs) without a lambda.
    task_thread = threading.Thread(target=task_wrapper, args=(task,), kwargs=kwargs)
    # The name is labelled UTC, so format the UTC time rather than local time.
    task_thread.name = "task {} (spawned at {} UTC)".format(task_name, time.asctime(time.gmtime()))
    task_thread.daemon = True  # stop bot task threads automagically if the main bot stops
    task_thread.start()
def task_wrapper(task, **kwargs):
    """Run *task* with the given keyword arguments and report the outcome.

    Anything the task raises is caught here and printed with its traceback.
    """
    try:
        task.run(**kwargs)
    except:
        print("Task '{}' raised an exception and had to stop:".format(task.task_name))
        traceback.print_exc()
        return
    print("Task '{}' finished without error.".format(task.task_name))
@@ -0,0 +1,12 @@ | |||
# -*- coding: utf-8 -*- | |||
# A task to delink mainspace categories in declined [[WP:AFC]] submissions. | |||
from wiki.base_task import BaseTask | |||
class Task(BaseTask):
    """Delink mainspace categories in declined [[WP:AFC]] submissions."""

    def __init__(self):
        # Run the base initializer first so this instance also gets any
        # state BaseTask sets up, then give the task its name.
        super(Task, self).__init__()
        self.task_name = "afc_catdelink"

    def run(self, **kwargs):
        """Run the task. Not implemented yet: keyword arguments are ignored."""
        pass
@@ -0,0 +1,12 @@ | |||
# -*- coding: utf-8 -*- | |||
# A task to check newly-edited [[WP:AFC]] submissions for copyright violations. | |||
from wiki.base_task import BaseTask | |||
class Task(BaseTask):
    """Check newly-edited [[WP:AFC]] submissions for copyright violations."""

    def __init__(self):
        # Run the base initializer first so this instance also gets any
        # state BaseTask sets up, then give the task its name.
        super(Task, self).__init__()
        self.task_name = "afc_copyvios"

    def run(self, **kwargs):
        """Run the task. Not implemented yet: keyword arguments are ignored."""
        pass
@@ -0,0 +1,12 @@ | |||
# -*- coding: utf-8 -*- | |||
# A task to create daily categories for [[WP:AFC]]. | |||
from wiki.base_task import BaseTask | |||
class Task(BaseTask):
    """Create daily categories for [[WP:AFC]]."""

    def __init__(self):
        # Run the base initializer first so this instance also gets any
        # state BaseTask sets up, then give the task its name.
        super(Task, self).__init__()
        self.task_name = "afc_dailycats"

    def run(self, **kwargs):
        """Run the task. Not implemented yet: keyword arguments are ignored."""
        pass
@@ -0,0 +1,12 @@ | |||
# -*- coding: utf-8 -*- | |||
# A task to generate statistics for [[WP:AFC]] and save them to [[Template:AFC_statistics]]. | |||
from wiki.base_task import BaseTask | |||
class Task(BaseTask):
    """Generate statistics for [[WP:AFC]] and save them to
    [[Template:AFC_statistics]]."""

    def __init__(self):
        # Run the base initializer first so this instance also gets any
        # state BaseTask sets up, then give the task its name.
        super(Task, self).__init__()
        self.task_name = "afc_statistics"

    def run(self, **kwargs):
        """Run the task. Not implemented yet: keyword arguments are ignored."""
        pass
@@ -0,0 +1,12 @@ | |||
# -*- coding: utf-8 -*- | |||
# A task to clear [[Category:Undated AfC submissions]]. | |||
from wiki.base_task import BaseTask | |||
class Task(BaseTask):
    """Clear [[Category:Undated AfC submissions]]."""

    def __init__(self):
        # Run the base initializer first so this instance also gets any
        # state BaseTask sets up, then give the task its name.
        super(Task, self).__init__()
        self.task_name = "afc_undated"

    def run(self, **kwargs):
        """Run the task. Not implemented yet: keyword arguments are ignored."""
        pass
@@ -0,0 +1,12 @@ | |||
# -*- coding: utf-8 -*- | |||
# A task to add |blp=yes to {{WPB}} or {{WPBS}} when it is used along with {{WP Biography}}. | |||
from wiki.base_task import BaseTask | |||
class Task(BaseTask):
    """Add |blp=yes to {{WPB}} or {{WPBS}} when it is used along with
    {{WP Biography}}."""

    def __init__(self):
        # Run the base initializer first so this instance also gets any
        # state BaseTask sets up, then give the task its name.
        super(Task, self).__init__()
        self.task_name = "blptag"

    def run(self, **kwargs):
        """Run the task. Not implemented yet: keyword arguments are ignored."""
        pass
@@ -0,0 +1,12 @@ | |||
# -*- coding: utf-8 -*- | |||
# A task to create daily categories for [[WP:FEED]]. | |||
from wiki.base_task import BaseTask | |||
class Task(BaseTask):
    """Create daily categories for [[WP:FEED]]."""

    def __init__(self):
        # Run the base initializer first so this instance also gets any
        # state BaseTask sets up, then give the task its name.
        super(Task, self).__init__()
        self.task_name = "feed_dailycats"

    def run(self, **kwargs):
        """Run the task. Not implemented yet: keyword arguments are ignored."""
        pass
@@ -0,0 +1,12 @@ | |||
# -*- coding: utf-8 -*- | |||
# A task to tag files whose extensions do not agree with their MIME type. | |||
from wiki.base_task import BaseTask | |||
class Task(BaseTask):
    """Tag files whose extensions do not agree with their MIME type."""

    def __init__(self):
        # Run the base initializer first so this instance also gets any
        # state BaseTask sets up, then give the task its name.
        super(Task, self).__init__()
        self.task_name = "wrongmime"

    def run(self, **kwargs):
        """Run the task. Not implemented yet: keyword arguments are ignored."""
        pass