diff --git a/.gitignore b/.gitignore index 282791f..1e66a11 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,8 @@ # Ignore python bytecode: *.pyc -# Ignore secure config files: -config/secure.py +# Ignore bot-specific config file: +config.json # Ignore pydev's nonsense: .project diff --git a/config/__init__.py b/config/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/config/irc.py b/config/irc.py deleted file mode 100644 index ee9ef3e..0000000 --- a/config/irc.py +++ /dev/null @@ -1,25 +0,0 @@ -# -*- coding: utf-8 -*- - -# EarwigBot Configuration File -# This file contains information that the bot uses to connect to IRC. - -# our main (front-end) server's hostname and port -HOST = "irc.freenode.net" -PORT = 6667 - -# our watcher server's hostname, port, and RC channel -WATCHER_HOST = "irc.wikimedia.org" -WATCHER_PORT = 6667 -WATCHER_CHAN = "#en.wikipedia" - -# our nick, ident, and real name, used on both servers -NICK = "EarwigBot" -IDENT = "earwigbot" -REALNAME = "[[w:en:User:EarwigBot]]" - -# channels to join on main server's startup -CHANS = ["##earwigbot", "##earwig", "#wikipedia-en-afc"] - -# hardcoded hostnames of users with certain permissions -OWNERS = ["wikipedia/The-Earwig"] # can use owner-only commands (!restart and !git) -ADMINS = ["wikipedia/The-Earwig", "wikipedia/LeonardBloom"] # can use high-risk commands, e.g. !op diff --git a/config/main.py b/config/main.py deleted file mode 100644 index 6e8c082..0000000 --- a/config/main.py +++ /dev/null @@ -1,24 +0,0 @@ -# -*- coding: utf-8 -*- - -# EarwigBot Configuration File -# This file tells the bot which of its components should be enabled. - -# The IRC frontend (configured in config/irc.py) sits on a public IRC network, -# responds to commands given to it, and reports edits (if the IRC watcher -# component is enabled). 
-enable_irc_frontend = True - -# The IRC watcher (connection details configured in config/irc.py as well) sits -# on an IRC network that gives a recent changes feed, usually irc.wikimedia.net. -# It looks for edits matching certain (often regex) patterns (rules configured -# in config/watcher.py), and either reports them to the IRC frontend (if -# enabled), or activates a task on the WikiBot (if configured to do). -enable_irc_watcher = True - -# EarwigBot doesn't have to edit a wiki, although this is its main purpose. If -# the wiki schedule is disabled, it will not be able to handle scheduled tasks -# that involve editing (such as creating a daily category every day at midnight -# UTC), but it can still edit through rules given in the watcher, and bot tasks -# can still be activated by the command line. The schedule is configured in -# config/schedule.py. -enable_wiki_schedule = True diff --git a/config/schedule.py b/config/schedule.py deleted file mode 100644 index 093050b..0000000 --- a/config/schedule.py +++ /dev/null @@ -1,28 +0,0 @@ -# -*- coding: utf-8 -*- - -# EarwigBot Configuration File -# This file tells the bot when to run certain wiki-editing tasks. 
- -def check(minute, hour, month_day, month, week_day): - tasks = [] # tasks to run this turn, each as a tuple of (task_name, kwargs) or just task_name - - if minute == 0: # run every hour on the hour - tasks.append(("afc_statistics", {"action": "save"})) # save statistics to [[Template:AFC_statistics]] - - if hour == 0: # run every day at midnight - tasks.append("afc_dailycats") # create daily categories for WP:AFC - tasks.append("feed_dailycats") # create daily categories for WP:FEED - - if week_day == 0: # run every Sunday at midnight (that is, the start of Sunday, not the end) - tasks.append("afc_undated") # clear [[Category:Undated AfC submissions]] - - if week_day == 1: # run every Monday at midnight - tasks.append("afc_catdelink") # delink mainspace categories in declined AfC submissions - - if week_day == 2: # run every Tuesday at midnight - tasks.append("wrongmime") # tag files whose extensions do not agree with their MIME type - - if week_day == 3: # run every Wednesday at midnight - tasks.append("blptag") # add |blp=yes to {{WPB}} or {{WPBS}} when it is used along with {{WP Biography}} - - return tasks diff --git a/config/secure.default.py b/config/secure.default.py deleted file mode 100644 index 0db882e..0000000 --- a/config/secure.default.py +++ /dev/null @@ -1,9 +0,0 @@ -# -*- coding: utf-8 -*- - -# EarwigBot Configuration File -# This file contains information that should be kept hidden, including passwords. - -# IRC: identify ourselves to NickServ? -NS_AUTH = False -NS_USER = "" -NS_PASS = "" diff --git a/config/watcher.py b/config/watcher.py deleted file mode 100644 index 6e2fe28..0000000 --- a/config/watcher.py +++ /dev/null @@ -1,69 +0,0 @@ -# -*- coding: utf-8 -*- - -# EarwigBot Configuration File -# This file contains rules for the bot's watcher component. - -import re - -from wiki import task_manager - -# Define different report channels on our front-end server. 
They /must/ be in CHANS in config/irc.py or the bot will not be able to send messages to them (unless they have -n set). -AFC_CHANS = ["#wikipedia-en-afc"] # report recent AfC changes/give AfC status messages upon join -BOT_CHANS = ["##earwigbot", "#wikipedia-en-afc"] # report edits containing "!earwigbot" - -# Define some commonly used strings. -afc_prefix = "wikipedia( talk)?:(wikiproject )?articles for creation" - -# Define our compiled regexps used when finding certain edits. -r_page = re.compile(afc_prefix) -r_ffu = re.compile("wikipedia( talk)?:files for upload") -r_move1 = re.compile("moved \[\[{}".format(afc_prefix)) # an AFC page was either moved locally or out -r_move2 = re.compile("moved \[\[(.*?)\]\] to \[\[{}".format(afc_prefix)) # an outside page was moved into AFC -r_moved_pages = re.compile("^moved \[\[(.*?)\]\] to \[\[(.*?)\]\]") -r_delete = re.compile("deleted \"\[\[{}".format(afc_prefix)) -r_deleted_page = re.compile("^deleted \"\[\[(.*?)\]\]") -r_restore = re.compile("restored \"\[\[{}".format(afc_prefix)) -r_restored_page = re.compile("^restored \"\[\[(.*?)\]\]") -r_protect = re.compile("protected \"\[\[{}".format(afc_prefix)) - -def process(rc): - chans = set() # channels to report this message to - page_name = rc.page.lower() - comment = rc.comment.lower() - - if "!earwigbot" in rc.msg.lower(): - chans.update(BOT_CHANS) - - if r_page.search(page_name): - task_manager.start_task("afc_statistics", action="process_edit", page=rc.page) - task_manager.start_task("afc_copyvios", action="process_edit", page=rc.page) - chans.update(AFC_CHANS) - - elif r_ffu.match(page_name): - chans.update(AFC_CHANS) - - elif page_name.startswith("template:afc submission"): - chans.update(AFC_CHANS) - - elif rc.flags == "move" and (r_move1.match(comment) or r_move2.match(comment)): - p = r_moved_pages.findall(rc.comment)[0] - task_manager.start_task("afc_statistics", action="process_move", pages=p) - task_manager.start_task("afc_copyvios", action="process_move", 
pages=p) - chans.update(AFC_CHANS) - - elif rc.flags == "delete" and r_delete.match(comment): - p = r_deleted_page.findall(rc.comment)[0][0] - task_manager.start_task("afc_statistics", action="process_delete", page=p) - task_manager.start_task("afc_copyvios", action="process_delete", page=p) - chans.update(AFC_CHANS) - - elif rc.flags == "restore" and r_restore.match(comment): - p = r_restored_page.findall(rc.comment)[0][0] - task_manager.start_task("afc_statistics", action="process_restore", page=p) - task_manager.start_task("afc_copyvios", action="process_restore", page=p) - chans.update(AFC_CHANS) - - elif rc.flags == "protect" and r_protect.match(comment): - chans.update(AFC_CHANS) - - return chans diff --git a/core/config.py b/core/config.py new file mode 100644 index 0000000..29c3fe1 --- /dev/null +++ b/core/config.py @@ -0,0 +1,152 @@ +# -*- coding: utf-8 -*- + +""" +EarwigBot's JSON Config File Parser + +This handles all tasks involving reading and writing to our config file, +including encrypting and decrypting passwords and making a new config file from +scratch at the inital bot run. 
+ +Usually you'll just want to do "from core import config" and access config data +from within config's three global variables and one function: + +* config.components - a list of enabled components +* config.wiki - a dict of config information for wiki-editing +* config.irc - a dict of config information for IRC +* config.schedule() - returns a list of tasks scheduled to run now +""" + +import json +from os import makedirs, path + +from lib import blowfish + +script_dir = path.dirname(path.abspath(__file__)) +root_dir = path.split(script_dir)[0] +config_path = path.join(root_dir, "config.json") + +_config = None # holds data loaded from our config file + +# set our three easy-config-access global variables to None +components, wiki, irc = (None, None, None) + +def load_config(): + """Load data from our JSON config file (config.json) into _config.""" + global _config + with open(config_path, 'r') as fp: + try: + _config = json.load(fp) + except ValueError as error: + print "Error parsing config file {0}:".format(config_path) + print error + exit(1) + +def verify_config(): + """Check to see if we have a valid config file, and if not, notify the + user. If there is no config file at all, offer to make one; otherwise, + exit.""" + if path.exists(config_path): + load_config() + try: + return _config["encryptPasswords"] # are passwords encrypted? + except KeyError: + return False # assume passwords are not encrypted by default + else: + print "You haven't configured the bot yet!" + choice = raw_input("Would you like to do this now? [y/n] ") + if choice.lower().startswith("y"): + return make_new_config() + else: + exit() + +def parse_config(key): + """Store data from our config file in three global variables for easy + access, and use the key to unencrypt passwords. Catch password decryption + errors and report them to the user.""" + global components, wiki, irc + + load_config() # we might be re-loading unnecessarily here, but no harm in + # that! 
+ try: + components = _config["components"] + except KeyError: + components = [] + try: + wiki = _config["wiki"] + except KeyError: + wiki = {} + try: + irc = _config["irc"] + except KeyError: + irc = {} + + try: + try: + if _config["encryptPasswords"]: + decrypt(key, "wiki['password']") + decrypt(key, "irc['frontend']['nickservPassword']") + decrypt(key, "irc['watcher']['nickservPassword']") + except KeyError: + pass + except blowfish.BlowfishError as error: + print "\nError decrypting passwords:" + print "{0}: {1}.".format(error.__class__.__name__, error) + exit(1) + +def decrypt(key, item): + """Decrypt 'item' with blowfish.decrypt() using the given key and set it to + the decrypted result. 'item' should be a string, like + decrypt(key, "wiki['password']"), NOT decrypt(key, wiki['password'), + because that won't work.""" + global irc, wiki + try: + result = blowfish.decrypt(key, eval(item)) + except KeyError: + return + exec "{0} = result".format(item) + +def schedule(minute, hour, month_day, month, week_day): + """Return a list of tasks that are scheduled to run at the time specified + by the function arguments. The schedule data comes from our config file's + 'schedule' field, which is stored as _config["schedule"]. 
Call this + function with config.schedule(args).""" + tasks = [] # tasks to run this turn, each as a tuple of either (task_name, + # kwargs), or just task_name + + now = {"minute": minute, "hour": hour, "month_day": month_day, + "month": month, "week_day": week_day} + + try: + data = _config["schedule"] + except KeyError: + data = [] + for event in data: + do = True + for key, value in now.items(): + try: + requirement = event[key] + except KeyError: + continue + if requirement != value: + do = False + break + if do: + try: + tasks.extend(event["tasks"]) + except KeyError: + pass + + return tasks + +def make_new_config(): + """Make a new config file based on the user's input.""" + makedirs(config_dir) + + encrypt = raw_input("Would you like to encrypt passwords stored in " + + "config.json? [y/n] ") + if encrypt.lower().startswith("y"): + is_encrypted = True + else: + is_encrypted = False + + return is_encrypted diff --git a/core/main.py b/core/main.py index 3000def..0b49842 100644 --- a/core/main.py +++ b/core/main.py @@ -1,23 +1,34 @@ +#! /usr/bin/python # -*- coding: utf-8 -*- -## EarwigBot's Core - -## EarwigBot has three components that can run independently of each other: an -## IRC front-end, an IRC watcher, and a wiki scheduler. -## * The IRC front-end runs on a normal IRC server and expects users to -## interact with it/give it commands. -## * The IRC watcher runs on a wiki recent-changes server and listens for -## edits. Users cannot interact with this part of the bot. -## * The wiki scheduler runs wiki-editing bot tasks in separate threads at -## user-defined times through a cron-like interface. - -## There is a "priority" system here: -## 1. If the IRC frontend is enabled, it will run on the main thread, and the -## IRC watcher and wiki scheduler (if enabled) will run on separate threads. -## 2. If the wiki scheduler is enabled, it will run on the main thread, and the -## IRC watcher (if enabled) will run on a separate thread. -## 3. 
If the IRC watcher is enabled, it will run on the main (and only) thread. -## Else, the bot will stop, as no components are enabled. +""" +EarwigBot's Core + +This (should) not be run directly; the wrapper in "earwigbot.py" is preferred, +but it should work fine alone, as long as you enter the password-unlock key at +the initial hidden prompt. + +The core is essentially responsible for starting the various bot components +(irc, scheduler, etc) and making sure they are all happy. An explanation of the +different components follows: + +EarwigBot has three components that can run independently of each other: an IRC +front-end, an IRC watcher, and a wiki scheduler. +* The IRC front-end runs on a normal IRC server and expects users to interact + with it/give it commands. +* The IRC watcher runs on a wiki recent-changes server and listens for edits. + Users cannot interact with this part of the bot. +* The wiki scheduler runs wiki-editing bot tasks in separate threads at + user-defined times through a cron-like interface. + +There is a "priority" system here: +1. If the IRC frontend is enabled, it will run on the main thread, and the IRC + watcher and wiki scheduler (if enabled) will run on separate threads. +2. If the wiki scheduler is enabled, it will run on the main thread, and the + IRC watcher (if enabled) will run on a separate thread. +3. If the IRC watcher is enabled, it will run on the main (and only) thread. +Else, the bot will stop, as no components are enabled. 
+""" import threading import time @@ -25,30 +36,31 @@ import traceback import sys import os -parent_dir = os.path.split(sys.path[0])[0] -sys.path.append(parent_dir) # make sure we look in the parent directory for modules +script_dir = os.path.dirname(os.path.abspath(__file__)) +root_dir = os.path.split(script_dir)[0] # the bot's "root" directory relative + # to its different components +sys.path.append(root_dir) # make sure we look in the root dir for modules -from config.main import * +from core import config from irc import frontend, watcher from wiki import task_manager f_conn = None w_conn = None -def irc_watcher(f_conn): +def irc_watcher(f_conn=None): """Function to handle the IRC watcher as another thread (if frontend and/or scheduler is enabled), otherwise run as the main thread.""" global w_conn - print "\nStarting IRC watcher..." - while 1: # restart the watcher component if (just) it breaks + while 1: # restart the watcher component if it breaks (and nothing else) w_conn = watcher.get_connection() w_conn.connect() - print # print a blank line here to signify that the bot has finished starting up + print # blank line to signify that the bot has finished starting up try: watcher.main(w_conn, f_conn) except: traceback.print_exc() - time.sleep(5) # sleep a bit before restarting watcher + time.sleep(5) # sleep a bit before restarting watcher print "\nWatcher has stopped; restarting component..." def wiki_scheduler(): @@ -57,12 +69,12 @@ def wiki_scheduler(): while 1: time_start = time.time() now = time.gmtime(time_start) - + task_manager.start_tasks(now) - + time_end = time.time() time_diff = time_start - time_end - if time_diff < 60: # sleep until the next minute + if time_diff < 60: # sleep until the next minute time.sleep(60 - time_diff) def irc_frontend(): @@ -70,20 +82,21 @@ def irc_frontend(): enable the wiki scheduler and IRC watcher on new threads if they are enabled.""" global f_conn - - print "\nStarting IRC frontend..." 
+ + print "Starting IRC frontend..." f_conn = frontend.get_connection() frontend.startup(f_conn) - - if enable_wiki_schedule: + + if "wiki_schedule" in config.components: print "\nStarting wiki scheduler..." task_manager.load_tasks() t_scheduler = threading.Thread(target=wiki_scheduler) t_scheduler.name = "wiki-scheduler" t_scheduler.daemon = True t_scheduler.start() - - if enable_irc_watcher: + + if "irc_watcher" in config.components: + print "\nStarting IRC watcher..." t_watcher = threading.Thread(target=irc_watcher, args=(f_conn,)) t_watcher.name = "irc-watcher" t_watcher.daemon = True @@ -91,32 +104,45 @@ def irc_frontend(): frontend.main() - if enable_irc_watcher: + if "irc_watcher" in config.components: w_conn.close() f_conn.close() - + def run(): - if enable_irc_frontend: # make the frontend run on our primary thread if enabled, and enable additional components through that function - irc_frontend() - - elif enable_wiki_schedule: # the scheduler is enabled - run it on the main thread, but also run the IRC watcher on another thread if it is enabled - print "\nStarting wiki scheduler..." - task_manager.load_tasks() - if enable_irc_watcher: - t_watcher = threading.Thread(target=irc_watcher, args=(f_conn,)) + try: + key = raw_input() # wait for our password unlock key from the bot's + except EOFError: # wrapper + key = None + config.parse_config(key) # load data from the config file and parse it + # using the unlock key + enabled = config.components + + if "irc_frontend" in enabled: # make the frontend run on our primary + irc_frontend() # thread if enabled, and enable additional + # components through that function + + elif "wiki_schedule" in enabled: # run the scheduler on the main + print "Starting wiki scheduler..." # thread, but also run the IRC + task_manager.load_tasks() # watcher on another thread iff it + if "irc_watcher" in enabled: # is enabled + print "\nStarting IRC watcher..." 
+ t_watcher = threading.Thread(target=irc_watcher, args=()) t_watcher.name = "irc-watcher" t_watcher.daemon = True t_watcher.start() wiki_scheduler() - - elif enable_irc_watcher: # the IRC watcher is our only enabled component, so run its function only and don't worry about anything else - irc_watcher() - - else: # nothing is enabled! - exit("\nNo bot parts are enabled; stopping...") + + elif "irc_watcher" in enabled: # the IRC watcher is our only enabled + print "Starting IRC watcher..." # component, so run its function only + irc_watcher() # and don't worry about anything else + + else: # nothing is enabled! + print "No bot parts are enabled; stopping..." + exit(1) if __name__ == "__main__": try: run() except KeyboardInterrupt: - exit("\nKeyboardInterrupt: stopping main bot loop.") + print "\nKeyboardInterrupt: stopping main bot loop." + exit(1) diff --git a/earwigbot.py b/earwigbot.py old mode 100644 new mode 100755 index f7852ef..3542076 --- a/earwigbot.py +++ b/earwigbot.py @@ -1,22 +1,57 @@ +#! /usr/bin/python # -*- coding: utf-8 -*- -import time -from subprocess import * +""" +EarwigBot -try: - from config import irc, main, schedule, secure, watcher -except ImportError: - print """Missing a config file! Make sure you have configured the bot. All *.py.default files in config/ -should have their .default extension removed, and the info inside should be corrected.""" - exit() +A thin wrapper for EarwigBot's main bot code, specified by bot_script. This +wrapper will automatically restart the bot when it shuts down (from !restart, +for example). It requests the bot's password at startup and reuses it every +time the bot restarts internally, so you do not need to re-enter the password +after using !restart. + +For information about the bot as a whole, see the attached README.md file (in +markdown format!) and the LICENSE for licensing information. 
+""" + +from getpass import getpass +from subprocess import Popen, PIPE +from os import path +from sys import executable +from time import sleep + +from core.config import verify_config + +__author__ = "Ben Kurtovic" +__copyright__ = "Copyright (c) 2009-2011 by Ben Kurtovic" +__license__ = "MIT License" +__version__ = "0.1dev" +__email__ = "ben.kurtovic@verizon.net" + +bot_script = path.join(path.dirname(path.abspath(__file__)), "core", "main.py") def main(): + print "EarwigBot v{0}\n".format(__version__) + + is_encrypted = verify_config() + if is_encrypted: # passwords in the config file are encrypted + key = getpass("Enter key to unencrypt bot passwords: ") + else: + key = None + while 1: - call(['python', 'core/main.py']) - time.sleep(5) # sleep for five seconds between bot runs + bot = Popen([executable, bot_script], stdin=PIPE) + bot.communicate(key) # give the key to core.config.load_config() + return_code = bot.wait() + if return_code == 1: + exit() # let critical exceptions in the subprocess cause us to + # exit as well + else: + sleep(5) # sleep between bot runs following a non-critical + # subprocess exit if __name__ == "__main__": try: main() except KeyboardInterrupt: - exit("\nKeyboardInterrupt: stopping bot wrapper.") + print "\nKeyboardInterrupt: stopping bot wrapper." 
diff --git a/irc/classes/__init__.py b/irc/classes/__init__.py new file mode 100644 index 0000000..b92db69 --- /dev/null +++ b/irc/classes/__init__.py @@ -0,0 +1,4 @@ +from base_command import * +from connection import * +from data import * +from rc import * diff --git a/irc/base_command.py b/irc/classes/base_command.py similarity index 100% rename from irc/base_command.py rename to irc/classes/base_command.py diff --git a/irc/connection.py b/irc/classes/connection.py similarity index 100% rename from irc/connection.py rename to irc/classes/connection.py diff --git a/irc/data.py b/irc/classes/data.py similarity index 100% rename from irc/data.py rename to irc/classes/data.py diff --git a/irc/rc.py b/irc/classes/rc.py similarity index 100% rename from irc/rc.py rename to irc/classes/rc.py diff --git a/irc/commands/afc_status.py b/irc/commands/afc_status.py index 4b3553a..6a7f720 100644 --- a/irc/commands/afc_status.py +++ b/irc/commands/afc_status.py @@ -6,8 +6,7 @@ import json import re import urllib -from config.watcher import * -from irc.base_command import BaseCommand +from irc.classes import BaseCommand class AFCStatus(BaseCommand): def get_hooks(self): @@ -22,7 +21,7 @@ class AFCStatus(BaseCommand): data.command == "number" or data.command == "afc_status"): return True try: - if data.line[1] == "JOIN" and data.chan in AFC_CHANS: + if data.line[1] == "JOIN" and data.chan == "#wikipedia-en-afc": return True except IndexError: pass diff --git a/irc/commands/calc.py b/irc/commands/calc.py index ceaca78..fbd26a1 100644 --- a/irc/commands/calc.py +++ b/irc/commands/calc.py @@ -5,7 +5,7 @@ import re import urllib -from irc.base_command import BaseCommand +from irc.classes import BaseCommand class Calc(BaseCommand): def get_hooks(self): diff --git a/irc/commands/chanops.py b/irc/commands/chanops.py index e7b4683..210a830 100644 --- a/irc/commands/chanops.py +++ b/irc/commands/chanops.py @@ -2,8 +2,8 @@ # Voice/devoice/op/deop users in the channel. 
-from irc.base_command import BaseCommand -from config.irc import * +from irc.classes import BaseCommand +from core import config class ChanOps(BaseCommand): def get_hooks(self): @@ -19,7 +19,7 @@ class ChanOps(BaseCommand): return False def process(self, data): - if data.host not in ADMINS: + if data.host not in config.irc["permissions"]["admins"]: self.connection.reply(data, "you must be a bot admin to use this command.") return diff --git a/irc/commands/git.py b/irc/commands/git.py index 9ca793d..60c0ecc 100644 --- a/irc/commands/git.py +++ b/irc/commands/git.py @@ -2,10 +2,12 @@ # Commands to interface with the bot's git repository; use '!git help' for sub-command list. -import shlex, subprocess, re +import shlex +import subprocess +import re -from config.irc import * -from irc.base_command import BaseCommand +from irc.classes import BaseCommand +from core import config class Git(BaseCommand): def get_hooks(self): @@ -21,7 +23,7 @@ class Git(BaseCommand): def process(self, data): self.data = data - if data.host not in OWNERS: + if data.host not in config.irc["permissions"]["owners"]: self.connection.reply(data, "you must be a bot owner to use this command.") return diff --git a/irc/commands/help.py b/irc/commands/help.py index 1970337..54eefa2 100644 --- a/irc/commands/help.py +++ b/irc/commands/help.py @@ -2,8 +2,7 @@ # Generates help information. 
-from irc.base_command import BaseCommand -from irc.data import Data +from irc.classes import BaseCommand, Data from irc import command_handler class Help(BaseCommand): diff --git a/irc/commands/link.py b/irc/commands/link.py index 59f2d6e..4587451 100644 --- a/irc/commands/link.py +++ b/irc/commands/link.py @@ -4,7 +4,7 @@ import re -from irc.base_command import BaseCommand +from irc.classes import BaseCommand class Link(BaseCommand): def get_hooks(self): diff --git a/irc/commands/tasks.py b/irc/commands/tasks.py index d1903bc..7776db4 100644 --- a/irc/commands/tasks.py +++ b/irc/commands/tasks.py @@ -2,13 +2,12 @@ # Manage wiki tasks from IRC, and check on thread status. -import threading, re +import threading +import re -from irc.base_command import BaseCommand -from irc.data import * +from irc.classes import BaseCommand, Data, KwargParseException from wiki import task_manager -from config.main import * -from config.irc import * +from core import config class Tasks(BaseCommand): def get_hooks(self): @@ -24,7 +23,7 @@ class Tasks(BaseCommand): def process(self, data): self.data = data - if data.host not in OWNERS: + if data.host not in config.irc["permissions"]["owners"]: self.connection.reply(data, "at this time, you must be a bot owner to use this command.") return @@ -116,9 +115,9 @@ class Tasks(BaseCommand): def get_main_thread_name(self): """Return the "proper" name of the MainThread; e.g. 
"irc-frontend" or "irc-watcher".""" - if enable_irc_frontend: + if "irc_frontend" in config.components: return "irc-frontend" - elif enable_wiki_schedule: + elif "wiki_schedule" in config.components: return "wiki-scheduler" else: return "irc-watcher" diff --git a/irc/commands/test.py b/irc/commands/test.py index 69ecd2f..630a37f 100644 --- a/irc/commands/test.py +++ b/irc/commands/test.py @@ -4,7 +4,7 @@ import random -from irc.base_command import BaseCommand +from irc.classes import BaseCommand class Test(BaseCommand): def get_hooks(self): diff --git a/irc/frontend.py b/irc/frontend.py index 293bc0a..2a8537d 100644 --- a/irc/frontend.py +++ b/irc/frontend.py @@ -1,28 +1,42 @@ # -*- coding: utf-8 -*- -## Imports -import re, time +""" +EarwigBot's IRC Front-end Component -from config.irc import * -from config.secure import * +The IRC frontend runs on a normal IRC server and expects users to interact with +it and give it commands. Commands are stored as "command classes", subclasses +of BaseCommand in irc/base_command.py. All command classes are automatically +imported by irc/command_handler.py if they are in irc/commands. +""" +from re import findall + +from core import config from irc import command_handler -from irc.connection import * -from irc.data import Data +from irc.classes import Connection, Data, BrokenSocketException connection = None def get_connection(): - connection = Connection(HOST, PORT, NICK, IDENT, REALNAME) + """Return a new Connection() instance with information about our server + connection, but don't actually connect yet.""" + cf = config.irc["frontend"] + connection = Connection(cf["host"], cf["port"], cf["nick"], cf["ident"], + cf["realname"]) return connection def startup(conn): + """Accept a single arg, a Connection() object, and set our global variable + 'connection' to it. 
Load all command classes in irc/commands with + command_handler, and then establish a connection with the IRC server.""" global connection connection = conn command_handler.load_commands(connection) connection.connect() def main(): + """Main loop for the Frontend IRC Bot component. get_connection() and + startup() should have already been called.""" read_buffer = str() while 1: @@ -35,41 +49,56 @@ def main(): lines = read_buffer.split("\n") read_buffer = lines.pop() - for line in lines: + for line in lines: # handle a single message from IRC line = line.strip().split() - data = Data() + data = Data() # new Data() instance to store info about this line data.line = line if line[1] == "JOIN": - data.nick, data.ident, data.host = re.findall(":(.*?)!(.*?)@(.*?)\Z", line[0])[0] + data.nick, data.ident, data.host = findall( + ":(.*?)!(.*?)@(.*?)\Z", line[0])[0] data.chan = line[2][1:] - - command_handler.check("join", data) # check if there's anything we can respond to, and if so, respond + command_handler.check("join", data) # check for 'join' hooks in + # our commands if line[1] == "PRIVMSG": - data.nick, data.ident, data.host = re.findall(":(.*?)!(.*?)@(.*?)\Z", line[0])[0] + data.nick, data.ident, data.host = findall( + ":(.*?)!(.*?)@(.*?)\Z", line[0])[0] data.msg = ' '.join(line[3:])[1:] data.chan = line[2] - if data.chan == NICK: # this is a privmsg to us, so set 'chan' as the nick of the sender + if data.chan == config.irc["frontend"]["nick"]: + # this is a privmsg to us, so set 'chan' as the nick of the + # sender, then check for private-only command hooks data.chan = data.nick - command_handler.check("msg_private", data) # only respond if it's a private message + command_handler.check("msg_private", data) else: - command_handler.check("msg_public", data) # only respond if it's a public (channel) message + # check for public-only command hooks + command_handler.check("msg_public", data) - command_handler.check("msg", data) # check for general messages + # check 
for command hooks that apply to all messages + command_handler.check("msg", data) - if data.msg.startswith("!restart"): # hardcode the !restart command (we can't restart from within an ordinary command) - if data.host in OWNERS: + # hardcode the !restart command (we can't restart from within + # an ordinary command) + if data.msg in ["!restart", ".restart"]: + if data.host in config.irc["permissions"]["owners"]: print "Restarting bot per owner request..." return - if line[0] == "PING": # If we are pinged, pong back to the server + if line[0] == "PING": # if we are pinged, pong back to the server connection.send("PONG %s" % line[1]) - if line[1] == "376": - if NS_AUTH: # if we're supposed to auth to nickserv, do that - connection.say("NickServ", "IDENTIFY %s %s" % (NS_USER, NS_PASS)) - time.sleep(3) # sleep for a bit so we don't join channels un-authed - for chan in CHANS: # join all of our startup channels + if line[1] == "376": # we've successfully connected to the network + try: # if we're supposed to auth to nickserv, do that + ns_username = config.irc["frontend"]["nickservUsername"] + ns_password = config.irc["frontend"]["nickservPassword"] + except KeyError: + pass + else: + connection.say("NickServ", "IDENTIFY {0} {1}".format( + ns_username, ns_password)) + + # join all of our startup channels + for chan in config.irc["frontend"]["channels"]: connection.join(chan) diff --git a/irc/watcher.py b/irc/watcher.py index 2aff8dd..b031b20 100644 --- a/irc/watcher.py +++ b/irc/watcher.py @@ -1,20 +1,34 @@ # -*- coding: utf-8 -*- -## Imports -from config.irc import * -from config.main import * -from config.watcher import * +""" +EarwigBot's IRC Watcher Component -from irc.connection import * -from irc.rc import RC +The IRC watcher runs on a wiki recent-changes server and listens for edits. +Users cannot interact with this part of the bot. 
When an event occurs, run it +through irc/watcher_logic.py's process() function, which can result in either +wiki bot tasks being started (listed in wiki/tasks/) or messages being sent to +channels in the IRC frontend. +""" -global frontend_conn +from core import config +from irc.classes import Connection, RC, BrokenSocketException +from irc import watcher_logic + +frontend_conn = None def get_connection(): - connection = Connection(WATCHER_HOST, WATCHER_PORT, NICK, IDENT, REALNAME) + """Return a new Connection() instance with information about our server + connection, but don't actually connect yet.""" + cf = config.irc["watcher"] + connection = Connection(cf["host"], cf["port"], cf["nick"], cf["ident"], + cf["realname"]) return connection -def main(connection, f_conn): +def main(connection, f_conn=None): + """Main loop for the Watcher IRC Bot component. get_connection() should + have already been called and the connection should have been started with + connection.connect(). Accept the frontend connection as well as an optional + parameter in order to send messages directly to frontend IRC channels.""" global frontend_conn frontend_conn = f_conn read_buffer = str() @@ -33,26 +47,32 @@ def main(connection, f_conn): if line[1] == "PRIVMSG": chan = line[2] - if chan != WATCHER_CHAN: # if we're getting a msg from another channel, ignore it + + # ignore messages originating from channels not in our list, to + # prevent someone PMing us false data + if chan not in config.irc["watcher"]["channels"]: continue msg = ' '.join(line[3:])[1:] - rc = RC(msg) # create a new RC object to store this change's data - rc.parse() - check(rc) + rc = RC(msg) # new RC object to store this event's data + rc.parse() # parse a message into pagenames, usernames, etc. 
+ process(rc) # report to frontend channels or start tasks - if line[0] == "PING": # If we are pinged, pong back to the server + if line[0] == "PING": # if we are pinged, pong back to the server connection.send("PONG %s" % line[1]) - if line[1] == "376": # Join the recent changes channel when we've finished starting up - connection.join(WATCHER_CHAN) - -def check(rc): - """check if we're supposed to report this message anywhere""" - results = process(rc) # process the message in config/watcher.py, and get a list of channels to send it to - if not results: - return - pretty = rc.get_pretty() - if enable_irc_frontend: - for chan in results: + # when we've finished starting up, join all watcher channels + if line[1] == "376": + for chan in config.irc["watcher"]["channels"]: + connection.join(chan) + +def process(rc): + """Process a message from IRC (technically, an RC object). The actual + processing is configurable, so we don't have that hard-coded here. We + simply call irc/watcher_logic.py's process() function and expect a list of + channels back, which we report the event data to.""" + chans = watcher_logic.process(rc) + if chans and frontend_conn: + pretty = rc.get_pretty() + for chan in chans: frontend_conn.say(chan, pretty) diff --git a/irc/watcher_logic.py b/irc/watcher_logic.py new file mode 100644 index 0000000..bcce171 --- /dev/null +++ b/irc/watcher_logic.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- + +""" +EarwigBot's IRC Watcher Logic + +This file contains (configurable!) rules that EarwigBot's watcher uses after it +receives an event from IRC. + +This should, ideally, be in config.xml somehow, but Python code makes more +sense for this sort of thing... so... 
+""" + +import re + +from wiki import task_manager as tasks + +afc_prefix = "wikipedia( talk)?:(wikiproject )?articles for creation" + +# compile some regexps used when finding specific events +r_page = re.compile(afc_prefix) +r_ffu = re.compile("wikipedia( talk)?:files for upload") +r_move1 = re.compile("moved \[\[{}".format(afc_prefix)) +r_move2 = re.compile("moved \[\[(.*?)\]\] to \[\[{}".format(afc_prefix)) +r_moved_pages = re.compile("^moved \[\[(.*?)\]\] to \[\[(.*?)\]\]") +r_delete = re.compile("deleted \"\[\[{}".format(afc_prefix)) +r_deleted_page = re.compile("^deleted \"\[\[(.*?)\]\]") +r_restore = re.compile("restored \"\[\[{}".format(afc_prefix)) +r_restored_page = re.compile("^restored \"\[\[(.*?)\]\]") +r_protect = re.compile("protected \"\[\[{}".format(afc_prefix)) + +def process(rc): + """Given an RC() object, return a list of channels to report this event to. + Also, start any wiki bot tasks within this function if necessary.""" + chans = set() # channels to report this message to + page_name = rc.page.lower() + comment = rc.comment.lower() + + if "!earwigbot" in rc.msg.lower(): + chans.update(("##earwigbot", "#wikipedia-en-afc")) + + if r_page.search(page_name): + tasks.start_task("afc_statistics", action="process_edit", page=rc.page) + tasks.start_task("afc_copyvios", action="process_edit", page=rc.page) + chans.add("#wikipedia-en-afc") + + elif r_ffu.match(page_name): + chans.add("#wikipedia-en-afc") + + elif page_name.startswith("template:afc submission"): + chans.add("#wikipedia-en-afc") + + elif rc.flags == "move" and (r_move1.match(comment) or + r_move2.match(comment)): + p = r_moved_pages.findall(rc.comment)[0] + tasks.start_task("afc_statistics", action="process_move", pages=p) + tasks.start_task("afc_copyvios", action="process_move", pages=p) + chans.add("#wikipedia-en-afc") + + elif rc.flags == "delete" and r_delete.match(comment): + p = r_deleted_page.findall(rc.comment)[0][0] + tasks.start_task("afc_statistics", 
action="process_delete", page=p) + tasks.start_task("afc_copyvios", action="process_delete", page=p) + chans.add("#wikipedia-en-afc") + + elif rc.flags == "restore" and r_restore.match(comment): + p = r_restored_page.findall(rc.comment)[0][0] + tasks.start_task("afc_statistics", action="process_restore", page=p) + tasks.start_task("afc_copyvios", action="process_restore", page=p) + chans.add("#wikipedia-en-afc") + + elif rc.flags == "protect" and r_protect.match(comment): + chans.add("#wikipedia-en-afc") + + return chans diff --git a/wiki/task_manager.py b/wiki/task_manager.py index b17cbbb..de40db1 100644 --- a/wiki/task_manager.py +++ b/wiki/task_manager.py @@ -1,25 +1,32 @@ # -*- coding: utf-8 -*- -# A module to manage bot tasks. +""" +EarwigBot's Wiki Bot Task Manager + +This module provides some functions to run and load bot tasks from wiki/tasks/. +""" import time import traceback import threading import os -from config import schedule +from core import config -task_list = dict() # the key is the task's name, the value is the task's class instance +# store loaded tasks as a dict where the key is the task name and the value is +# an instance of the task class (wiki.tasks.task_file.Task()) +task_list = dict() def load_tasks(): - """Load all valid task classes from wiki/tasks/, and add them to the task_list.""" - files = os.listdir(os.path.join("wiki", "tasks")) # get all files in wiki/tasks/ - files.sort() # alphabetically sort list of files + """Load all valid task classes from wiki/tasks/, and add them to the + task_list variable.""" + files = os.listdir(os.path.join("wiki", "tasks")) + files.sort() # alphabetically sort all files in wiki/tasks/ for f in files: - if not os.path.isfile(os.path.join("wiki", "tasks", f)): # ignore non-files - continue - if f.startswith("_") or not f.endswith(".py"): # ignore non-python files or files beginning with "_" - continue + if not os.path.isfile(os.path.join("wiki", "tasks", f)): + continue # ignore non-files + if 
f.startswith("_") or not f.endswith(".py"): + continue # ignore non-python files or files beginning with an _ load_class_from_file(f) print "Found %s tasks: %s." % (len(task_list), ', '.join(task_list.keys())) @@ -27,10 +34,10 @@ def load_class_from_file(f): """Look in a given file for the task class.""" global task_list - module = f[:-3] # strip .py from end + module = f[:-3] # strip .py from end try: exec "from wiki.tasks import %s as m" % module - except: # importing the file failed for some reason... + except: # importing the file failed for some reason... print "Couldn't load task file %s:" % f traceback.print_exc() return @@ -46,26 +53,34 @@ def load_class_from_file(f): def start_tasks(now=time.gmtime()): """Start all tasks that are supposed to be run at a given time.""" - tasks = schedule.check(now.tm_min, now.tm_hour, now.tm_mday, now.tm_mon, now.tm_wday) # get list of tasks to run this turn + tasks = config.schedule(now.tm_min, now.tm_hour, now.tm_mday, now.tm_mon, + now.tm_wday) # get list of tasks to run this turn + for task in tasks: - if isinstance(task, tuple): # they've specified kwargs, so pass those to start_task - start_task(task[0], **task[1]) - else: # otherwise, just pass task_name + if isinstance(task, tuple): # they've specified kwargs + start_task(task[0], **task[1]) # so pass those to start_task + else: # otherwise, just pass task_name start_task(task) def start_task(task_name, **kwargs): - """Start a given task in a new thread. Pass args to the task's run function.""" - print "Starting task '{}' in a new thread...".format(task_name) - + """Start a given task in a new thread. 
Pass args to the task's run() + function.""" + print "Starting task '{0}' in a new thread...".format(task_name) + try: - task = task_list[task_name] # get the class for this task, a subclass of BaseTask + task = task_list[task_name] except KeyError: - print "Couldn't find task '{}': wiki/tasks/{}.py does not exist.".format(task_name, task_name) + print ("Couldn't find task '{0}': wiki/tasks/{1}.py does not " + + "exist.").format(task_name, task_name) return - - task_thread = threading.Thread(target=lambda: task_wrapper(task, **kwargs)) # Normally we'd do task_wrapper(task, **kwargs), but because of threading we'd have to do Thread(target=task_wrapper, args=(task, **kwargs)), which doesn't work because the **kwargs is inside a tuple, not inside function params. Use lambda to get around the args=tuple nonsense - task_thread.name = "{} ({})".format(task_name, time.strftime("%b %d %H:%M:%S")) - task_thread.daemon = True # stop bot task threads automagically if the main bot stops + + task_thread = threading.Thread(target=lambda: task_wrapper(task, **kwargs)) + task_thread.name = "{0} ({1})".format(task_name, time.strftime( + "%b %d %H:%M:%S")) + + # stop bot task threads automagically if the main bot stops + task_thread.daemon = True + task_thread.start() def task_wrapper(task, **kwargs): @@ -73,7 +88,8 @@ def task_wrapper(task, **kwargs): try: task.run(**kwargs) except: - print "Task '{}' raised an exception and had to stop:".format(task.task_name) + print "Task '{0}' raised an exception and had to stop:".format( + task.task_name) traceback.print_exc() else: - print "Task '{}' finished without error.".format(task.task_name) + print "Task '{0}' finished without error.".format(task.task_name)