Quellcode durchsuchen

Merge branch 'feature/config_rewrite' into develop

tags/v0.1^2
Ben Kurtovic vor 13 Jahren
Ursprung
Commit
7dc33cb9af
27 geänderte Dateien mit 520 neuen und 320 gelöschten Zeilen
  1. +2
    -2
      .gitignore
  2. +0
    -0
     
  3. +0
    -25
      config/irc.py
  4. +0
    -24
      config/main.py
  5. +0
    -28
      config/schedule.py
  6. +0
    -9
      config/secure.default.py
  7. +0
    -69
      config/watcher.py
  8. +152
    -0
      core/config.py
  9. +78
    -52
      core/main.py
  10. +46
    -11
      earwigbot.py
  11. +4
    -0
      irc/classes/__init__.py
  12. +0
    -0
      irc/classes/base_command.py
  13. +0
    -0
      irc/classes/connection.py
  14. +0
    -0
      irc/classes/data.py
  15. +0
    -0
      irc/classes/rc.py
  16. +2
    -3
      irc/commands/afc_status.py
  17. +1
    -1
      irc/commands/calc.py
  18. +3
    -3
      irc/commands/chanops.py
  19. +6
    -4
      irc/commands/git.py
  20. +1
    -2
      irc/commands/help.py
  21. +1
    -1
      irc/commands/link.py
  22. +7
    -8
      irc/commands/tasks.py
  23. +1
    -1
      irc/commands/test.py
  24. +54
    -25
      irc/frontend.py
  25. +45
    -25
      irc/watcher.py
  26. +74
    -0
      irc/watcher_logic.py
  27. +43
    -27
      wiki/task_manager.py

+ 2
- 2
.gitignore Datei anzeigen

@@ -1,8 +1,8 @@
# Ignore python bytecode:
*.pyc

# Ignore secure config files:
config/secure.py
# Ignore bot-specific config file:
config.json

# Ignore pydev's nonsense:
.project


+ 0
- 0
Datei anzeigen


+ 0
- 25
config/irc.py Datei anzeigen

@@ -1,25 +0,0 @@
# -*- coding: utf-8 -*-

# EarwigBot Configuration File
# This file contains information that the bot uses to connect to IRC.

# our main (front-end) server's hostname and port
HOST = "irc.freenode.net"
PORT = 6667

# our watcher server's hostname, port, and RC channel
WATCHER_HOST = "irc.wikimedia.org"
WATCHER_PORT = 6667
WATCHER_CHAN = "#en.wikipedia"

# our nick, ident, and real name, used on both servers
NICK = "EarwigBot"
IDENT = "earwigbot"
REALNAME = "[[w:en:User:EarwigBot]]"

# channels to join on main server's startup
CHANS = ["##earwigbot", "##earwig", "#wikipedia-en-afc"]

# hardcoded hostnames of users with certain permissions
OWNERS = ["wikipedia/The-Earwig"] # can use owner-only commands (!restart and !git)
ADMINS = ["wikipedia/The-Earwig", "wikipedia/LeonardBloom"] # can use high-risk commands, e.g. !op

+ 0
- 24
config/main.py Datei anzeigen

@@ -1,24 +0,0 @@
# -*- coding: utf-8 -*-

# EarwigBot Configuration File
# This file tells the bot which of its components should be enabled.

# The IRC frontend (configured in config/irc.py) sits on a public IRC network,
# responds to commands given to it, and reports edits (if the IRC watcher
# component is enabled).
enable_irc_frontend = True

# The IRC watcher (connection details configured in config/irc.py as well) sits
# on an IRC network that gives a recent changes feed, usually irc.wikimedia.org.
# It looks for edits matching certain (often regex) patterns (rules configured
# in config/watcher.py), and either reports them to the IRC frontend (if
# enabled), or activates a task on the WikiBot (if configured to do so).
enable_irc_watcher = True

# EarwigBot doesn't have to edit a wiki, although this is its main purpose. If
# the wiki schedule is disabled, it will not be able to handle scheduled tasks
# that involve editing (such as creating a daily category every day at midnight
# UTC), but it can still edit through rules given in the watcher, and bot tasks
# can still be activated by the command line. The schedule is configured in
# config/schedule.py.
enable_wiki_schedule = True

+ 0
- 28
config/schedule.py Datei anzeigen

@@ -1,28 +0,0 @@
# -*- coding: utf-8 -*-

# EarwigBot Configuration File
# This file tells the bot when to run certain wiki-editing tasks.

def check(minute, hour, month_day, month, week_day):
    """Return the wiki tasks that should be started at the given time.

    Each entry of the returned list is either a bare task name or a tuple of
    (task_name, kwargs). Only 'minute', 'hour' and 'week_day' are consulted;
    'month_day' and 'month' are accepted but unused.
    """
    # nothing is scheduled off the hour
    if minute != 0:
        return []

    # hourly: save statistics to [[Template:AFC_statistics]]
    tasks = [("afc_statistics", {"action": "save"})]
    if hour != 0:
        return tasks

    # daily at midnight: create daily categories for WP:AFC and WP:FEED
    tasks.append("afc_dailycats")
    tasks.append("feed_dailycats")

    # weekly at midnight, one task per week_day value
    weekly = {
        0: "afc_undated",    # clear [[Category:Undated AfC submissions]]
        1: "afc_catdelink",  # delink mainspace categories in declined
                             # AfC submissions
        2: "wrongmime",      # tag files whose extensions do not agree with
                             # their MIME type
        3: "blptag",         # add |blp=yes to {{WPB}} or {{WPBS}} when it is
                             # used along with {{WP Biography}}
    }
    if week_day in weekly:
        tasks.append(weekly[week_day])

    return tasks

+ 0
- 9
config/secure.default.py Datei anzeigen

@@ -1,9 +0,0 @@
# -*- coding: utf-8 -*-

# EarwigBot Configuration File
# This file contains information that should be kept hidden, including passwords.

# IRC: identify ourselves to NickServ?
NS_AUTH = False
NS_USER = ""
NS_PASS = ""

+ 0
- 69
config/watcher.py Datei anzeigen

@@ -1,69 +0,0 @@
# -*- coding: utf-8 -*-

# EarwigBot Configuration File
# This file contains rules for the bot's watcher component.

import re

from wiki import task_manager

# Define different report channels on our front-end server. They /must/ be in CHANS in config/irc.py or the bot will not be able to send messages to them (unless they have -n set).
AFC_CHANS = ["#wikipedia-en-afc"] # report recent AfC changes/give AfC status messages upon join
BOT_CHANS = ["##earwigbot", "#wikipedia-en-afc"] # report edits containing "!earwigbot"

# Define some commonly used strings.
afc_prefix = r"wikipedia( talk)?:(wikiproject )?articles for creation"

# Define our compiled regexps used when finding certain edits. Raw string
# literals are used so that the regex escapes (\[ and \]) are never
# interpreted as (invalid) Python string escapes.
r_page = re.compile(afc_prefix)
r_ffu = re.compile(r"wikipedia( talk)?:files for upload")
# an AFC page was either moved locally or out
r_move1 = re.compile(r"moved \[\[{}".format(afc_prefix))
# an outside page was moved into AFC
r_move2 = re.compile(r"moved \[\[(.*?)\]\] to \[\[{}".format(afc_prefix))
# captures (old title, new title)
r_moved_pages = re.compile(r"^moved \[\[(.*?)\]\] to \[\[(.*?)\]\]")
r_delete = re.compile(r'deleted "\[\[{}'.format(afc_prefix))
# captures the deleted page's title
r_deleted_page = re.compile(r'^deleted "\[\[(.*?)\]\]')
r_restore = re.compile(r'restored "\[\[{}'.format(afc_prefix))
# captures the restored page's title
r_restored_page = re.compile(r'^restored "\[\[(.*?)\]\]')
r_protect = re.compile(r'protected "\[\[{}'.format(afc_prefix))

def process(rc):
    """Decide what to do with a single recent-changes event.

    Reads rc.page, rc.comment, rc.msg and rc.flags. Depending on which rule
    matches, wiki tasks may be started through task_manager.start_task().
    Return the set of frontend channels the event should be reported to
    (possibly empty).
    """
    chans = set()  # channels to report this message to
    page_name = rc.page.lower()
    comment = rc.comment.lower()

    if "!earwigbot" in rc.msg.lower():
        chans.update(BOT_CHANS)

    if r_page.search(page_name):
        task_manager.start_task("afc_statistics", action="process_edit", page=rc.page)
        task_manager.start_task("afc_copyvios", action="process_edit", page=rc.page)
        chans.update(AFC_CHANS)

    elif r_ffu.match(page_name):
        chans.update(AFC_CHANS)

    elif page_name.startswith("template:afc submission"):
        chans.update(AFC_CHANS)

    elif rc.flags == "move" and (r_move1.match(comment) or r_move2.match(comment)):
        # r_moved_pages has two groups, so each findall() hit is a tuple of
        # (old title, new title)
        p = r_moved_pages.findall(rc.comment)[0]
        task_manager.start_task("afc_statistics", action="process_move", pages=p)
        task_manager.start_task("afc_copyvios", action="process_move", pages=p)
        chans.update(AFC_CHANS)

    elif rc.flags == "delete" and r_delete.match(comment):
        # r_deleted_page has a single group, so each findall() hit is already
        # the page title; indexing it again would yield only its first
        # character
        p = r_deleted_page.findall(rc.comment)[0]
        task_manager.start_task("afc_statistics", action="process_delete", page=p)
        task_manager.start_task("afc_copyvios", action="process_delete", page=p)
        chans.update(AFC_CHANS)

    elif rc.flags == "restore" and r_restore.match(comment):
        # single group again: the first hit is the full page title
        p = r_restored_page.findall(rc.comment)[0]
        task_manager.start_task("afc_statistics", action="process_restore", page=p)
        task_manager.start_task("afc_copyvios", action="process_restore", page=p)
        chans.update(AFC_CHANS)

    elif rc.flags == "protect" and r_protect.match(comment):
        chans.update(AFC_CHANS)

    return chans

+ 152
- 0
core/config.py Datei anzeigen

@@ -0,0 +1,152 @@
# -*- coding: utf-8 -*-

"""
EarwigBot's JSON Config File Parser

This handles all tasks involving reading and writing to our config file,
including encrypting and decrypting passwords and making a new config file from
scratch at the initial bot run.

Usually you'll just want to do "from core import config" and access config data
from within config's three global variables and one function:

* config.components - a list of enabled components
* config.wiki - a dict of config information for wiki-editing
* config.irc - a dict of config information for IRC
* config.schedule() - returns a list of tasks scheduled to run now
"""

import json
from os import makedirs, path

from lib import blowfish

script_dir = path.dirname(path.abspath(__file__))
root_dir = path.split(script_dir)[0]
config_path = path.join(root_dir, "config.json")

_config = None # holds data loaded from our config file

# set our three easy-config-access global variables to None
components, wiki, irc = (None, None, None)

def load_config():
    """Read config.json (at config_path) and store the parsed data in the
    module-level _config. If the file is not valid JSON, report the parse
    error and exit with status 1."""
    global _config
    with open(config_path, 'r') as config_file:
        try:
            parsed = json.load(config_file)
        except ValueError as error:
            print("Error parsing config file {0}:".format(config_path))
            print(error)
            exit(1)
        _config = parsed

def verify_config():
    """Make sure a config file exists and report whether its passwords are
    encrypted.

    With a config file present, load it and return its "encryptPasswords"
    value (False if that key is absent). Without one, ask the user whether to
    configure the bot now: on a "y..." answer return the result of
    make_new_config(), otherwise exit."""
    if not path.exists(config_path):
        print("You haven't configured the bot yet!")
        answer = raw_input("Would you like to do this now? [y/n] ")
        if not answer.lower().startswith("y"):
            exit()
        return make_new_config()

    load_config()
    try:
        encrypted = _config["encryptPasswords"]  # are passwords encrypted?
    except KeyError:
        encrypted = False  # assume passwords are not encrypted by default
    return encrypted

def parse_config(key):
    """Reload the config file and publish its "components", "wiki" and "irc"
    sections as the module globals of the same names (an empty list/dict is
    used for a missing section).

    If "encryptPasswords" is set, decrypt the stored passwords with the given
    key. A blowfish decryption error is reported to the user and ends the
    program with exit status 1."""
    global components, wiki, irc

    def section(name, default):
        # fetch a top-level config section, tolerating its absence
        try:
            return _config[name]
        except KeyError:
            return default

    load_config()  # possibly a redundant re-load, but a harmless one
    components = section("components", [])
    wiki = section("wiki", {})
    irc = section("irc", {})

    encrypted_items = (
        "wiki['password']",
        "irc['frontend']['nickservPassword']",
        "irc['watcher']['nickservPassword']",
    )
    try:
        if _config["encryptPasswords"]:
            for item in encrypted_items:
                decrypt(key, item)
    except KeyError:
        pass  # no "encryptPasswords" entry: nothing to decrypt
    except blowfish.BlowfishError as error:
        print("\nError decrypting passwords:")
        print("{0}: {1}.".format(error.__class__.__name__, error))
        exit(1)

def decrypt(key, item):
    """Decrypt a config value in place with blowfish.decrypt() and the given
    key.

    'item' is not the value itself but a string containing the expression
    that names it: call decrypt(key, "wiki['password']"), NOT
    decrypt(key, wiki['password']), because the latter gives us nothing to
    assign the decrypted result back to.

    If a KeyError is raised while reading or decrypting the value (e.g. the
    config entry does not exist), return without changing anything."""
    global irc, wiki
    try:
        # NOTE: 'item' is evaluated as code; only pass trusted, hard-coded
        # expressions, never text taken from outside the program
        result = blowfish.decrypt(key, eval(item))
    except KeyError:
        return
    # the executed text refers to the local name 'result', so that name must
    # stay in sync with the assignment above
    exec "{0} = result".format(item)

def schedule(minute, hour, month_day, month, week_day):
    """Return the list of tasks scheduled to run at the time described by
    the arguments; call it as config.schedule(args).

    The data comes from _config["schedule"], a list of events. An event
    applies when every time field it specifies ("minute", "hour",
    "month_day", "month", "week_day") equals the corresponding argument;
    fields it leaves out match anything. The "tasks" of every applicable
    event are collected, each being either a task name or a
    (task_name, kwargs) pair."""
    now = {"minute": minute, "hour": hour, "month_day": month_day,
           "month": month, "week_day": week_day}

    try:
        events = _config["schedule"]
    except KeyError:
        events = []

    tasks = []  # tasks to run this turn
    for event in events:
        for field, current in now.items():
            try:
                required = event[field]
            except KeyError:
                continue  # the event places no constraint on this field
            if required != current:
                break  # constraint not met; skip this event
        else:
            # no constraint failed, so the event is due
            try:
                tasks.extend(event["tasks"])
            except KeyError:
                pass

    return tasks

def make_new_config():
    """Start a new configuration based on the user's input.

    Make sure the directory that will hold config.json exists, then ask
    whether stored passwords should be encrypted. Return True if the answer
    starts with "y" (case-insensitive), otherwise False. No config file is
    written by this function itself."""
    # derive the directory from config_path and only create it when missing,
    # since makedirs() raises if the directory already exists
    config_dir = path.dirname(config_path)
    if not path.isdir(config_dir):
        makedirs(config_dir)

    encrypt = raw_input("Would you like to encrypt passwords stored in " +
                        "config.json? [y/n] ")
    return encrypt.lower().startswith("y")

+ 78
- 52
core/main.py Datei anzeigen

@@ -1,23 +1,34 @@
#! /usr/bin/python
# -*- coding: utf-8 -*-

## EarwigBot's Core

## EarwigBot has three components that can run independently of each other: an
## IRC front-end, an IRC watcher, and a wiki scheduler.
## * The IRC front-end runs on a normal IRC server and expects users to
## interact with it/give it commands.
## * The IRC watcher runs on a wiki recent-changes server and listens for
## edits. Users cannot interact with this part of the bot.
## * The wiki scheduler runs wiki-editing bot tasks in separate threads at
## user-defined times through a cron-like interface.

## There is a "priority" system here:
## 1. If the IRC frontend is enabled, it will run on the main thread, and the
## IRC watcher and wiki scheduler (if enabled) will run on separate threads.
## 2. If the wiki scheduler is enabled, it will run on the main thread, and the
## IRC watcher (if enabled) will run on a separate thread.
## 3. If the IRC watcher is enabled, it will run on the main (and only) thread.
## Else, the bot will stop, as no components are enabled.
"""
EarwigBot's Core

This should not be run directly; the wrapper in "earwigbot.py" is preferred,
but it should work fine alone, as long as you enter the password-unlock key at
the initial hidden prompt.

The core is essentially responsible for starting the various bot components
(irc, scheduler, etc) and making sure they are all happy. An explanation of the
different components follows:

EarwigBot has three components that can run independently of each other: an IRC
front-end, an IRC watcher, and a wiki scheduler.
* The IRC front-end runs on a normal IRC server and expects users to interact
with it/give it commands.
* The IRC watcher runs on a wiki recent-changes server and listens for edits.
Users cannot interact with this part of the bot.
* The wiki scheduler runs wiki-editing bot tasks in separate threads at
user-defined times through a cron-like interface.

There is a "priority" system here:
1. If the IRC frontend is enabled, it will run on the main thread, and the IRC
watcher and wiki scheduler (if enabled) will run on separate threads.
2. If the wiki scheduler is enabled, it will run on the main thread, and the
IRC watcher (if enabled) will run on a separate thread.
3. If the IRC watcher is enabled, it will run on the main (and only) thread.
Else, the bot will stop, as no components are enabled.
"""

import threading
import time
@@ -25,30 +36,31 @@ import traceback
import sys
import os

parent_dir = os.path.split(sys.path[0])[0]
sys.path.append(parent_dir) # make sure we look in the parent directory for modules
script_dir = os.path.dirname(os.path.abspath(__file__))
root_dir = os.path.split(script_dir)[0] # the bot's "root" directory relative
# to its different components
sys.path.append(root_dir) # make sure we look in the root dir for modules

from config.main import *
from core import config
from irc import frontend, watcher
from wiki import task_manager

f_conn = None
w_conn = None

def irc_watcher(f_conn):
def irc_watcher(f_conn=None):
"""Function to handle the IRC watcher as another thread (if frontend and/or
scheduler is enabled), otherwise run as the main thread."""
global w_conn
print "\nStarting IRC watcher..."
while 1: # restart the watcher component if (just) it breaks
while 1: # restart the watcher component if it breaks (and nothing else)
w_conn = watcher.get_connection()
w_conn.connect()
print # print a blank line here to signify that the bot has finished starting up
print # blank line to signify that the bot has finished starting up
try:
watcher.main(w_conn, f_conn)
except:
traceback.print_exc()
time.sleep(5) # sleep a bit before restarting watcher
time.sleep(5) # sleep a bit before restarting watcher
print "\nWatcher has stopped; restarting component..."

def wiki_scheduler():
@@ -57,12 +69,12 @@ def wiki_scheduler():
while 1:
time_start = time.time()
now = time.gmtime(time_start)
task_manager.start_tasks(now)
time_end = time.time()
time_diff = time_start - time_end
if time_diff < 60: # sleep until the next minute
if time_diff < 60: # sleep until the next minute
time.sleep(60 - time_diff)

def irc_frontend():
@@ -70,20 +82,21 @@ def irc_frontend():
enable the wiki scheduler and IRC watcher on new threads if they are
enabled."""
global f_conn
print "\nStarting IRC frontend..."
print "Starting IRC frontend..."
f_conn = frontend.get_connection()
frontend.startup(f_conn)
if enable_wiki_schedule:
if "wiki_schedule" in config.components:
print "\nStarting wiki scheduler..."
task_manager.load_tasks()
t_scheduler = threading.Thread(target=wiki_scheduler)
t_scheduler.name = "wiki-scheduler"
t_scheduler.daemon = True
t_scheduler.start()
if enable_irc_watcher:

if "irc_watcher" in config.components:
print "\nStarting IRC watcher..."
t_watcher = threading.Thread(target=irc_watcher, args=(f_conn,))
t_watcher.name = "irc-watcher"
t_watcher.daemon = True
@@ -91,32 +104,45 @@ def irc_frontend():

frontend.main()

if enable_irc_watcher:
if "irc_watcher" in config.components:
w_conn.close()
f_conn.close()
def run():
if enable_irc_frontend: # make the frontend run on our primary thread if enabled, and enable additional components through that function
irc_frontend()
elif enable_wiki_schedule: # the scheduler is enabled - run it on the main thread, but also run the IRC watcher on another thread if it is enabled
print "\nStarting wiki scheduler..."
task_manager.load_tasks()
if enable_irc_watcher:
t_watcher = threading.Thread(target=irc_watcher, args=(f_conn,))
try:
key = raw_input() # wait for our password unlock key from the bot's
except EOFError: # wrapper
key = None
config.parse_config(key) # load data from the config file and parse it
# using the unlock key
enabled = config.components

if "irc_frontend" in enabled: # make the frontend run on our primary
irc_frontend() # thread if enabled, and enable additional
# components through that function

elif "wiki_schedule" in enabled: # run the scheduler on the main
print "Starting wiki scheduler..." # thread, but also run the IRC
task_manager.load_tasks() # watcher on another thread iff it
if "irc_watcher" in enabled: # is enabled
print "\nStarting IRC watcher..."
t_watcher = threading.Thread(target=irc_watcher, args=())
t_watcher.name = "irc-watcher"
t_watcher.daemon = True
t_watcher.start()
wiki_scheduler()
elif enable_irc_watcher: # the IRC watcher is our only enabled component, so run its function only and don't worry about anything else
irc_watcher()
else: # nothing is enabled!
exit("\nNo bot parts are enabled; stopping...")

elif "irc_watcher" in enabled: # the IRC watcher is our only enabled
print "Starting IRC watcher..." # component, so run its function only
irc_watcher() # and don't worry about anything else

else: # nothing is enabled!
print "No bot parts are enabled; stopping..."
exit(1)

if __name__ == "__main__":
try:
run()
except KeyboardInterrupt:
exit("\nKeyboardInterrupt: stopping main bot loop.")
print "\nKeyboardInterrupt: stopping main bot loop."
exit(1)

+ 46
- 11
earwigbot.py Datei anzeigen

@@ -1,22 +1,57 @@
#! /usr/bin/python
# -*- coding: utf-8 -*-

import time
from subprocess import *
"""
EarwigBot

try:
from config import irc, main, schedule, secure, watcher
except ImportError:
print """Missing a config file! Make sure you have configured the bot. All *.py.default files in config/
should have their .default extension removed, and the info inside should be corrected."""
exit()
A thin wrapper for EarwigBot's main bot code, specified by bot_script. This
wrapper will automatically restart the bot when it shuts down (from !restart,
for example). It requests the bot's password at startup and reuses it every
time the bot restarts internally, so you do not need to re-enter the password
after using !restart.

For information about the bot as a whole, see the attached README.md file (in
markdown format!) and the LICENSE for licensing information.
"""

from getpass import getpass
from subprocess import Popen, PIPE
from os import path
from sys import executable
from time import sleep

from core.config import verify_config

__author__ = "Ben Kurtovic"
__copyright__ = "Copyright (c) 2009-2011 by Ben Kurtovic"
__license__ = "MIT License"
__version__ = "0.1dev"
__email__ = "ben.kurtovic@verizon.net"

bot_script = path.join(path.dirname(path.abspath(__file__)), "core", "main.py")

def main():
print "EarwigBot v{0}\n".format(__version__)

is_encrypted = verify_config()
if is_encrypted: # passwords in the config file are encrypted
key = getpass("Enter key to unencrypt bot passwords: ")
else:
key = None

while 1:
call(['python', 'core/main.py'])
time.sleep(5) # sleep for five seconds between bot runs
bot = Popen([executable, bot_script], stdin=PIPE)
bot.communicate(key) # give the key to core.config.load_config()
return_code = bot.wait()
if return_code == 1:
exit() # let critical exceptions in the subprocess cause us to
# exit as well
else:
sleep(5) # sleep between bot runs following a non-critical
# subprocess exit

if __name__ == "__main__":
try:
main()
except KeyboardInterrupt:
exit("\nKeyboardInterrupt: stopping bot wrapper.")
print "\nKeyboardInterrupt: stopping bot wrapper."

+ 4
- 0
irc/classes/__init__.py Datei anzeigen

@@ -0,0 +1,4 @@
from base_command import *
from connection import *
from data import *
from rc import *

irc/base_command.py → irc/classes/base_command.py Datei anzeigen


irc/connection.py → irc/classes/connection.py Datei anzeigen


irc/data.py → irc/classes/data.py Datei anzeigen


irc/rc.py → irc/classes/rc.py Datei anzeigen


+ 2
- 3
irc/commands/afc_status.py Datei anzeigen

@@ -6,8 +6,7 @@ import json
import re
import urllib

from config.watcher import *
from irc.base_command import BaseCommand
from irc.classes import BaseCommand

class AFCStatus(BaseCommand):
def get_hooks(self):
@@ -22,7 +21,7 @@ class AFCStatus(BaseCommand):
data.command == "number" or data.command == "afc_status"):
return True
try:
if data.line[1] == "JOIN" and data.chan in AFC_CHANS:
if data.line[1] == "JOIN" and data.chan == "#wikipedia-en-afc":
return True
except IndexError:
pass


+ 1
- 1
irc/commands/calc.py Datei anzeigen

@@ -5,7 +5,7 @@
import re
import urllib

from irc.base_command import BaseCommand
from irc.classes import BaseCommand

class Calc(BaseCommand):
def get_hooks(self):


+ 3
- 3
irc/commands/chanops.py Datei anzeigen

@@ -2,8 +2,8 @@

# Voice/devoice/op/deop users in the channel.

from irc.base_command import BaseCommand
from config.irc import *
from irc.classes import BaseCommand
from core import config

class ChanOps(BaseCommand):
def get_hooks(self):
@@ -19,7 +19,7 @@ class ChanOps(BaseCommand):
return False

def process(self, data):
if data.host not in ADMINS:
if data.host not in config.irc["permissions"]["admins"]:
self.connection.reply(data, "you must be a bot admin to use this command.")
return



+ 6
- 4
irc/commands/git.py Datei anzeigen

@@ -2,10 +2,12 @@

# Commands to interface with the bot's git repository; use '!git help' for sub-command list.

import shlex, subprocess, re
import shlex
import subprocess
import re

from config.irc import *
from irc.base_command import BaseCommand
from irc.classes import BaseCommand
from core import config

class Git(BaseCommand):
def get_hooks(self):
@@ -21,7 +23,7 @@ class Git(BaseCommand):

def process(self, data):
self.data = data
if data.host not in OWNERS:
if data.host not in config.irc["permissions"]["owners"]:
self.connection.reply(data, "you must be a bot owner to use this command.")
return



+ 1
- 2
irc/commands/help.py Datei anzeigen

@@ -2,8 +2,7 @@

# Generates help information.

from irc.base_command import BaseCommand
from irc.data import Data
from irc.classes import BaseCommand, Data
from irc import command_handler

class Help(BaseCommand):


+ 1
- 1
irc/commands/link.py Datei anzeigen

@@ -4,7 +4,7 @@

import re

from irc.base_command import BaseCommand
from irc.classes import BaseCommand

class Link(BaseCommand):
def get_hooks(self):


+ 7
- 8
irc/commands/tasks.py Datei anzeigen

@@ -2,13 +2,12 @@

# Manage wiki tasks from IRC, and check on thread status.

import threading, re
import threading
import re

from irc.base_command import BaseCommand
from irc.data import *
from irc.classes import BaseCommand, Data, KwargParseException
from wiki import task_manager
from config.main import *
from config.irc import *
from core import config

class Tasks(BaseCommand):
def get_hooks(self):
@@ -24,7 +23,7 @@ class Tasks(BaseCommand):

def process(self, data):
self.data = data
if data.host not in OWNERS:
if data.host not in config.irc["permissions"]["owners"]:
self.connection.reply(data, "at this time, you must be a bot owner to use this command.")
return

@@ -116,9 +115,9 @@ class Tasks(BaseCommand):

def get_main_thread_name(self):
"""Return the "proper" name of the MainThread; e.g. "irc-frontend" or "irc-watcher"."""
if enable_irc_frontend:
if "irc_frontend" in config.components:
return "irc-frontend"
elif enable_wiki_schedule:
elif "wiki_schedule" in config.components:
return "wiki-scheduler"
else:
return "irc-watcher"

+ 1
- 1
irc/commands/test.py Datei anzeigen

@@ -4,7 +4,7 @@

import random

from irc.base_command import BaseCommand
from irc.classes import BaseCommand

class Test(BaseCommand):
def get_hooks(self):


+ 54
- 25
irc/frontend.py Datei anzeigen

@@ -1,28 +1,42 @@
# -*- coding: utf-8 -*-

## Imports
import re, time
"""
EarwigBot's IRC Front-end Component

from config.irc import *
from config.secure import *
The IRC frontend runs on a normal IRC server and expects users to interact with
it and give it commands. Commands are stored as "command classes", subclasses
of BaseCommand in irc/classes/base_command.py. All command classes are automatically
imported by irc/command_handler.py if they are in irc/commands.
"""

from re import findall

from core import config
from irc import command_handler
from irc.connection import *
from irc.data import Data
from irc.classes import Connection, Data, BrokenSocketException

connection = None

def get_connection():
connection = Connection(HOST, PORT, NICK, IDENT, REALNAME)
"""Return a new Connection() instance with information about our server
connection, but don't actually connect yet."""
cf = config.irc["frontend"]
connection = Connection(cf["host"], cf["port"], cf["nick"], cf["ident"],
cf["realname"])
return connection

def startup(conn):
"""Accept a single arg, a Connection() object, and set our global variable
'connection' to it. Load all command classes in irc/commands with
command_handler, and then establish a connection with the IRC server."""
global connection
connection = conn
command_handler.load_commands(connection)
connection.connect()

def main():
"""Main loop for the Frontend IRC Bot component. get_connection() and
startup() should have already been called."""
read_buffer = str()

while 1:
@@ -35,41 +49,56 @@ def main():
lines = read_buffer.split("\n")
read_buffer = lines.pop()

for line in lines:
for line in lines: # handle a single message from IRC
line = line.strip().split()
data = Data()
data = Data() # new Data() instance to store info about this line
data.line = line

if line[1] == "JOIN":
data.nick, data.ident, data.host = re.findall(":(.*?)!(.*?)@(.*?)\Z", line[0])[0]
data.nick, data.ident, data.host = findall(
":(.*?)!(.*?)@(.*?)\Z", line[0])[0]
data.chan = line[2][1:]
command_handler.check("join", data) # check if there's anything we can respond to, and if so, respond
command_handler.check("join", data) # check for 'join' hooks in
# our commands

if line[1] == "PRIVMSG":
data.nick, data.ident, data.host = re.findall(":(.*?)!(.*?)@(.*?)\Z", line[0])[0]
data.nick, data.ident, data.host = findall(
":(.*?)!(.*?)@(.*?)\Z", line[0])[0]
data.msg = ' '.join(line[3:])[1:]
data.chan = line[2]

if data.chan == NICK: # this is a privmsg to us, so set 'chan' as the nick of the sender
if data.chan == config.irc["frontend"]["nick"]:
# this is a privmsg to us, so set 'chan' as the nick of the
# sender, then check for private-only command hooks
data.chan = data.nick
command_handler.check("msg_private", data) # only respond if it's a private message
command_handler.check("msg_private", data)
else:
command_handler.check("msg_public", data) # only respond if it's a public (channel) message
# check for public-only command hooks
command_handler.check("msg_public", data)

command_handler.check("msg", data) # check for general messages
# check for command hooks that apply to all messages
command_handler.check("msg", data)

if data.msg.startswith("!restart"): # hardcode the !restart command (we can't restart from within an ordinary command)
if data.host in OWNERS:
# hardcode the !restart command (we can't restart from within
# an ordinary command)
if data.msg in ["!restart", ".restart"]:
if data.host in config.irc["permissions"]["owners"]:
print "Restarting bot per owner request..."
return

if line[0] == "PING": # If we are pinged, pong back to the server
if line[0] == "PING": # if we are pinged, pong back to the server
connection.send("PONG %s" % line[1])

if line[1] == "376":
if NS_AUTH: # if we're supposed to auth to nickserv, do that
connection.say("NickServ", "IDENTIFY %s %s" % (NS_USER, NS_PASS))
time.sleep(3) # sleep for a bit so we don't join channels un-authed
for chan in CHANS: # join all of our startup channels
if line[1] == "376": # we've successfully connected to the network
try: # if we're supposed to auth to nickserv, do that
ns_username = config.irc["frontend"]["nickservUsername"]
ns_password = config.irc["frontend"]["nickservPassword"]
except KeyError:
pass
else:
connection.say("NickServ", "IDENTIFY {0} {1}".format(
ns_username, ns_password))
# join all of our startup channels
for chan in config.irc["frontend"]["channels"]:
connection.join(chan)

+ 45
- 25
irc/watcher.py Datei anzeigen

@@ -1,20 +1,34 @@
# -*- coding: utf-8 -*-

## Imports
from config.irc import *
from config.main import *
from config.watcher import *
"""
EarwigBot's IRC Watcher Component

from irc.connection import *
from irc.rc import RC
The IRC watcher runs on a wiki recent-changes server and listens for edits.
Users cannot interact with this part of the bot. When an event occurs, run it
through irc/watcher_logic.py's process() function, which can result in either
wiki bot tasks being started (listed in wiki/tasks/) or messages being sent to
channels in the IRC frontend.
"""

global frontend_conn
from core import config
from irc.classes import Connection, RC, BrokenSocketException
from irc import watcher_logic

frontend_conn = None

def get_connection():
connection = Connection(WATCHER_HOST, WATCHER_PORT, NICK, IDENT, REALNAME)
"""Return a new Connection() instance with information about our server
connection, but don't actually connect yet."""
cf = config.irc["watcher"]
connection = Connection(cf["host"], cf["port"], cf["nick"], cf["ident"],
cf["realname"])
return connection

def main(connection, f_conn):
def main(connection, f_conn=None):
"""Main loop for the Watcher IRC Bot component. get_connection() should
have already been called and the connection should have been started with
connection.connect(). Accept the frontend connection as well as an optional
parameter in order to send messages directly to frontend IRC channels."""
global frontend_conn
frontend_conn = f_conn
read_buffer = str()
@@ -33,26 +47,32 @@ def main(connection, f_conn):

if line[1] == "PRIVMSG":
chan = line[2]
if chan != WATCHER_CHAN: # if we're getting a msg from another channel, ignore it

# ignore messages originating from channels not in our list, to
# prevent someone PMing us false data
if chan not in config.irc["watcher"]["channels"]:
continue

msg = ' '.join(line[3:])[1:]
rc = RC(msg) # create a new RC object to store this change's data
rc.parse()
check(rc)
rc = RC(msg) # new RC object to store this event's data
rc.parse() # parse a message into pagenames, usernames, etc.
process(rc) # report to frontend channels or start tasks

if line[0] == "PING": # If we are pinged, pong back to the server
if line[0] == "PING": # if we are pinged, pong back to the server
connection.send("PONG %s" % line[1])

if line[1] == "376": # Join the recent changes channel when we've finished starting up
connection.join(WATCHER_CHAN)

def check(rc):
"""check if we're supposed to report this message anywhere"""
results = process(rc) # process the message in config/watcher.py, and get a list of channels to send it to
if not results:
return
pretty = rc.get_pretty()
if enable_irc_frontend:
for chan in results:
# when we've finished starting up, join all watcher channels
if line[1] == "376":
for chan in config.irc["watcher"]["channels"]:
connection.join(chan)

def process(rc):
"""Process a message from IRC (technically, an RC object). The actual
processing is configurable, so we don't have that hard-coded here. We
simply call irc/watcher_logic.py's process() function and expect a list of
channels back, which we report the event data to."""
chans = watcher_logic.process(rc)
if chans and frontend_conn:
pretty = rc.get_pretty()
for chan in chans:
frontend_conn.say(chan, pretty)

+ 74
- 0
irc/watcher_logic.py Datei anzeigen

@@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-

"""
EarwigBot's IRC Watcher Logic

This file contains (configurable!) rules that EarwigBot's watcher uses after it
receives an event from IRC.

This should, ideally, be in config.json somehow, but Python code makes more
sense for this sort of thing... so...
"""

import re

from wiki import task_manager as tasks

# Regex fragment matching the (lowercased) title prefix shared by all
# Articles for Creation pages, e.g. "wikipedia talk:articles for creation".
afc_prefix = "wikipedia( talk)?:(wikiproject )?articles for creation"

# compile some regexps used when finding specific events
#
# Patterns built from afc_prefix are lowercase, so process() applies them to
# lowercased page names and log comments. The r_*_page(s) patterns only
# capture page titles and are applied to the original, case-preserved comment.
#
# Raw strings keep the regex escapes literal, and explicit "{0}" fields are
# used because auto-numbered "{}" fields are not available on Python 2.6.
r_page = re.compile(afc_prefix)
r_ffu = re.compile("wikipedia( talk)?:files for upload")
r_move1 = re.compile(r"moved \[\[{0}".format(afc_prefix))
r_move2 = re.compile(r"moved \[\[(.*?)\]\] to \[\[{0}".format(afc_prefix))
r_moved_pages = re.compile(r"^moved \[\[(.*?)\]\] to \[\[(.*?)\]\]")
r_delete = re.compile(r'deleted "\[\[{0}'.format(afc_prefix))
r_deleted_page = re.compile(r'^deleted "\[\[(.*?)\]\]')
r_restore = re.compile(r'restored "\[\[{0}'.format(afc_prefix))
r_restored_page = re.compile(r'^restored "\[\[(.*?)\]\]')
r_protect = re.compile(r'protected "\[\[{0}'.format(afc_prefix))

def process(rc):
    """Given an RC() object, return a set of channels to report this event to.

    Also start any wiki bot tasks within this function if necessary.

    The page name and log comment are lowercased before being tested against
    the module-level trigger regexps; page titles handed to tasks are taken
    from the original, case-preserved comment.

    rc is read through its page, comment, msg and flags attributes. The
    return value is a (possibly empty) set of channel names.
    """
    afc_chan = "#wikipedia-en-afc"
    chans = set()  # channels to report this message to
    page_name = rc.page.lower()
    comment = rc.comment.lower()

    if "!earwigbot" in rc.msg.lower():
        chans.update(("##earwigbot", afc_chan))

    if r_page.search(page_name):
        tasks.start_task("afc_statistics", action="process_edit", page=rc.page)
        tasks.start_task("afc_copyvios", action="process_edit", page=rc.page)
        chans.add(afc_chan)

    elif r_ffu.match(page_name):
        chans.add(afc_chan)

    elif page_name.startswith("template:afc submission"):
        chans.add(afc_chan)

    elif rc.flags == "move" and (r_move1.match(comment) or
                                 r_move2.match(comment)):
        # The trigger matched the lowercased comment, but titles are taken
        # from the original comment; skip the tasks if that extraction fails
        # rather than raising IndexError in the watcher loop.
        found = r_moved_pages.match(rc.comment)
        if found:
            pages = found.groups()  # (old title, new title)
            tasks.start_task("afc_statistics", action="process_move",
                             pages=pages)
            tasks.start_task("afc_copyvios", action="process_move",
                             pages=pages)
        chans.add(afc_chan)

    elif rc.flags == "delete" and r_delete.match(comment):
        found = r_deleted_page.match(rc.comment)
        if found:
            # The pattern has a single group, so group(1) is the whole page
            # title (indexing a findall() result twice yields only its first
            # character).
            page = found.group(1)
            tasks.start_task("afc_statistics", action="process_delete",
                             page=page)
            tasks.start_task("afc_copyvios", action="process_delete",
                             page=page)
        chans.add(afc_chan)

    elif rc.flags == "restore" and r_restore.match(comment):
        found = r_restored_page.match(rc.comment)
        if found:
            page = found.group(1)  # whole title, as in the delete branch
            tasks.start_task("afc_statistics", action="process_restore",
                             page=page)
            tasks.start_task("afc_copyvios", action="process_restore",
                             page=page)
        chans.add(afc_chan)

    elif rc.flags == "protect" and r_protect.match(comment):
        chans.add(afc_chan)

    return chans

+ 43
- 27
wiki/task_manager.py Datei anzeigen

@@ -1,25 +1,32 @@
# -*- coding: utf-8 -*-

# A module to manage bot tasks.
"""
EarwigBot's Wiki Bot Task Manager

This module provides some functions to run and load bot tasks from wiki/tasks/.
"""

import time
import traceback
import threading
import os

from config import schedule
from core import config

task_list = dict() # the key is the task's name, the value is the task's class instance
# store loaded tasks as a dict where the key is the task name and the value is
# an instance of the task class (wiki.tasks.task_file.Task())
task_list = dict()

def load_tasks():
"""Load all valid task classes from wiki/tasks/, and add them to the task_list."""
files = os.listdir(os.path.join("wiki", "tasks")) # get all files in wiki/tasks/
files.sort() # alphabetically sort list of files
"""Load all valid task classes from wiki/tasks/, and add them to the
task_list variable."""
files = os.listdir(os.path.join("wiki", "tasks"))
files.sort() # alphabetically sort all files in wiki/tasks/
for f in files:
if not os.path.isfile(os.path.join("wiki", "tasks", f)): # ignore non-files
continue
if f.startswith("_") or not f.endswith(".py"): # ignore non-python files or files beginning with "_"
continue
if not os.path.isfile(os.path.join("wiki", "tasks", f)):
continue # ignore non-files
if f.startswith("_") or not f.endswith(".py"):
continue # ignore non-python files or files beginning with an _
load_class_from_file(f)
print "Found %s tasks: %s." % (len(task_list), ', '.join(task_list.keys()))

@@ -27,10 +34,10 @@ def load_class_from_file(f):
"""Look in a given file for the task class."""
global task_list
module = f[:-3] # strip .py from end
module = f[:-3] # strip .py from end
try:
exec "from wiki.tasks import %s as m" % module
except: # importing the file failed for some reason...
except: # importing the file failed for some reason...
print "Couldn't load task file %s:" % f
traceback.print_exc()
return
@@ -46,26 +53,34 @@ def load_class_from_file(f):

def start_tasks(now=time.gmtime()):
"""Start all tasks that are supposed to be run at a given time."""
tasks = schedule.check(now.tm_min, now.tm_hour, now.tm_mday, now.tm_mon, now.tm_wday) # get list of tasks to run this turn
tasks = config.schedule(now.tm_min, now.tm_hour, now.tm_mday, now.tm_mon,
now.tm_wday) # get list of tasks to run this turn

for task in tasks:
if isinstance(task, tuple): # they've specified kwargs, so pass those to start_task
start_task(task[0], **task[1])
else: # otherwise, just pass task_name
if isinstance(task, tuple): # they've specified kwargs
start_task(task[0], **task[1]) # so pass those to start_task
else: # otherwise, just pass task_name
start_task(task)

def start_task(task_name, **kwargs):
"""Start a given task in a new thread. Pass args to the task's run function."""
print "Starting task '{}' in a new thread...".format(task_name)
"""Start a given task in a new thread. Pass args to the task's run()
function."""
print "Starting task '{0}' in a new thread...".format(task_name)

try:
task = task_list[task_name] # get the class for this task, a subclass of BaseTask
task = task_list[task_name]
except KeyError:
print "Couldn't find task '{}': wiki/tasks/{}.py does not exist.".format(task_name, task_name)
print ("Couldn't find task '{0}': wiki/tasks/{1}.py does not " +
"exist.").format(task_name, task_name)
return
task_thread = threading.Thread(target=lambda: task_wrapper(task, **kwargs)) # Normally we'd do task_wrapper(task, **kwargs), but because of threading we'd have to do Thread(target=task_wrapper, args=(task, **kwargs)), which doesn't work because the **kwargs is inside a tuple, not inside function params. Use lambda to get around the args=tuple nonsense
task_thread.name = "{} ({})".format(task_name, time.strftime("%b %d %H:%M:%S"))
task_thread.daemon = True # stop bot task threads automagically if the main bot stops

task_thread = threading.Thread(target=lambda: task_wrapper(task, **kwargs))
task_thread.name = "{0} ({1})".format(task_name, time.strftime(
"%b %d %H:%M:%S"))

# stop bot task threads automagically if the main bot stops
task_thread.daemon = True

task_thread.start()

def task_wrapper(task, **kwargs):
@@ -73,7 +88,8 @@ def task_wrapper(task, **kwargs):
try:
task.run(**kwargs)
except:
print "Task '{}' raised an exception and had to stop:".format(task.task_name)
print "Task '{0}' raised an exception and had to stop:".format(
task.task_name)
traceback.print_exc()
else:
print "Task '{}' finished without error.".format(task.task_name)
print "Task '{0}' finished without error.".format(task.task_name)

Laden…
Abbrechen
Speichern