@@ -1,10 +1,6 @@ | |||
# Ignore python bytecode: | |||
*.pyc | |||
# Ignore secure config files: | |||
config/secure.py | |||
# Ignore pydev's nonsense: | |||
.project | |||
.pydevproject | |||
.settings/ | |||
*.egg | |||
*.egg-info | |||
.DS_Store | |||
build | |||
docs/_build |
@@ -1,5 +1,4 @@ | |||
Copyright (c) 2009-2011 Ben Kurtovic (The Earwig) | |||
<http://en.wikipedia.org/wiki/User:The_Earwig> | |||
Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy | |||
of this software and associated documentation files (the "Software"), to deal | |||
@@ -1,20 +0,0 @@ | |||
EarwigBot[1] is a Python[2] robot that edits Wikipedia. | |||
Development began, based on the Pywikipedia framework[3], in early 2009. | |||
Approval for its first task, a copyright violation detector[4], was carried out
in May, and the bot has been running consistently ever since (with the | |||
exception of Jan/Feb 2011). It currently handles several ongoing tasks[5], | |||
ranging from statistics generation to category cleanup, and on-demand tasks | |||
such as WikiProject template tagging. Since it started running, the bot has | |||
made over 45,000 edits. | |||
A project to rewrite it from scratch began in early April 2011, thus moving | |||
away from the Pywikipedia framework and allowing for less overall code, better | |||
integration between bot parts, and easier maintenance. | |||
Links: | |||
[1] http://toolserver.org/~earwig/earwigbot/ | |||
[2] http://python.org/ | |||
[3] http://pywikipediabot.sourceforge.net/ | |||
[4] http://en.wikipedia.org/wiki/Wikipedia:Bots/Requests_for_approval/EarwigBot_1 | |||
[5] http://en.wikipedia.org/wiki/User:EarwigBot#Tasks |
@@ -0,0 +1,205 @@ | |||
EarwigBot | |||
========= | |||
EarwigBot_ is a Python_ robot that edits Wikipedia_ and interacts with people
over IRC_. This file provides a basic overview of how to install and set up
the bot; more detailed information is located in the ``docs/`` directory
(available online at PyPI_).
History | |||
------- | |||
Development began, based on the `Pywikipedia framework`_, in early 2009. | |||
Approval for its first task, a `copyright violation detector`_, was carried out
in May, and the bot has been running consistently ever since (with the | |||
exception of Jan/Feb 2011). It currently handles `several ongoing tasks`_ | |||
ranging from statistics generation to category cleanup, and on-demand tasks | |||
such as WikiProject template tagging. Since it started running, the bot has | |||
made over 50,000 edits. | |||
A project to rewrite it from scratch began in early April 2011, thus moving | |||
away from the Pywikipedia framework and allowing for less overall code, better | |||
integration between bot parts, and easier maintenance. | |||
Installation | |||
------------ | |||
This package contains the core ``earwigbot``, abstracted enough that it should | |||
be usable and customizable by anyone running a bot on a MediaWiki site. Since | |||
it is component-based, the IRC components can be disabled if desired. IRC | |||
commands and bot tasks specific to `my instance of EarwigBot`_ that I don't | |||
feel the average user will need are available from the repository | |||
`earwigbot-plugins`_. | |||
It's recommended to run the bot's unit tests before installing. Run ``python | |||
setup.py test`` from the project's root directory. Note that some | |||
tests require an internet connection, and others may take a while to run. | |||
Coverage is currently rather incomplete. | |||
Latest release (v0.1) | |||
~~~~~~~~~~~~~~~~~~~~~ | |||
EarwigBot is available from the `Python Package Index`_, so you can install the | |||
latest release with ``pip install earwigbot`` (`get pip`_). | |||
You can also install it from source [1]_ directly:: | |||
curl -Lo earwigbot.tgz https://github.com/earwig/earwigbot/tarball/v0.1 | |||
tar -xf earwigbot.tgz | |||
cd earwig-earwigbot-* | |||
python setup.py install | |||
cd .. | |||
rm -r earwigbot.tgz earwig-earwigbot-* | |||
Development version | |||
~~~~~~~~~~~~~~~~~~~ | |||
You can install the development version of the bot from ``git`` by using | |||
setuptools/distribute's ``develop`` command [1]_, probably on the ``develop`` | |||
branch which contains (usually) working code. ``master`` contains the latest | |||
release. EarwigBot uses `git flow`_, so you're free to | |||
browse by tags or by new features (``feature/*`` branches):: | |||
git clone git://github.com/earwig/earwigbot.git earwigbot | |||
cd earwigbot | |||
python setup.py develop | |||
Setup | |||
----- | |||
The bot stores its data in a "working directory", including its config file and | |||
databases. This is also the location where you will place custom IRC commands | |||
and bot tasks, which will be explained later. It doesn't matter where this | |||
directory is, as long as the bot can write to it. | |||
Start the bot with ``earwigbot path/to/working/dir``, or just ``earwigbot`` if | |||
the working directory is the current directory. It will notice that no | |||
``config.yml`` file exists and take you through the setup process. | |||
There is currently no way to edit the ``config.yml`` file from within the bot | |||
after it has been created, but YAML is a very straightforward format, so you | |||
should be able to make any necessary changes yourself. Check out the | |||
`explanation of YAML`_ on Wikipedia for help. | |||
After setup, the bot will start. This means it will connect to the IRC servers | |||
it has been configured for, schedule bot tasks to run at specific times, and | |||
then wait for instructions (as commands on IRC). For a list of commands, say | |||
"``!help``" (commands are messages prefixed with an exclamation mark). | |||
You can stop the bot at any time with Control+C, same as you stop a normal | |||
Python program, and it will try to exit safely. You can also use the | |||
"``!quit``" command on IRC. | |||
Customizing | |||
----------- | |||
The bot's working directory contains a ``commands`` subdirectory and a | |||
``tasks`` subdirectory. Custom IRC commands can be placed in the former, | |||
whereas custom wiki bot tasks go into the latter. Developing custom modules is | |||
explained below, and in more detail through the bot's documentation on PyPI_ | |||
(or in the ``docs/`` dir). | |||
Note that custom commands and tasks will override built-in commands and tasks
with the same name.
``Bot`` and ``BotConfig`` | |||
~~~~~~~~~~~~~~~~~~~~~~~~~ | |||
`earwigbot.bot.Bot`_ is EarwigBot's main class. You don't have to instantiate | |||
this yourself, but it's good to be familiar with its attributes and methods, | |||
because it is the main way to communicate with other parts of the bot. A | |||
``Bot`` object is accessible as an attribute of commands and tasks (i.e., | |||
``self.bot``). | |||
`earwigbot.config.BotConfig`_ stores configuration information for the bot. Its | |||
docstring explains what each attribute is used for, but essentially each "node" | |||
(one of ``config.components``, ``wiki``, ``irc``, ``commands``, ``tasks``, and | |||
``metadata``) maps to a section of the bot's ``config.yml`` file. For example, | |||
if ``config.yml`` includes something like:: | |||
irc: | |||
frontend: | |||
nick: MyAwesomeBot | |||
channels: | |||
- "##earwigbot" | |||
- "#channel" | |||
- "#other-channel" | |||
...then ``config.irc["frontend"]["nick"]`` will be ``"MyAwesomeBot"`` and | |||
``config.irc["frontend"]["channels"]`` will be ``["##earwigbot", "#channel", | |||
"#other-channel"]``. | |||
Custom IRC commands | |||
~~~~~~~~~~~~~~~~~~~ | |||
Custom commands are subclasses of `earwigbot.commands.Command`_ that override | |||
``Command``'s ``process()`` (and optionally ``check()`` or ``setup()``) | |||
methods. | |||
The bot has a wide selection of built-in commands and plugins to act as sample | |||
code and/or to give ideas. Start with test_, and then check out chanops_ and | |||
afc_status_ for some more complicated scripts. | |||
Custom bot tasks | |||
~~~~~~~~~~~~~~~~ | |||
Custom tasks are subclasses of `earwigbot.tasks.Task`_ that override ``Task``'s | |||
``run()`` (and optionally ``setup()``) methods. | |||
See the built-in wikiproject_tagger_ task for a relatively straightforward | |||
task, or the afc_statistics_ plugin for a more complicated one. | |||
The Wiki Toolset | |||
---------------- | |||
EarwigBot's answer to the `Pywikipedia framework`_ is the Wiki Toolset | |||
(``earwigbot.wiki``), which you will mainly access through ``bot.wiki``. | |||
``bot.wiki`` provides three methods for the management of Sites - | |||
``get_site()``, ``add_site()``, and ``remove_site()``. Sites are objects that | |||
simply represent a MediaWiki site. A single instance of EarwigBot (i.e. a | |||
single *working directory*) is expected to relate to a single site or group of | |||
sites using the same login info (like all WMF wikis with CentralAuth). | |||
Load your default site (the one that you picked during setup) with | |||
``site = bot.wiki.get_site()``. | |||
Not all aspects of the toolset are covered in the docs. Explore `its code and | |||
docstrings`_ to learn how to use it in a more hands-on fashion. For reference, | |||
``bot.wiki`` is an instance of ``earwigbot.wiki.SitesDB`` tied to the | |||
``sites.db`` file in the bot's working directory. | |||
Footnotes | |||
--------- | |||
- Questions, comments, or suggestions about the documentation? `Let me know`_ | |||
so I can improve it for other people. | |||
.. [1] ``python setup.py install``/``develop`` may require root, or use the | |||
``--user`` switch to install for the current user only. | |||
.. _EarwigBot: http://en.wikipedia.org/wiki/User:EarwigBot | |||
.. _Python: http://python.org/ | |||
.. _Wikipedia: http://en.wikipedia.org/ | |||
.. _IRC: http://en.wikipedia.org/wiki/Internet_Relay_Chat | |||
.. _PyPI: http://packages.python.org/earwigbot | |||
.. _Pywikipedia framework: http://pywikipediabot.sourceforge.net/ | |||
.. _copyright violation detector: http://en.wikipedia.org/wiki/Wikipedia:Bots/Requests_for_approval/EarwigBot_1 | |||
.. _several ongoing tasks: http://en.wikipedia.org/wiki/User:EarwigBot#Tasks | |||
.. _my instance of EarwigBot: http://en.wikipedia.org/wiki/User:EarwigBot | |||
.. _earwigbot-plugins: https://github.com/earwig/earwigbot-plugins | |||
.. _Python Package Index: http://pypi.python.org | |||
.. _get pip: http://pypi.python.org/pypi/pip | |||
.. _git flow: http://nvie.com/posts/a-successful-git-branching-model/ | |||
.. _explanation of YAML: http://en.wikipedia.org/wiki/YAML | |||
.. _earwigbot.bot.Bot: https://github.com/earwig/earwigbot/blob/develop/earwigbot/bot.py | |||
.. _earwigbot.config.BotConfig: https://github.com/earwig/earwigbot/blob/develop/earwigbot/config.py | |||
.. _earwigbot.commands.Command: https://github.com/earwig/earwigbot/blob/develop/earwigbot/commands/__init__.py | |||
.. _test: https://github.com/earwig/earwigbot/blob/develop/earwigbot/commands/test.py | |||
.. _chanops: https://github.com/earwig/earwigbot/blob/develop/earwigbot/commands/chanops.py | |||
.. _afc_status: https://github.com/earwig/earwigbot-plugins/blob/develop/commands/afc_status.py | |||
.. _earwigbot.tasks.Task: https://github.com/earwig/earwigbot/blob/develop/earwigbot/tasks/__init__.py | |||
.. _wikiproject_tagger: https://github.com/earwig/earwigbot/blob/develop/earwigbot/tasks/wikiproject_tagger.py | |||
.. _afc_statistics: https://github.com/earwig/earwigbot-plugins/blob/develop/tasks/afc_statistics.py | |||
.. _its code and docstrings: https://github.com/earwig/earwigbot/tree/develop/earwigbot/wiki | |||
.. _Let me know: ben.kurtovic@verizon.net |
@@ -1,25 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# EarwigBot Configuration File | |||
# This file contains information that the bot uses to connect to IRC. | |||
# our main (front-end) server's hostname and port | |||
HOST = "irc.freenode.net" | |||
PORT = 6667 | |||
# our watcher server's hostname, port, and RC channel | |||
WATCHER_HOST = "irc.wikimedia.org" | |||
WATCHER_PORT = 6667 | |||
WATCHER_CHAN = "#en.wikipedia" | |||
# our nick, ident, and real name, used on both servers | |||
NICK = "EarwigBot" | |||
IDENT = "earwigbot" | |||
REALNAME = "[[w:en:User:EarwigBot]]" | |||
# channels to join on main server's startup | |||
CHANS = ["##earwigbot", "##earwig", "#wikipedia-en-afc"] | |||
# hardcoded hostnames of users with certain permissions | |||
OWNERS = ["wikipedia/The-Earwig"] # can use owner-only commands (!restart and !git) | |||
ADMINS = ["wikipedia/The-Earwig", "wikipedia/LeonardBloom"] # can use high-risk commands, e.g. !op |
@@ -1,24 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# EarwigBot Configuration File | |||
# This file tells the bot which of its components should be enabled. | |||
# The IRC frontend (configured in config/irc.py) sits on a public IRC network, | |||
# responds to commands given to it, and reports edits (if the IRC watcher | |||
# component is enabled). | |||
enable_irc_frontend = True | |||
# The IRC watcher (connection details configured in config/irc.py as well) sits
# on an IRC network that gives a recent changes feed, usually irc.wikimedia.org.
# It looks for edits matching certain (often regex) patterns (rules configured
# in config/watcher.py), and either reports them to the IRC frontend (if
# enabled), or activates a task on the WikiBot (if configured to do so).
enable_irc_watcher = True | |||
# EarwigBot doesn't have to edit a wiki, although this is its main purpose. If | |||
# the wiki schedule is disabled, it will not be able to handle scheduled tasks | |||
# that involve editing (such as creating a daily category every day at midnight | |||
# UTC), but it can still edit through rules given in the watcher, and bot tasks | |||
# can still be activated by the command line. The schedule is configured in | |||
# config/schedule.py. | |||
enable_wiki_schedule = True |
@@ -1,28 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# EarwigBot Configuration File | |||
# This file tells the bot when to run certain wiki-editing tasks. | |||
def check(minute, hour, month_day, month, week_day):
    """Return the wiki tasks that should be started at the given time.

    Each entry of the returned list is either a bare task name or a
    ``(task_name, kwargs)`` tuple. ``month_day`` and ``month`` are accepted
    but are not consulted by the current schedule.
    """
    # Tasks that run once a week, at midnight, keyed by ``week_day``.
    # NOTE(review): the schedule was written with 0 meaning Sunday, but
    # time.gmtime()'s tm_wday uses 0 for Monday -- confirm which convention
    # the caller passes in.
    weekly = {
        0: "afc_undated",    # clear [[Category:Undated AfC submissions]]
        1: "afc_catdelink",  # delink mainspace categories in declined AfC submissions
        2: "wrongmime",      # tag files whose extensions do not agree with their MIME type
        3: "blptag",         # add |blp=yes to {{WPB}} or {{WPBS}} when used with {{WP Biography}}
    }

    if minute != 0:
        # Nothing is scheduled off the hour.
        return []

    # Every hour, on the hour: save statistics to [[Template:AFC_statistics]].
    due = [("afc_statistics", {"action": "save"})]

    if hour == 0:
        # Every day at midnight: create the daily categories for WP:AFC and
        # WP:FEED, then add whichever weekly task falls on this day (if any).
        due.extend(["afc_dailycats", "feed_dailycats"])
        if week_day in weekly:
            due.append(weekly[week_day])

    return due
@@ -1,9 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# EarwigBot Configuration File | |||
# This file contains information that should be kept hidden, including passwords. | |||
# IRC: identify ourselves to NickServ? | |||
NS_AUTH = False | |||
NS_USER = "" | |||
NS_PASS = "" |
@@ -1,69 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# EarwigBot Configuration File | |||
# This file contains rules for the bot's watcher component. | |||
import re | |||
from wiki import task_manager | |||
# Define different report channels on our front-end server. They /must/ be in
# CHANS in config/irc.py or the bot will not be able to send messages to them
# (unless they have -n set).
AFC_CHANS = ["#wikipedia-en-afc"]  # report recent AfC changes/give AfC status messages upon join
BOT_CHANS = ["##earwigbot", "#wikipedia-en-afc"]  # report edits containing "!earwigbot"

# Define some commonly used strings.
afc_prefix = "wikipedia( talk)?:(wikiproject )?articles for creation"

# Define our compiled regexps used when finding certain edits. The patterns are
# written as raw strings so that regex escapes such as ``\[`` reach the regex
# engine untouched instead of relying on Python passing unknown string escapes
# through; the resulting pattern text is unchanged.
r_page = re.compile(afc_prefix)
r_ffu = re.compile("wikipedia( talk)?:files for upload")
r_move1 = re.compile(r"moved \[\[{}".format(afc_prefix))  # an AFC page was either moved locally or out
r_move2 = re.compile(r"moved \[\[(.*?)\]\] to \[\[{}".format(afc_prefix))  # an outside page was moved into AFC
r_moved_pages = re.compile(r"^moved \[\[(.*?)\]\] to \[\[(.*?)\]\]")  # captures (old title, new title)
r_delete = re.compile(r'deleted "\[\[{}'.format(afc_prefix))
r_deleted_page = re.compile(r'^deleted "\[\[(.*?)\]\]')  # captures the deleted page's title
r_restore = re.compile(r'restored "\[\[{}'.format(afc_prefix))
r_restored_page = re.compile(r'^restored "\[\[(.*?)\]\]')  # captures the restored page's title
r_protect = re.compile(r'protected "\[\[{}'.format(afc_prefix))
def _start_afc_tasks(action, **kwargs):
    """Start both AfC tracking tasks with the same action and arguments."""
    for task_name in ("afc_statistics", "afc_copyvios"):
        task_manager.start_task(task_name, action=action, **kwargs)


def process(rc):
    """Handle one recent-changes event and return the channels to report it to.

    ``rc`` must expose ``page``, ``comment``, ``msg`` and ``flags``. Depending
    on what the event touches, the AfC tasks are started through
    ``task_manager`` and the matching report channels are collected.

    Returns a set of channel names (possibly empty).
    """
    chans = set()  # channels to report this message to
    page_name = rc.page.lower()
    comment = rc.comment.lower()

    # Anything mentioning the bot goes to the bot channels, independently of
    # the AfC rules below.
    if "!earwigbot" in rc.msg.lower():
        chans.update(BOT_CHANS)

    if r_page.search(page_name):
        _start_afc_tasks("process_edit", page=rc.page)
        chans.update(AFC_CHANS)

    elif r_ffu.match(page_name):
        chans.update(AFC_CHANS)

    elif page_name.startswith("template:afc submission"):
        chans.update(AFC_CHANS)

    elif rc.flags == "move" and (r_move1.match(comment) or r_move2.match(comment)):
        # Two capture groups, so findall() yields (old title, new title) tuples.
        moved = r_moved_pages.findall(rc.comment)[0]
        _start_afc_tasks("process_move", pages=moved)
        chans.update(AFC_CHANS)

    elif rc.flags == "delete" and r_delete.match(comment):
        # One capture group, so findall() yields plain strings: [0] is the
        # whole title (indexing again would keep only its first character).
        title = r_deleted_page.findall(rc.comment)[0]
        _start_afc_tasks("process_delete", page=title)
        chans.update(AFC_CHANS)

    elif rc.flags == "restore" and r_restore.match(comment):
        # Same single-group shape as the delete case above.
        title = r_restored_page.findall(rc.comment)[0]
        _start_afc_tasks("process_restore", page=title)
        chans.update(AFC_CHANS)

    elif rc.flags == "protect" and r_protect.match(comment):
        chans.update(AFC_CHANS)

    return chans
@@ -1,122 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
## EarwigBot's Core | |||
## EarwigBot has three components that can run independently of each other: an | |||
## IRC front-end, an IRC watcher, and a wiki scheduler. | |||
## * The IRC front-end runs on a normal IRC server and expects users to | |||
## interact with it/give it commands. | |||
## * The IRC watcher runs on a wiki recent-changes server and listens for | |||
## edits. Users cannot interact with this part of the bot. | |||
## * The wiki scheduler runs wiki-editing bot tasks in separate threads at | |||
## user-defined times through a cron-like interface. | |||
## There is a "priority" system here: | |||
## 1. If the IRC frontend is enabled, it will run on the main thread, and the | |||
## IRC watcher and wiki scheduler (if enabled) will run on separate threads. | |||
## 2. If the wiki scheduler is enabled, it will run on the main thread, and the | |||
## IRC watcher (if enabled) will run on a separate thread. | |||
## 3. If the IRC watcher is enabled, it will run on the main (and only) thread. | |||
## Else, the bot will stop, as no components are enabled. | |||
import threading | |||
import time | |||
import traceback | |||
import sys | |||
import os | |||
parent_dir = os.path.split(sys.path[0])[0] | |||
sys.path.append(parent_dir) # make sure we look in the parent directory for modules | |||
from config.main import * | |||
from irc import frontend, watcher | |||
from wiki import task_manager | |||
f_conn = None | |||
w_conn = None | |||
def irc_watcher(f_conn=None):
    """Run the IRC watcher forever, restarting it whenever it breaks.

    Runs as another thread if the frontend and/or scheduler is enabled,
    otherwise as the main thread. ``f_conn`` is the frontend connection that
    is handed to ``watcher.main()``; it defaults to ``None`` so that the
    watcher can also be started without a frontend.
    """
    global w_conn
    print("\nStarting IRC watcher...")
    while 1:  # restart the watcher component if (just) it breaks
        w_conn = watcher.get_connection()
        w_conn.connect()
        print("")  # blank line to signify that the bot has finished starting up
        try:
            watcher.main(w_conn, f_conn)
        except Exception:
            # Only swallow ordinary errors: KeyboardInterrupt and SystemExit
            # must propagate so that the bot can still be stopped when the
            # watcher is running on the main thread.
            traceback.print_exc()
        time.sleep(5)  # sleep a bit before restarting watcher
        print("\nWatcher has stopped; restarting component...")
def wiki_scheduler():
    """Start the scheduled wiki tasks roughly once a minute, forever.

    Runs as another thread, or as the primary thread if the IRC frontend is
    not enabled. Each pass hands the current UTC time to
    ``task_manager.start_tasks()`` and then sleeps for whatever is left of
    the 60-second period.
    """
    while 1:
        time_start = time.time()
        now = time.gmtime(time_start)
        task_manager.start_tasks(now)

        # Elapsed time is end minus start; the reverse order gives a value
        # that is never positive and makes every pass sleep 60s or longer.
        time_diff = time.time() - time_start
        if time_diff < 60:  # sleep until the next minute
            time.sleep(60 - time_diff)
def irc_frontend():
    """Run the IRC frontend on the calling thread until it exits.

    The wiki scheduler and the IRC watcher are started on daemon threads
    first, if they are enabled. Once ``frontend.main()`` returns, the open
    connections are closed.
    """
    global f_conn

    print("\nStarting IRC frontend...")
    f_conn = frontend.get_connection()
    frontend.startup(f_conn)

    if enable_wiki_schedule:
        print("\nStarting wiki scheduler...")
        task_manager.load_tasks()
        t_scheduler = threading.Thread(target=wiki_scheduler)
        t_scheduler.name = "wiki-scheduler"
        t_scheduler.daemon = True
        t_scheduler.start()

    if enable_irc_watcher:
        t_watcher = threading.Thread(target=irc_watcher, args=(f_conn,))
        t_watcher.name = "irc-watcher"
        t_watcher.daemon = True
        t_watcher.start()

    frontend.main()

    # w_conn is assigned by the watcher thread, so it can still be None here
    # if the frontend stopped before the watcher obtained a connection.
    if enable_irc_watcher and w_conn is not None:
        w_conn.close()
    f_conn.close()
def run():
    """Start whichever bot components are enabled, by priority.

    The frontend takes the main thread if enabled (and starts the other
    components itself); otherwise the scheduler does, with the watcher on a
    separate thread; otherwise the watcher runs alone. Exits with a message
    if nothing is enabled.
    """
    if enable_irc_frontend:
        # The frontend runs on our primary thread and enables the additional
        # components through its own function.
        irc_frontend()

    elif enable_wiki_schedule:
        # The scheduler runs on the main thread; the IRC watcher runs on
        # another thread if it is enabled.
        print("\nStarting wiki scheduler...")
        task_manager.load_tasks()
        if enable_irc_watcher:
            t_watcher = threading.Thread(target=irc_watcher, args=(f_conn,))
            t_watcher.name = "irc-watcher"
            t_watcher.daemon = True
            t_watcher.start()
        wiki_scheduler()

    elif enable_irc_watcher:
        # The watcher is our only enabled component. It still needs its
        # frontend-connection argument, so pass the module-level f_conn just
        # as the scheduler branch above does.
        irc_watcher(f_conn)

    else:  # nothing is enabled!
        # sys.exit() rather than exit(): the latter is a helper installed by
        # the site module and is not guaranteed to exist.
        sys.exit("\nNo bot parts are enabled; stopping...")
if __name__ == "__main__":
    try:
        run()
    except KeyboardInterrupt:
        # sys.exit() rather than exit(): the latter is a helper installed by
        # the site module and is not guaranteed to exist.
        sys.exit("\nKeyboardInterrupt: stopping main bot loop.")
@@ -0,0 +1,153 @@ | |||
# Makefile for Sphinx documentation | |||
# | |||
# You can set these variables from the command line. | |||
SPHINXOPTS = | |||
SPHINXBUILD = sphinx-build | |||
PAPER = | |||
BUILDDIR = _build | |||
# Internal variables. | |||
PAPEROPT_a4 = -D latex_paper_size=a4 | |||
PAPEROPT_letter = -D latex_paper_size=letter | |||
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . | |||
# the i18n builder cannot share the environment and doctrees with the others | |||
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . | |||
# Every target below is a command, not a file; texinfo and info are included
# so that a stray file or directory with one of those names cannot mask them.
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man texinfo info changes linkcheck doctest gettext
help: | |||
@echo "Please use \`make <target>' where <target> is one of" | |||
@echo " html to make standalone HTML files" | |||
@echo " dirhtml to make HTML files named index.html in directories" | |||
@echo " singlehtml to make a single large HTML file" | |||
@echo " pickle to make pickle files" | |||
@echo " json to make JSON files" | |||
@echo " htmlhelp to make HTML files and a HTML help project" | |||
@echo " qthelp to make HTML files and a qthelp project" | |||
@echo " devhelp to make HTML files and a Devhelp project" | |||
@echo " epub to make an epub" | |||
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" | |||
@echo " latexpdf to make LaTeX files and run them through pdflatex" | |||
@echo " text to make text files" | |||
@echo " man to make manual pages" | |||
@echo " texinfo to make Texinfo files" | |||
@echo " info to make Texinfo files and run them through makeinfo" | |||
@echo " gettext to make PO message catalogs" | |||
@echo " changes to make an overview of all changed/added/deprecated items" | |||
@echo " linkcheck to check all external links for integrity" | |||
@echo " doctest to run all doctests embedded in the documentation (if enabled)" | |||
clean: | |||
-rm -rf $(BUILDDIR)/* | |||
html: | |||
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html | |||
@echo | |||
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html." | |||
dirhtml: | |||
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml | |||
@echo | |||
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." | |||
singlehtml: | |||
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml | |||
@echo | |||
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." | |||
pickle: | |||
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle | |||
@echo | |||
@echo "Build finished; now you can process the pickle files." | |||
json: | |||
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json | |||
@echo | |||
@echo "Build finished; now you can process the JSON files." | |||
htmlhelp: | |||
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp | |||
@echo | |||
@echo "Build finished; now you can run HTML Help Workshop with the" \ | |||
".hhp project file in $(BUILDDIR)/htmlhelp." | |||
qthelp: | |||
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp | |||
@echo | |||
@echo "Build finished; now you can run "qcollectiongenerator" with the" \ | |||
".qhcp project file in $(BUILDDIR)/qthelp, like this:" | |||
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/EarwigBot.qhcp" | |||
@echo "To view the help file:" | |||
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/EarwigBot.qhc" | |||
devhelp: | |||
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp | |||
@echo | |||
@echo "Build finished." | |||
@echo "To view the help file:" | |||
@echo "# mkdir -p $$HOME/.local/share/devhelp/EarwigBot" | |||
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/EarwigBot" | |||
@echo "# devhelp" | |||
epub: | |||
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub | |||
@echo | |||
@echo "Build finished. The epub file is in $(BUILDDIR)/epub." | |||
latex: | |||
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex | |||
@echo | |||
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." | |||
@echo "Run \`make' in that directory to run these through (pdf)latex" \ | |||
"(use \`make latexpdf' here to do that automatically)." | |||
latexpdf: | |||
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex | |||
@echo "Running LaTeX files through pdflatex..." | |||
$(MAKE) -C $(BUILDDIR)/latex all-pdf | |||
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." | |||
text: | |||
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text | |||
@echo | |||
@echo "Build finished. The text files are in $(BUILDDIR)/text." | |||
man: | |||
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man | |||
@echo | |||
@echo "Build finished. The manual pages are in $(BUILDDIR)/man." | |||
texinfo: | |||
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo | |||
@echo | |||
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." | |||
@echo "Run \`make' in that directory to run these through makeinfo" \ | |||
"(use \`make info' here to do that automatically)." | |||
# Build the Texinfo sources, then run makeinfo on them through a sub-make.
# $(MAKE) is used (as in latexpdf) so the recursive call uses the same make
# program and inherits its command-line flags.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
gettext: | |||
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale | |||
@echo | |||
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." | |||
changes: | |||
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes | |||
@echo | |||
@echo "The overview file is in $(BUILDDIR)/changes." | |||
linkcheck: | |||
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck | |||
@echo | |||
@echo "Link check complete; look for any errors in the above output " \ | |||
"or in $(BUILDDIR)/linkcheck/output.txt." | |||
doctest: | |||
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest | |||
@echo "Testing of doctests in the sources finished, look at the " \ | |||
"results in $(BUILDDIR)/doctest/output.txt." |
@@ -0,0 +1,9 @@ | |||
commands Package | |||
================ | |||
:mod:`commands` Package | |||
----------------------- | |||
.. automodule:: earwigbot.commands | |||
:members: | |||
:undoc-members: |
@@ -0,0 +1,46 @@ | |||
config Package | |||
============== | |||
:mod:`config` Package | |||
--------------------- | |||
.. automodule:: earwigbot.config | |||
:members: | |||
:undoc-members: | |||
:mod:`formatter` Module | |||
----------------------- | |||
.. automodule:: earwigbot.config.formatter | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`node` Module | |||
------------------ | |||
.. automodule:: earwigbot.config.node | |||
:members: | |||
:undoc-members: | |||
:mod:`ordered_yaml` Module | |||
-------------------------- | |||
.. automodule:: earwigbot.config.ordered_yaml | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`permissions` Module | |||
------------------------- | |||
.. automodule:: earwigbot.config.permissions | |||
:members: | |||
:undoc-members: | |||
:mod:`script` Module | |||
-------------------- | |||
.. automodule:: earwigbot.config.script | |||
:members: | |||
:undoc-members: |
@@ -0,0 +1,46 @@ | |||
irc Package | |||
=========== | |||
:mod:`irc` Package | |||
------------------ | |||
.. automodule:: earwigbot.irc | |||
:members: | |||
:undoc-members: | |||
:mod:`connection` Module | |||
------------------------ | |||
.. automodule:: earwigbot.irc.connection | |||
:members: | |||
:undoc-members: | |||
:mod:`data` Module | |||
------------------ | |||
.. automodule:: earwigbot.irc.data | |||
:members: | |||
:undoc-members: | |||
:mod:`frontend` Module | |||
---------------------- | |||
.. automodule:: earwigbot.irc.frontend | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`rc` Module | |||
---------------- | |||
.. automodule:: earwigbot.irc.rc | |||
:members: | |||
:undoc-members: | |||
:mod:`watcher` Module | |||
--------------------- | |||
.. automodule:: earwigbot.irc.watcher | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: |
@@ -0,0 +1,57 @@ | |||
earwigbot Package | |||
================= | |||
:mod:`earwigbot` Package | |||
------------------------ | |||
.. automodule:: earwigbot.__init__ | |||
:members: | |||
:undoc-members: | |||
:mod:`bot` Module | |||
----------------- | |||
.. automodule:: earwigbot.bot | |||
:members: | |||
:undoc-members: | |||
:mod:`exceptions` Module | |||
------------------------ | |||
.. automodule:: earwigbot.exceptions | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`lazy` Module | |||
------------------ | |||
.. automodule:: earwigbot.lazy | |||
:members: | |||
:undoc-members: | |||
:mod:`managers` Module | |||
---------------------- | |||
.. automodule:: earwigbot.managers | |||
:members: _ResourceManager, CommandManager, TaskManager | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`util` Module | |||
------------------ | |||
.. automodule:: earwigbot.util | |||
:members: | |||
:undoc-members: | |||
Subpackages | |||
----------- | |||
.. toctree:: | |||
earwigbot.commands | |||
earwigbot.config | |||
earwigbot.irc | |||
earwigbot.tasks | |||
earwigbot.wiki |
@@ -0,0 +1,16 @@ | |||
tasks Package | |||
============= | |||
:mod:`tasks` Package | |||
-------------------- | |||
.. automodule:: earwigbot.tasks | |||
:members: | |||
:undoc-members: | |||
:mod:`wikiproject_tagger` Module | |||
-------------------------------- | |||
.. automodule:: earwigbot.tasks.wikiproject_tagger | |||
:members: | |||
:show-inheritance: |
@@ -0,0 +1,47 @@ | |||
copyvios Package | |||
================ | |||
:mod:`copyvios` Package | |||
----------------------- | |||
.. automodule:: earwigbot.wiki.copyvios | |||
:members: | |||
:undoc-members: | |||
:mod:`exclusions` Module | |||
------------------------ | |||
.. automodule:: earwigbot.wiki.copyvios.exclusions | |||
:members: | |||
:undoc-members: | |||
:mod:`markov` Module | |||
-------------------- | |||
.. automodule:: earwigbot.wiki.copyvios.markov | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`parsers` Module | |||
--------------------- | |||
.. automodule:: earwigbot.wiki.copyvios.parsers | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`result` Module | |||
-------------------- | |||
.. automodule:: earwigbot.wiki.copyvios.result | |||
:members: | |||
:undoc-members: | |||
:mod:`search` Module | |||
-------------------- | |||
.. automodule:: earwigbot.wiki.copyvios.search | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: |
@@ -0,0 +1,59 @@ | |||
wiki Package | |||
============ | |||
:mod:`wiki` Package | |||
------------------- | |||
.. automodule:: earwigbot.wiki | |||
:members: | |||
:undoc-members: | |||
:mod:`category` Module | |||
---------------------- | |||
.. automodule:: earwigbot.wiki.category | |||
:members: | |||
:undoc-members: | |||
:mod:`constants` Module | |||
----------------------- | |||
.. automodule:: earwigbot.wiki.constants | |||
:members: | |||
:undoc-members: | |||
:mod:`page` Module | |||
------------------ | |||
.. automodule:: earwigbot.wiki.page | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`site` Module | |||
------------------ | |||
.. automodule:: earwigbot.wiki.site | |||
:members: | |||
:undoc-members: | |||
:mod:`sitesdb` Module | |||
--------------------- | |||
.. automodule:: earwigbot.wiki.sitesdb | |||
:members: | |||
:undoc-members: | |||
:mod:`user` Module | |||
------------------ | |||
.. automodule:: earwigbot.wiki.user | |||
:members: | |||
:undoc-members: | |||
Subpackages | |||
----------- | |||
.. toctree:: | |||
earwigbot.wiki.copyvios |
@@ -0,0 +1,7 @@ | |||
earwigbot | |||
========= | |||
.. toctree:: | |||
:maxdepth: 6 | |||
earwigbot |
@@ -0,0 +1,242 @@ | |||
# -*- coding: utf-8 -*-
#
# EarwigBot documentation build configuration file, created by
# sphinx-quickstart on Sun Apr 29 01:42:25 2012.
#
# This file is execfile()d with the current directory set to its containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

import os
import sys

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
# The parent directory is added so that autodoc can import the package being
# documented (presumably the ``earwigbot`` package lives there).
sys.path.insert(0, os.path.abspath('..'))

# -- General configuration -----------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.coverage', 'sphinx.ext.viewcode']

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix of source filenames.
source_suffix = '.rst'

# The encoding of source files.
#source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = u'EarwigBot'
copyright = u'2009, 2010, 2011, 2012 Ben Kurtovic'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '0.1'
# The full version, including alpha/beta/rc tags.
release = '0.1'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']

# The reST default role (used for this markup: `text`) to use for all documents.
#default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []

# -- Options for HTML output ---------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
html_theme = 'nature'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []

# The name for this set of Sphinx documents.  If None, it defaults to
# "<project> v<release> documentation".
#html_title = None

# A shorter title for the navigation bar.  Default is the same as html_title.
#html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}

# If false, no module index is generated.
#html_domain_indices = True

# If false, no index is generated.
#html_use_index = True

# If true, the index is split into individual pages for each letter.
#html_split_index = False

# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it.  The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None

# Output file base name for HTML help builder.
htmlhelp_basename = 'EarwigBotdoc'

# -- Options for LaTeX output --------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
    ('index', 'EarwigBot.tex', u'EarwigBot Documentation',
     u'Ben Kurtovic', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False

# If true, show page references after internal links.
#latex_show_pagerefs = False

# If true, show URL addresses after external links.
#latex_show_urls = False

# Documents to append as an appendix to all manuals.
#latex_appendices = []

# If false, no module index is generated.
#latex_domain_indices = True

# -- Options for manual page output --------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    ('index', 'earwigbot', u'EarwigBot Documentation',
     [u'Ben Kurtovic'], 1)
]

# If true, show URL addresses after external links.
#man_show_urls = False

# -- Options for Texinfo output ------------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
# The description replaces the sphinx-quickstart placeholder text with the
# one-line summary used on the documentation's front page.
texinfo_documents = [
    ('index', 'EarwigBot', u'EarwigBot Documentation',
     u'Ben Kurtovic', 'EarwigBot',
     'A Python robot that edits Wikipedia and interacts with people over IRC.',
     'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
#texinfo_appendices = []

# If false, no module index is generated.
#texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'
@@ -0,0 +1,240 @@ | |||
Customizing | |||
=========== | |||
The bot's working directory contains a :file:`commands` subdirectory and a | |||
:file:`tasks` subdirectory. Custom IRC commands can be placed in the former, | |||
whereas custom wiki bot tasks go into the latter. Developing custom modules is | |||
explained in detail in this documentation. | |||
Note that custom commands and tasks will override built-in commands and tasks | |||
with the same name. | |||
:py:class:`~earwigbot.bot.Bot` and :py:class:`~earwigbot.bot.BotConfig` | |||
----------------------------------------------------------------------- | |||
:py:class:`earwigbot.bot.Bot` is EarwigBot's main class. You don't have to | |||
instantiate this yourself, but it's good to be familiar with its attributes and | |||
methods, because it is the main way to communicate with other parts of the bot. | |||
A :py:class:`~earwigbot.bot.Bot` object is accessible as an attribute of | |||
commands and tasks (i.e., :py:attr:`self.bot`). | |||
The most useful attributes are: | |||
- :py:attr:`~earwigbot.bot.Bot.config`: an instance of | |||
:py:class:`~earwigbot.config.BotConfig`, for accessing the bot's | |||
configuration data (see below). | |||
- :py:attr:`~earwigbot.bot.Bot.commands`: the bot's | |||
:py:class:`~earwigbot.managers.CommandManager`, which is used internally to | |||
run IRC commands (through | |||
:py:meth:`commands.call() <earwigbot.managers.CommandManager.call>`, which | |||
you shouldn't have to use); you can safely reload all commands with | |||
:py:meth:`commands.load() <earwigbot.managers._ResourceManager.load>`. | |||
- :py:attr:`~earwigbot.bot.Bot.tasks`: the bot's | |||
:py:class:`~earwigbot.managers.TaskManager`, which can be used to start tasks | |||
with :py:meth:`tasks.start(task_name, **kwargs) | |||
<earwigbot.managers.TaskManager.start>`. :py:meth:`tasks.load() | |||
<earwigbot.managers._ResourceManager.load>` can be used to safely reload all | |||
tasks. | |||
- :py:attr:`~earwigbot.bot.Bot.frontend` / | |||
:py:attr:`~earwigbot.bot.Bot.watcher`: instances of | |||
:py:class:`earwigbot.irc.Frontend <earwigbot.irc.frontend.Frontend>` and | |||
:py:class:`earwigbot.irc.Watcher <earwigbot.irc.watcher.Watcher>`, | |||
respectively, which represent the bot's connections to these two servers; you | |||
can, for example, send a message to the frontend with | |||
:py:meth:`frontend.say(chan, msg) | |||
<earwigbot.irc.connection.IRCConnection.say>` (more on communicating with IRC | |||
below). | |||
- :py:attr:`~earwigbot.bot.Bot.wiki`: interface with the | |||
:doc:`Wiki Toolset <toolset>`. | |||
- Finally, :py:meth:`~earwigbot.bot.Bot.restart` (restarts IRC components and | |||
reloads config, commands, and tasks) and :py:meth:`~earwigbot.bot.Bot.stop` | |||
can be used almost anywhere. Both take an optional "reason" that will be | |||
logged and used as the quit message when disconnecting from IRC. | |||
:py:class:`earwigbot.config.BotConfig` stores configuration information for the | |||
bot. Its docstrings explain what each attribute is used for, but essentially | |||
each "node" (one of :py:attr:`config.components | |||
<earwigbot.config.BotConfig.components>`, | |||
:py:attr:`~earwigbot.config.BotConfig.wiki`, | |||
:py:attr:`~earwigbot.config.BotConfig.irc`, | |||
:py:attr:`~earwigbot.config.BotConfig.commands`, | |||
:py:attr:`~earwigbot.config.BotConfig.tasks`, or | |||
:py:attr:`~earwigbot.config.BotConfig.metadata`) maps to a section | |||
of the bot's :file:`config.yml` file. For example, if :file:`config.yml` | |||
includes something like:: | |||
irc: | |||
frontend: | |||
nick: MyAwesomeBot | |||
channels: | |||
- "##earwigbot" | |||
- "#channel" | |||
- "#other-channel" | |||
...then :py:attr:`config.irc["frontend"]["nick"]` will be ``"MyAwesomeBot"`` | |||
and :py:attr:`config.irc["frontend"]["channels"]` will be | |||
``["##earwigbot", "#channel", "#other-channel"]``. | |||
Custom IRC commands | |||
------------------- | |||
Custom commands are subclasses of :py:class:`earwigbot.commands.Command` that | |||
override :py:class:`~earwigbot.commands.Command`'s | |||
:py:meth:`~earwigbot.commands.Command.process` (and optionally | |||
:py:meth:`~earwigbot.commands.Command.check` or | |||
:py:meth:`~earwigbot.commands.Command.setup`) methods. | |||
:py:class:`~earwigbot.commands.Command`'s docstrings should explain what each | |||
attribute and method is for and what they should be overridden with, but these | |||
are the basics: | |||
- Class attribute :py:attr:`~earwigbot.commands.Command.name` is the name of | |||
the command. This must be specified. | |||
- Class attribute :py:attr:`~earwigbot.commands.Command.commands` is a list of | |||
names that will trigger this command. It defaults to the command's | |||
:py:attr:`~earwigbot.commands.Command.name`, but you can override it with | |||
multiple names to serve as aliases. This is handled by the default | |||
:py:meth:`~earwigbot.commands.Command.check` implementation (see below), so | |||
if :py:meth:`~earwigbot.commands.Command.check` is overridden, this is | |||
ignored by everything except the help_ command (so ``!help alias`` will | |||
trigger help for the actual command). | |||
- Class attribute :py:attr:`~earwigbot.commands.Command.hooks` is a list of the | |||
"IRC events" that this command might respond to. It defaults to ``["msg"]``, | |||
but options include ``"msg_private"`` (for private messages only), | |||
``"msg_public"`` (for channel messages only), and ``"join"`` (for when a user | |||
joins a channel). See the afc_status_ plugin for a command that responds to | |||
other hook types. | |||
- Method :py:meth:`~earwigbot.commands.Command.setup` is called *once* with no | |||
arguments immediately after the command is first loaded. Does nothing by | |||
default; treat it like an :py:meth:`__init__` if you want | |||
(:py:meth:`~earwigbot.commands.Command.__init__` does things by default and a | |||
dedicated setup method is often easier than overriding | |||
:py:meth:`~earwigbot.commands.Command.__init__` and using :py:obj:`super`). | |||
- Method :py:meth:`~earwigbot.commands.Command.check` is passed a | |||
:py:class:`~earwigbot.irc.data.Data` object, and should return ``True`` if | |||
you want to respond to this message, or ``False`` otherwise. The default | |||
behavior is to return ``True`` only if :py:attr:`data.is_command` is ``True`` | |||
and :py:attr:`data.command` ``==`` | |||
:py:attr:`~earwigbot.commands.Command.name` (or :py:attr:`data.command | |||
<earwigbot.irc.data.Data.command>` is in | |||
:py:attr:`~earwigbot.commands.Command.commands` if that list is overridden; | |||
see above), which is suitable for most cases. A possible reason for | |||
overriding is if you want to do something in response to events from a | |||
specific channel only. Note that by returning ``True``, you prevent any other | |||
commands from responding to this message. | |||
- Method :py:meth:`~earwigbot.commands.Command.process` is passed the same | |||
:py:class:`~earwigbot.irc.data.Data` object as | |||
:py:meth:`~earwigbot.commands.Command.check`, but only if | |||
:py:meth:`~earwigbot.commands.Command.check` returned ``True``. This is where | |||
the bulk of your command goes. To respond to IRC messages, there are a number | |||
of methods of :py:class:`~earwigbot.commands.Command` at your disposal. See | |||
the test_ command for a simple example, or look in | |||
:py:class:`~earwigbot.commands.Command`'s | |||
:py:meth:`~earwigbot.commands.Command.__init__` method for the full list. | |||
The most common ones are :py:meth:`say(chan_or_user, msg) | |||
<earwigbot.irc.connection.IRCConnection.say>`, :py:meth:`reply(data, msg) | |||
<earwigbot.irc.connection.IRCConnection.reply>` (convenience function; sends | |||
a reply to the issuer of the command in the channel it was received), | |||
:py:meth:`action(chan_or_user, msg) | |||
<earwigbot.irc.connection.IRCConnection.action>`, | |||
:py:meth:`notice(chan_or_user, msg) | |||
<earwigbot.irc.connection.IRCConnection.notice>`, :py:meth:`join(chan) | |||
<earwigbot.irc.connection.IRCConnection.join>`, and | |||
:py:meth:`part(chan) <earwigbot.irc.connection.IRCConnection.part>`. | |||
Commands have access to :py:attr:`config.commands[command_name]` for config | |||
information, which is a node in :file:`config.yml` like every other attribute | |||
of :py:attr:`bot.config`. This can be used to store, for example, API keys or | |||
SQL connection info, so that these can be easily changed without modifying the | |||
command itself. | |||
The command *class* doesn't need a specific name, but it should logically | |||
follow the command's name. The filename doesn't matter, but it is recommended | |||
to match the command name for readability. Multiple command classes are allowed | |||
in one file. | |||
The bot has a wide selection of built-in commands and plugins to act as sample | |||
code and/or to give ideas. Start with test_, and then check out chanops_ and | |||
afc_status_ for some more complicated scripts. | |||
Custom bot tasks | |||
---------------- | |||
Custom tasks are subclasses of :py:class:`earwigbot.tasks.Task` that | |||
override :py:class:`~earwigbot.tasks.Task`'s | |||
:py:meth:`~earwigbot.tasks.Task.run` (and optionally | |||
:py:meth:`~earwigbot.tasks.Task.setup`) methods. | |||
:py:class:`~earwigbot.tasks.Task`'s docstrings should explain what each | |||
attribute and method is for and what they should be overridden with, but these | |||
are the basics: | |||
- Class attribute :py:attr:`~earwigbot.tasks.Task.name` is the name of the | |||
task. This must be specified. | |||
- Class attribute :py:attr:`~earwigbot.tasks.Task.number` can be used to store | |||
an optional "task number", possibly for use in edit summaries (to be | |||
generated with :py:meth:`~earwigbot.tasks.Task.make_summary`). For | |||
example, EarwigBot's :py:attr:`config.wiki["summary"]` is | |||
``"([[WP:BOT|Bot]]; [[User:EarwigBot#Task $1|Task $1]]): $2"``, which the | |||
task class's :py:meth:`make_summary(comment) | |||
<earwigbot.tasks.Task.make_summary>` method will take and replace | |||
``$1`` with the task number and ``$2`` with the details of the edit. | |||
Additionally, :py:meth:`~earwigbot.tasks.Task.shutoff_enabled` (which checks | |||
whether the bot has been told to stop on-wiki by checking the content of a | |||
particular page) can check a different page for each task using similar | |||
variables. EarwigBot's :py:attr:`config.wiki["shutoff"]["page"]` is | |||
``"User:$1/Shutoff/Task $2"``; ``$1`` is substituted with the bot's username, | |||
and ``$2`` is substituted with the task number, so, e.g., task #14 checks the | |||
page ``[[User:EarwigBot/Shutoff/Task 14]]``. If the page's content does *not* | |||
match :py:attr:`config.wiki["shutoff"]["disabled"]` (``"run"`` by default), | |||
then shutoff is considered to be *enabled* and | |||
:py:meth:`~earwigbot.tasks.Task.shutoff_enabled` will return ``True``, | |||
indicating the task should not run. If you don't intend to use either of | |||
these methods, feel free to leave this attribute blank. | |||
- Method :py:meth:`~earwigbot.tasks.Task.setup` is called *once* with no | |||
arguments immediately after the task is first loaded. Does nothing by | |||
default; treat it like an :py:meth:`__init__` if you want | |||
(:py:meth:`~earwigbot.tasks.Task.__init__` does things by default and a | |||
dedicated setup method is often easier than overriding | |||
:py:meth:`~earwigbot.tasks.Task.__init__` and using :py:obj:`super`). | |||
- Method :py:meth:`~earwigbot.tasks.Task.run` is called with any number of | |||
keyword arguments every time the task is executed (by | |||
:py:meth:`tasks.start(task_name, **kwargs) | |||
<earwigbot.managers.TaskManager.start>`, usually). This is where the bulk of | |||
the task's code goes. For interfacing with MediaWiki sites, read up on the | |||
:doc:`Wiki Toolset <toolset>`. | |||
Tasks have access to :py:attr:`config.tasks[task_name]` for config information, | |||
which is a node in :file:`config.yml` like every other attribute of | |||
:py:attr:`bot.config`. This can be used to store, for example, edit summaries | |||
or templates to append to user talk pages, so that these can be easily changed | |||
without modifying the task itself. | |||
The task *class* doesn't need a specific name, but it should logically follow | |||
the task's name. The filename doesn't matter, but it is recommended to match | |||
the task name for readability. Multiple task classes are allowed in one file. | |||
See the built-in wikiproject_tagger_ task for a relatively straightforward | |||
task, or the afc_statistics_ plugin for a more complicated one. | |||
.. _help: https://github.com/earwig/earwigbot/blob/develop/earwigbot/commands/help.py | |||
.. _afc_status: https://github.com/earwig/earwigbot-plugins/blob/develop/commands/afc_status.py | |||
.. _test: https://github.com/earwig/earwigbot/blob/develop/earwigbot/commands/test.py | |||
.. _chanops: https://github.com/earwig/earwigbot/blob/develop/earwigbot/commands/chanops.py | |||
.. _wikiproject_tagger: https://github.com/earwig/earwigbot/blob/develop/earwigbot/tasks/wikiproject_tagger.py | |||
.. _afc_statistics: https://github.com/earwig/earwigbot-plugins/blob/develop/tasks/afc_statistics.py |
@@ -0,0 +1,48 @@ | |||
EarwigBot v0.1 Documentation | |||
============================ | |||
EarwigBot_ is a Python_ robot that edits Wikipedia_ and interacts with people | |||
over IRC_. | |||
History | |||
------- | |||
Development began, based on the `Pywikipedia framework`_, in early 2009. | |||
Approval for its first task, a `copyright violation detector`_, was carried out | |||
in May, and the bot has been running consistently ever since (with the | |||
exception of Jan/Feb 2011). It currently handles `several ongoing tasks`_, | |||
ranging from statistics generation to category cleanup, and on-demand tasks | |||
such as WikiProject template tagging. Since it started running, the bot has | |||
made over 50,000 edits. | |||
A project to rewrite it from scratch began in early April 2011, thus moving | |||
away from the Pywikipedia framework and allowing for less overall code, better | |||
integration between bot parts, and easier maintenance. | |||
.. _EarwigBot: http://en.wikipedia.org/wiki/User:EarwigBot | |||
.. _Python: http://python.org/ | |||
.. _Wikipedia: http://en.wikipedia.org/ | |||
.. _IRC: http://en.wikipedia.org/wiki/Internet_Relay_Chat | |||
.. _Pywikipedia framework: http://pywikipediabot.sourceforge.net/ | |||
.. _copyright violation detector: http://en.wikipedia.org/wiki/Wikipedia:Bots/Requests_for_approval/EarwigBot_1 | |||
.. _several ongoing tasks: http://en.wikipedia.org/wiki/User:EarwigBot#Tasks | |||
Contents | |||
-------- | |||
.. toctree:: | |||
:maxdepth: 2 | |||
installation | |||
setup | |||
customizing | |||
toolset | |||
tips | |||
API Reference <api/modules> | |||
Indices and tables | |||
------------------ | |||
* :ref:`genindex` | |||
* :ref:`modindex` | |||
* :ref:`search` |
@@ -0,0 +1,55 @@ | |||
Installation | |||
============ | |||
This package contains the core :py:mod:`earwigbot`, abstracted enough that it | |||
should be usable and customizable by anyone running a bot on a MediaWiki site. | |||
Since it is component-based, the IRC components can be disabled if desired. IRC | |||
commands and bot tasks specific to `my instance of EarwigBot`_ that I don't | |||
feel the average user will need are available from the repository | |||
`earwigbot-plugins`_. | |||
It's recommended to run the bot's unit tests before installing. Run | |||
:command:`python setup.py test` from the project's root directory. Note that | |||
some tests require an internet connection, and others may take a while to run. | |||
Coverage is currently rather incomplete. | |||
Latest release (v0.1) | |||
--------------------- | |||
EarwigBot is available from the `Python Package Index`_, so you can install the | |||
latest release with :command:`pip install earwigbot` (`get pip`_). | |||
You can also install it from source [1]_ directly:: | |||
curl -Lo earwigbot.tgz https://github.com/earwig/earwigbot/tarball/v0.1 | |||
tar -xf earwigbot.tgz | |||
cd earwig-earwigbot-* | |||
python setup.py install | |||
cd .. | |||
rm -r earwigbot.tgz earwig-earwigbot-* | |||
Development version | |||
------------------- | |||
You can install the development version of the bot from :command:`git` by using | |||
setuptools/`distribute`_'s :command:`develop` command [1]_, probably on the | |||
``develop`` branch which contains (usually) working code. ``master`` contains | |||
the latest release. EarwigBot uses `git flow`_, so you're free to browse by | |||
tags or by new features (``feature/*`` branches):: | |||
git clone git://github.com/earwig/earwigbot.git earwigbot | |||
cd earwigbot | |||
python setup.py develop | |||
.. rubric:: Footnotes | |||
.. [1] :command:`python setup.py install`/:command:`develop` may require root, | |||
or use the :command:`--user` switch to install for the current user | |||
only. | |||
.. _my instance of EarwigBot: http://en.wikipedia.org/wiki/User:EarwigBot | |||
.. _earwigbot-plugins: https://github.com/earwig/earwigbot-plugins | |||
.. _Python Package Index: http://pypi.python.org | |||
.. _get pip: http://pypi.python.org/pypi/pip | |||
.. _distribute: http://pypi.python.org/pypi/distribute | |||
.. _git flow: http://nvie.com/posts/a-successful-git-branching-model/ |
@@ -0,0 +1,28 @@ | |||
Setup | |||
===== | |||
The bot stores its data in a "working directory", including its config file and | |||
databases. This is also the location where you will place custom IRC commands | |||
and bot tasks, which will be explained later. It doesn't matter where this | |||
directory is, as long as the bot can write to it. | |||
Start the bot with :command:`earwigbot path/to/working/dir`, or just | |||
:command:`earwigbot` if the working directory is the current directory. It will | |||
notice that no :file:`config.yml` file exists and take you through the setup | |||
process. | |||
There is currently no way to edit the :file:`config.yml` file from within the | |||
bot after it has been created, but YAML is a very straightforward format, so | |||
you should be able to make any necessary changes yourself. Check out the | |||
`explanation of YAML`_ on Wikipedia for help. | |||
After setup, the bot will start. This means it will connect to the IRC servers | |||
it has been configured for, schedule bot tasks to run at specific times, and | |||
then wait for instructions (as commands on IRC). For a list of commands, say | |||
"``!help``" (commands are messages prefixed with an exclamation mark). | |||
You can stop the bot at any time with :kbd:`Control-c`, same as you stop a | |||
normal Python program, and it will try to exit safely. You can also use the | |||
"``!quit``" command on IRC. | |||
.. _explanation of YAML: http://en.wikipedia.org/wiki/YAML |
@@ -0,0 +1,46 @@ | |||
Tips | |||
==== | |||
- Logging_ is a fantastic way to monitor the bot's progress as it runs. It has | |||
a slew of built-in loggers, and enabling log retention (so logs are saved to | |||
:file:`logs/` in the working directory) is highly recommended. In the normal | |||
setup, there are three log files, each of which "rotates" at a specific time
(:file:`filename.log` becomes :file:`filename.log.2012-04-10`, for example). | |||
The :file:`debug.log` file rotates every hour, and maintains six hours of | |||
logs of every level (``DEBUG`` and up). :file:`bot.log` rotates every day at | |||
midnight, and maintains seven days of non-debug logs (``INFO`` and up). | |||
Finally, :file:`error.log` rotates every Sunday night, and maintains four | |||
weeks of logs indicating unexpected events (``WARNING`` and up). | |||
To use logging in your commands or tasks (recommended), | |||
:py:class:`~earwigbot.commands.Command` and
:py:class:`~earwigbot.tasks.BaseTask` provide :py:attr:`logger` attributes
configured for the specific command or task. If you're working with other | |||
classes, :py:attr:`bot.logger` is the root logger | |||
(:py:obj:`logging.getLogger("earwigbot")` by default), so you can use | |||
:py:func:`~logging.Logger.getChild` to make your logger. For example, task | |||
loggers are essentially | |||
:py:attr:`bot.logger.getChild("tasks").getChild(task.name) <bot.logger>`. | |||
- A very useful IRC command is "``!reload``", which reloads all commands and | |||
tasks without restarting the bot. [1]_ Combined with using the `!git plugin`_ | |||
for pulling repositories from IRC, this can provide a seamless command/task | |||
development workflow if the bot runs on an external server and you set up | |||
its working directory as a git repo. | |||
- You can run a task by itself instead of the entire bot with | |||
:command:`earwigbot path/to/working/dir --task task_name`. | |||
- Questions, comments, or suggestions about the documentation? `Let me know`_, | |||
or `create an issue`_ so I can improve it for other people. | |||
.. rubric:: Footnotes | |||
.. [1] In reality, all this does is call :py:meth:`bot.commands.load() | |||
<earwigbot.managers._ResourceManager.load>` and | |||
:py:meth:`bot.tasks.load() <earwigbot.managers._ResourceManager.load>`! | |||
.. _logging: http://docs.python.org/library/logging.html | |||
.. _!git plugin: https://github.com/earwig/earwigbot-plugins/blob/develop/commands/git.py | |||
.. _Let me know: ben.kurtovic@verizon.net | |||
.. _create an issue: https://github.com/earwig/earwigbot/issues |
@@ -0,0 +1,247 @@ | |||
The Wiki Toolset | |||
================ | |||
EarwigBot's answer to the `Pywikipedia framework`_ is the Wiki Toolset | |||
(:py:mod:`earwigbot.wiki`), which you will mainly access through | |||
:py:attr:`bot.wiki <earwigbot.bot.Bot.wiki>`. | |||
:py:attr:`bot.wiki <earwigbot.bot.Bot.wiki>` provides three methods for the | |||
management of Sites - :py:meth:`~earwigbot.wiki.sitesdb.SitesDB.get_site`, | |||
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.add_site`, and | |||
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.remove_site`. Sites are objects that | |||
simply represent a MediaWiki site. A single instance of EarwigBot (i.e. a | |||
single *working directory*) is expected to relate to a single site or group of | |||
sites using the same login info (like all WMF wikis with `CentralAuth`_). | |||
Load your default site (the one that you picked during setup) with | |||
``site = bot.wiki.get_site()``. | |||
Dealing with other sites | |||
~~~~~~~~~~~~~~~~~~~~~~~~ | |||
*Skip this section if you're only working with one site.* | |||
If a site is *already known to the bot* (meaning that it is stored in the | |||
:file:`sites.db` file, which includes just your default wiki at first), you can | |||
load a site with ``site = bot.wiki.get_site(name)``, where ``name`` might be | |||
``"enwiki"`` or ``"frwiktionary"`` (you can also do | |||
``site = bot.wiki.get_site(project="wikipedia", lang="en")``). Recall that not | |||
giving any arguments to ``get_site()`` will return the default site. | |||
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.add_site` is used to add new sites to | |||
the sites database. It may be called with similar arguments as | |||
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.get_site`, but the difference is | |||
important. :py:meth:`~earwigbot.wiki.sitesdb.SitesDB.get_site` only needs | |||
enough information to identify the site in its database, which is usually just | |||
its name; the database stores all other necessary connection info. With | |||
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.add_site`, you need to provide enough | |||
connection info so the toolset can successfully access the site's API/SQL | |||
databases and store that information for later. That might not be much; for WMF | |||
wikis, you can usually use code like this:: | |||
    project, lang = "wikipedia", "es"
    try:
        site = bot.wiki.get_site(project=project, lang=lang)
    except earwigbot.exceptions.SiteNotFoundError:
        # Load site info from http://es.wikipedia.org/w/api.php:
        site = bot.wiki.add_site(project=project, lang=lang)
This works because EarwigBot assumes that the URL for the site is | |||
``"//{lang}.{project}.org"``, the API is at ``/w/api.php``, and the SQL | |||
connection info (if any) is stored as ``config.wiki["sql"]``. This might change | |||
if you're dealing with non-WMF wikis, where the code might look something more | |||
like:: | |||
    project, lang = "mywiki", "it"
    try:
        site = bot.wiki.get_site(project=project, lang=lang)
    except earwigbot.exceptions.SiteNotFoundError:
        # Load site info from http://mysite.net/mywiki/it/s/api.php:
        base_url = "http://mysite.net/" + project + "/" + lang
        db_name = lang + project + "_p"
        sql = {"host": "sql.mysite.net", "db": db_name}
        site = bot.wiki.add_site(base_url=base_url, script_path="/s", sql=sql)
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.remove_site` does the opposite of | |||
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.add_site`: give it a site's name or a | |||
project/lang pair like :py:meth:`~earwigbot.wiki.sitesdb.SitesDB.get_site` | |||
takes, and it'll remove that site from the sites database. | |||
Sites | |||
~~~~~ | |||
:py:class:`earwigbot.wiki.Site <earwigbot.wiki.site.Site>` objects provide the | |||
following attributes: | |||
- :py:attr:`~earwigbot.wiki.site.Site.name`: the site's name (or "wikiid"), | |||
like ``"enwiki"`` | |||
- :py:attr:`~earwigbot.wiki.site.Site.project`: the site's project name, like | |||
``"wikipedia"`` | |||
- :py:attr:`~earwigbot.wiki.site.Site.lang`: the site's language code, like | |||
``"en"`` | |||
- :py:attr:`~earwigbot.wiki.site.Site.domain`: the site's web domain, like | |||
``"en.wikipedia.org"`` | |||
- :py:attr:`~earwigbot.wiki.site.Site.url`: the site's full base URL, like | |||
``"https://en.wikipedia.org"`` | |||
and the following methods: | |||
- :py:meth:`api_query(**kwargs) <earwigbot.wiki.site.Site.api_query>`: does an | |||
API query with the given keyword arguments as params | |||
- :py:meth:`sql_query(query, params=(), ...) | |||
<earwigbot.wiki.site.Site.sql_query>`: does an SQL query and yields its | |||
results (as a generator) | |||
- :py:meth:`~earwigbot.wiki.site.Site.get_replag`: returns the estimated | |||
database replication lag (if we have the site's SQL connection info) | |||
- :py:meth:`namespace_id_to_name(id, all=False) | |||
<earwigbot.wiki.site.Site.namespace_id_to_name>`: given a namespace ID, | |||
returns the primary associated namespace name (or a list of all names when | |||
``all`` is ``True``) | |||
- :py:meth:`namespace_name_to_id(name) | |||
<earwigbot.wiki.site.Site.namespace_name_to_id>`: given a namespace name, | |||
returns the associated namespace ID | |||
- :py:meth:`get_page(title, follow_redirects=False, ...) | |||
<earwigbot.wiki.site.Site.get_page>`: returns a ``Page`` object for the given | |||
title (or a :py:class:`~earwigbot.wiki.category.Category` object if the | |||
page's namespace is "``Category:``") | |||
- :py:meth:`get_category(catname, follow_redirects=False, ...) | |||
<earwigbot.wiki.site.Site.get_category>`: returns a ``Category`` object for | |||
the given title (sans namespace) | |||
- :py:meth:`get_user(username) <earwigbot.wiki.site.Site.get_user>`: returns a | |||
:py:class:`~earwigbot.wiki.user.User` object for the given username | |||
- :py:meth:`delegate(services, ...) <earwigbot.wiki.site.Site.delegate>`: | |||
delegates a task to either the API or SQL depending on various conditions, | |||
such as server lag | |||
Pages and categories | |||
~~~~~~~~~~~~~~~~~~~~ | |||
Create :py:class:`earwigbot.wiki.Page <earwigbot.wiki.page.Page>` objects with | |||
:py:meth:`site.get_page(title) <earwigbot.wiki.site.Site.get_page>`, | |||
:py:meth:`page.toggle_talk() <earwigbot.wiki.page.Page.toggle_talk>`, | |||
:py:meth:`user.get_userpage() <earwigbot.wiki.user.User.get_userpage>`, or | |||
:py:meth:`user.get_talkpage() <earwigbot.wiki.user.User.get_talkpage>`. They | |||
provide the following attributes: | |||
- :py:attr:`~earwigbot.wiki.page.Page.site`: the page's corresponding | |||
:py:class:`~earwigbot.wiki.site.Site` object | |||
- :py:attr:`~earwigbot.wiki.page.Page.title`: the page's title, or pagename | |||
- :py:attr:`~earwigbot.wiki.page.Page.exists`: whether or not the page exists | |||
- :py:attr:`~earwigbot.wiki.page.Page.pageid`: an integer ID representing the | |||
page | |||
- :py:attr:`~earwigbot.wiki.page.Page.url`: the page's URL | |||
- :py:attr:`~earwigbot.wiki.page.Page.namespace`: the page's namespace as an | |||
integer | |||
- :py:attr:`~earwigbot.wiki.page.Page.protection`: the page's current | |||
protection status | |||
- :py:attr:`~earwigbot.wiki.page.Page.is_talkpage`: ``True`` if the page is a | |||
talkpage, else ``False`` | |||
- :py:attr:`~earwigbot.wiki.page.Page.is_redirect`: ``True`` if the page is a | |||
redirect, else ``False`` | |||
and the following methods: | |||
- :py:meth:`~earwigbot.wiki.page.Page.reload`: forcibly reloads the page's | |||
attributes (emphasis on *reload* - this is only necessary if there is reason | |||
to believe they have changed) | |||
- :py:meth:`toggle_talk(...) <earwigbot.wiki.page.Page.toggle_talk>`: returns a | |||
content page's talk page, or vice versa | |||
- :py:meth:`~earwigbot.wiki.page.Page.get`: returns page content | |||
- :py:meth:`~earwigbot.wiki.page.Page.get_redirect_target`: if the page is a | |||
redirect, returns its destination | |||
- :py:meth:`~earwigbot.wiki.page.Page.get_creator`: returns a | |||
:py:class:`~earwigbot.wiki.user.User` object representing the first user to | |||
edit the page | |||
- :py:meth:`edit(text, summary, minor=False, bot=True, force=False) | |||
<earwigbot.wiki.page.Page.edit>`: replaces the page's content with ``text`` | |||
or creates a new page | |||
- :py:meth:`add_section(text, title, minor=False, bot=True, force=False) | |||
<earwigbot.wiki.page.Page.add_section>`: adds a new section named ``title`` | |||
at the bottom of the page | |||
- :py:meth:`copyvio_check(...) | |||
<earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check>`: checks the page for | |||
copyright violations | |||
- :py:meth:`copyvio_compare(url, ...) | |||
<earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_compare>`: checks the page like | |||
:py:meth:`~earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check`, but | |||
against a specific URL | |||
- :py:meth:`check_exclusion(username=None, optouts=None) | |||
<earwigbot.wiki.page.Page.check_exclusion>`: checks whether or not we are | |||
allowed to edit the page per ``{{bots}}``/``{{nobots}}`` | |||
Additionally, :py:class:`~earwigbot.wiki.category.Category` objects (created | |||
with :py:meth:`site.get_category(name) <earwigbot.wiki.site.Site.get_category>` | |||
or :py:meth:`site.get_page(title) <earwigbot.wiki.site.Site.get_page>` where | |||
``title`` is in the ``Category:`` namespace) provide the following additional | |||
attributes: | |||
- :py:attr:`~earwigbot.wiki.category.Category.size`: the total number of | |||
members in the category | |||
- :py:attr:`~earwigbot.wiki.category.Category.pages`: the number of pages in | |||
the category | |||
- :py:attr:`~earwigbot.wiki.category.Category.files`: the number of files in | |||
the category | |||
- :py:attr:`~earwigbot.wiki.category.Category.subcats`: the number of | |||
subcategories in the category | |||
And the following additional method: | |||
- :py:meth:`get_members(limit=None, ...) | |||
<earwigbot.wiki.category.Category.get_members>`: iterates over | |||
:py:class:`~earwigbot.wiki.page.Page`\ s in the category, until either the | |||
category is exhausted or (if given) ``limit`` is reached | |||
Users | |||
~~~~~ | |||
Create :py:class:`earwigbot.wiki.User <earwigbot.wiki.user.User>` objects with | |||
:py:meth:`site.get_user(name) <earwigbot.wiki.site.Site.get_user>` or | |||
:py:meth:`page.get_creator() <earwigbot.wiki.page.Page.get_creator>`. They | |||
provide the following attributes: | |||
- :py:attr:`~earwigbot.wiki.user.User.site`: the user's corresponding | |||
:py:class:`~earwigbot.wiki.site.Site` object | |||
- :py:attr:`~earwigbot.wiki.user.User.name`: the user's username | |||
- :py:attr:`~earwigbot.wiki.user.User.exists`: ``True`` if the user exists, or | |||
``False`` if they do not | |||
- :py:attr:`~earwigbot.wiki.user.User.userid`: an integer ID representing the | |||
user | |||
- :py:attr:`~earwigbot.wiki.user.User.blockinfo`: information about any current | |||
blocks on the user (``False`` if no block, or a dict of | |||
``{"by": blocking_user, "reason": block_reason, | |||
"expiry": block_expire_time}``) | |||
- :py:attr:`~earwigbot.wiki.user.User.groups`: a list of the user's groups | |||
- :py:attr:`~earwigbot.wiki.user.User.rights`: a list of the user's rights | |||
- :py:attr:`~earwigbot.wiki.user.User.editcount`: the number of edits made by | |||
the user | |||
- :py:attr:`~earwigbot.wiki.user.User.registration`: the time the user | |||
registered as a :py:obj:`time.struct_time` | |||
- :py:attr:`~earwigbot.wiki.user.User.emailable`: ``True`` if you can email the | |||
user, ``False`` if you cannot | |||
- :py:attr:`~earwigbot.wiki.user.User.gender`: the user's gender (``"male"``, | |||
``"female"``, or ``"unknown"``) | |||
- :py:attr:`~earwigbot.wiki.user.User.is_ip`: ``True`` if the user is an IP | |||
address, IPv4 or IPv6, otherwise ``False`` | |||
and the following methods: | |||
- :py:meth:`~earwigbot.wiki.user.User.reload`: forcibly reloads the user's | |||
attributes (emphasis on *reload* - this is only necessary if there is reason | |||
to believe they have changed) | |||
- :py:meth:`~earwigbot.wiki.user.User.get_userpage`: returns a | |||
:py:class:`~earwigbot.wiki.page.Page` object representing the user's userpage | |||
- :py:meth:`~earwigbot.wiki.user.User.get_talkpage`: returns a | |||
:py:class:`~earwigbot.wiki.page.Page` object representing the user's talkpage | |||
Additional features | |||
~~~~~~~~~~~~~~~~~~~ | |||
Not all aspects of the toolset are covered here. Explore `its code and | |||
docstrings`_ to learn how to use it in a more hands-on fashion. For reference, | |||
:py:attr:`bot.wiki <earwigbot.bot.Bot.wiki>` is an instance of | |||
:py:class:`earwigbot.wiki.SitesDB <earwigbot.wiki.sitesdb.SitesDB>` tied to the | |||
:file:`sites.db` file in the bot's working directory. | |||
.. _Pywikipedia framework: http://pywikipediabot.sourceforge.net/ | |||
.. _CentralAuth: http://www.mediawiki.org/wiki/Extension:CentralAuth | |||
.. _its code and docstrings: https://github.com/earwig/earwigbot/tree/develop/earwigbot/wiki |
@@ -1,22 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
import time | |||
from subprocess import * | |||
# The imported names are not used anywhere below (``main`` is even redefined
# as a function); the import only verifies that every config module exists.
try:
    from config import irc, main, schedule, secure, watcher
except ImportError:
    # Call-form print with a single string behaves the same on Python 2 and 3.
    print("""Missing a config file! Make sure you have configured the bot. All *.py.default files in config/
should have their .default extension removed, and the info inside should be corrected.""")
    # A missing configuration is a fatal error, so report failure to the
    # caller with a non-zero exit status.
    exit(1)
def main():
    """Run the bot forever, relaunching it five seconds after each exit."""
    while True:
        call(['python', 'core/main.py'])
        time.sleep(5)  # pause for five seconds before the next bot run
if __name__ == "__main__":
    # main() loops forever; Ctrl-C is the expected way to stop the wrapper,
    # so exit with a short message instead of a traceback.
    try:
        main()
    except KeyboardInterrupt:
        exit("\nKeyboardInterrupt: stopping bot wrapper.")
@@ -0,0 +1,68 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
""" | |||
`EarwigBot <https://github.com/earwig/earwigbot>`_ is a Python robot that edits | |||
Wikipedia and interacts with people over IRC. | |||
See :file:`README.rst` for an overview, or the :file:`docs/` directory for | |||
details. This documentation is also available `online | |||
<http://packages.python.org/earwigbot>`_. | |||
""" | |||
# Package metadata:
__author__ = "Ben Kurtovic"
__copyright__ = "Copyright (C) 2009, 2010, 2011, 2012 Ben Kurtovic"
__license__ = "MIT License"
__version__ = "0.1"
__email__ = "ben.kurtovic@verizon.net"

# Set to False in development checkouts so that the version string is
# suffixed with the current git commit ID.
__release__ = True

if not __release__:
    def _get_git_commit_id():
        """Return the first eight characters of the git HEAD commit's SHA."""
        from git import Repo
        from os.path import split, dirname
        # The repository root is the directory containing this package.
        repo_root = split(dirname(__file__))[0]
        return Repo(repo_root).head.object.hexsha[:8]

    try:
        __version__ = __version__ + ".git+" + _get_git_commit_id()
    except Exception:
        # Best effort only: keep the plain version if the commit ID cannot be
        # determined for any reason.
        pass
    finally:
        # Don't leave the helper behind in the package namespace.
        del _get_git_commit_id
# Expose each subpackage as an attribute of the top-level package through a
# LazyImporter. NOTE(review): presumably importer.new() defers the real import
# until the attribute is first used -- confirm against earwigbot.lazy.
from earwigbot import lazy
importer = lazy.LazyImporter()
bot = importer.new("earwigbot.bot")
commands = importer.new("earwigbot.commands")
config = importer.new("earwigbot.config")
exceptions = importer.new("earwigbot.exceptions")
irc = importer.new("earwigbot.irc")
managers = importer.new("earwigbot.managers")
tasks = importer.new("earwigbot.tasks")
util = importer.new("earwigbot.util")
wiki = importer.new("earwigbot.wiki")
# The importer itself is not part of the package's public namespace.
del importer
@@ -0,0 +1,222 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import logging | |||
from threading import Lock, Thread, enumerate as enumerate_threads | |||
from time import sleep, time | |||
from earwigbot import __version__ | |||
from earwigbot.config import BotConfig | |||
from earwigbot.irc import Frontend, Watcher | |||
from earwigbot.managers import CommandManager, TaskManager | |||
from earwigbot.wiki import SitesDB | |||
__all__ = ["Bot"] | |||
class Bot(object):
    """
    **EarwigBot: Main Bot Class**

    The :py:class:`Bot` class is the core of EarwigBot, essentially responsible
    for starting the various bot components and making sure they are all happy.

    EarwigBot has three components that can run independently of each other: an
    IRC front-end, an IRC watcher, and a wiki scheduler.

    - The IRC front-end runs on a normal IRC server and expects users to
      interact with it/give it commands.
    - The IRC watcher runs on a wiki recent-changes server and listens for
      edits. Users cannot interact with this part of the bot.
    - The wiki scheduler runs wiki-editing bot tasks in separate threads at
      user-defined times through a cron-like interface.

    The :py:class:`Bot` object is accessible from within commands and tasks as
    :py:attr:`self.bot`. This is the primary way to access data from other
    components of the bot. For example, our
    :py:class:`~earwigbot.config.BotConfig` object is accessible from
    :py:attr:`bot.config`, tasks can be started with
    :py:meth:`bot.tasks.start() <earwigbot.managers.TaskManager.start>`, and
    sites can be loaded from the wiki toolset with
    :py:meth:`bot.wiki.get_site() <earwigbot.wiki.sitesdb.SitesDB.get_site>`.
    """

    def __init__(self, root_dir, level=logging.INFO):
        """Create the bot's managers, then load config, commands, and tasks.

        *root_dir* and *level* are passed straight through to
        :py:class:`~earwigbot.config.BotConfig`.
        """
        self.config = BotConfig(self, root_dir, level)
        self.logger = logging.getLogger("earwigbot")
        self.commands = CommandManager(self)
        self.tasks = TaskManager(self)
        self.wiki = SitesDB(self)
        self.frontend = None
        self.watcher = None

        # Held while IRC components are being checked, stopped, or restarted:
        self.component_lock = Lock()
        self._keep_looping = True

        self.config.load()
        self.commands.load()
        self.tasks.load()

    def __repr__(self):
        """Return the canonical string representation of the Bot."""
        return "Bot(config={0!r})".format(self.config)

    def __str__(self):
        """Return a nice string representation of the Bot."""
        return "<Bot at {0}>".format(self.config.root_dir)

    @staticmethod
    def _scheduler_delay(time_start, time_end, interval=60):
        """Return the seconds to sleep so runs start *interval* seconds apart.

        *time_start* and *time_end* are the timestamps taken before and after
        a scheduler run. The result is never negative: if the run took longer
        than *interval*, the next run should start immediately.
        """
        elapsed = time_end - time_start
        return max(interval - elapsed, 0)

    def _dispatch_irc_component(self, name, klass):
        """Create a new IRC component, record it internally, and start it."""
        component = klass(self)
        setattr(self, name, component)
        Thread(name="irc_" + name, target=component.loop).start()

    def _start_irc_components(self):
        """Start the IRC frontend/watcher in separate threads if enabled."""
        if self.config.components.get("irc_frontend"):
            self.logger.info("Starting IRC frontend")
            self._dispatch_irc_component("frontend", Frontend)

        if self.config.components.get("irc_watcher"):
            self.logger.info("Starting IRC watcher")
            self._dispatch_irc_component("watcher", Watcher)

    def _start_wiki_scheduler(self):
        """Start the wiki scheduler in a separate thread if enabled."""
        def wiki_scheduler():
            while self._keep_looping:
                time_start = time()
                self.tasks.schedule()
                time_end = time()
                # Sleep for whatever is left of the minute. The elapsed time
                # must be end - start; the reverse is negative and would make
                # every cycle sleep longer than a minute.
                delay = self._scheduler_delay(time_start, time_end)
                if delay > 0:
                    sleep(delay)

        if self.config.components.get("wiki_scheduler"):
            self.logger.info("Starting wiki scheduler")
            thread = Thread(name="wiki_scheduler", target=wiki_scheduler)
            thread.daemon = True  # Stop if other threads stop
            thread.start()

    def _keep_irc_component_alive(self, name, klass):
        """Ensure that IRC components stay connected, else restart them."""
        component = getattr(self, name)
        if component:
            component.keep_alive()
            if component.is_stopped():
                log = "IRC {0} has stopped; restarting".format(name)
                self.logger.warning(log)
                self._dispatch_irc_component(name, klass)

    def _stop_irc_components(self, msg):
        """Request the IRC frontend and watcher to stop if enabled."""
        if self.frontend:
            self.frontend.stop(msg)
        if self.watcher:
            self.watcher.stop(msg)

    def _stop_daemon_threads(self):
        """Notify the user of which threads are going to be killed.

        Unfortunately, there is no method right now of stopping command and
        task threads safely. This is because there is no way to tell them to
        stop like the IRC components can be told; furthermore, they are run as
        daemons, and daemon threads automatically stop without calling any
        __exit__ or try/finally code when all non-daemon threads stop. They
        were originally implemented as regular non-daemon threads, but this
        meant there was no way to completely stop the bot if tasks were
        running, because all other threads would exit and threading would
        absorb KeyboardInterrupts.

        The advantage of this is that stopping the bot is truly guaranteed to
        *stop* the bot, while the disadvantage is that the threads are given no
        advance warning of their forced shutdown.
        """
        # list() keeps the concatenation working when keys() returns a view
        # instead of a list.
        component_names = list(self.config.components.keys())
        skips = component_names + ["MainThread", "reminder", "irc:quit"]
        tasks = [thread.name for thread in enumerate_threads()
                 if thread.name not in skips and thread.is_alive()]
        if tasks:
            log = "The following commands or tasks will be killed: {0}"
            self.logger.warning(log.format(" ".join(tasks)))

    @property
    def is_running(self):
        """Whether or not the bot is currently running.

        This may return ``False`` even if the bot is still technically active,
        but in the process of shutting down.
        """
        return self._keep_looping

    def run(self):
        """Main entry point into running the bot.

        Starts all config-enabled components and then enters an idle loop,
        ensuring that all components remain online and restarting components
        that get disconnected from their servers.
        """
        self.logger.info("Starting bot (EarwigBot {0})".format(__version__))
        self._start_irc_components()
        self._start_wiki_scheduler()
        while self._keep_looping:
            with self.component_lock:
                self._keep_irc_component_alive("frontend", Frontend)
                self._keep_irc_component_alive("watcher", Watcher)
            sleep(2)

    def restart(self, msg=None):
        """Reload config, commands, tasks, and safely restart IRC components.

        This is thread-safe, and it will gracefully stop IRC components before
        reloading anything. Note that you can safely reload commands or tasks
        without restarting the bot with :py:meth:`bot.commands.load()
        <earwigbot.managers._ResourceManager.load>` or
        :py:meth:`bot.tasks.load() <earwigbot.managers._ResourceManager.load>`.
        These should not interfere with running components or tasks.

        If given, *msg* will be used as our quit message.
        """
        if msg:
            self.logger.info('Restarting bot ("{0}")'.format(msg))
        else:
            self.logger.info("Restarting bot")
        with self.component_lock:
            self._stop_irc_components(msg)
            self.config.load()
            self.commands.load()
            self.tasks.load()
            self._start_irc_components()

    def stop(self, msg=None):
        """Gracefully stop all bot components.

        If given, *msg* will be used as our quit message.
        """
        if msg:
            self.logger.info('Stopping bot ("{0}")'.format(msg))
        else:
            self.logger.info("Stopping bot")
        with self.component_lock:
            self._stop_irc_components(msg)
        self._keep_looping = False
        self._stop_daemon_threads()
@@ -0,0 +1,122 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
__all__ = ["Command"] | |||
class Command(object): | |||
""" | |||
**EarwigBot: Base IRC Command** | |||
This package provides built-in IRC "commands" used by the bot's front-end | |||
component. Additional commands can be installed as plugins in the bot's | |||
working directory. | |||
This class (import with ``from earwigbot.commands import Command``), can be | |||
subclassed to create custom IRC commands. | |||
This docstring is reported to the user when they type ``"!help | |||
<command>"``. | |||
""" | |||
# The command's name, as reported to the user when they use !help: | |||
name = None | |||
# A list of names that will trigger this command. If left empty, it will | |||
# be triggered by the command's name and its name only: | |||
commands = [] | |||
# Hooks are "msg", "msg_private", "msg_public", and "join". "msg" is the | |||
# default behavior; if you wish to override that, change the value in your | |||
# command subclass: | |||
hooks = ["msg"] | |||
def __init__(self, bot): | |||
"""Constructor for new commands. | |||
This is called once when the command is loaded (from | |||
:py:meth:`commands.load() <earwigbot.managers._ResourceManager.load>`). | |||
*bot* is out base :py:class:`~earwigbot.bot.Bot` object. Don't override | |||
this directly; if you do, remember to place | |||
``super(Command, self).__init()`` first. Use :py:meth:`setup` for | |||
typical command-init/setup needs. | |||
""" | |||
self.bot = bot | |||
self.config = bot.config | |||
self.logger = bot.commands.logger.getChild(self.name) | |||
# Convenience functions: | |||
self.say = lambda target, msg, hidelog=False: self.bot.frontend.say(target, msg, hidelog) | |||
self.reply = lambda data, msg, hidelog=False: self.bot.frontend.reply(data, msg, hidelog) | |||
self.action = lambda target, msg, hidelog=False: self.bot.frontend.action(target, msg, hidelog) | |||
self.notice = lambda target, msg, hidelog=False: self.bot.frontend.notice(target, msg, hidelog) | |||
self.join = lambda chan, hidelog=False: self.bot.frontend.join(chan, hidelog) | |||
self.part = lambda chan, msg=None, hidelog=False: self.bot.frontend.part(chan, msg, hidelog) | |||
self.mode = lambda t, level, msg, hidelog=False: self.bot.frontend.mode(t, level, msg, hidelog) | |||
self.ping = lambda target, hidelog=False: self.bot.frontend.ping(target, hidelog) | |||
self.pong = lambda target, hidelog=False: self.bot.frontend.pong(target, hidelog) | |||
self.setup() | |||
def __repr__(self):
    """Return the canonical string representation of the Command."""
    template = "Command(name={0!r}, commands={1!r}, hooks={2!r}, bot={3!r})"
    fields = (self.name, self.commands, self.hooks, self.bot)
    return template.format(*fields)
def __str__(self):
    """Return a nice string representation of the Command."""
    name, bot = self.name, self.bot
    return "<Command {0} of {1}>".format(name, bot)
def setup(self): | |||
"""Hook called immediately after the command is loaded. | |||
Does nothing by default; feel free to override. | |||
""" | |||
pass | |||
def check(self, data):
    """Return whether this command should be called in response to *data*.

    Given a :py:class:`~earwigbot.irc.data.Data` instance, return a true
    value if we should respond to this activity, or a false value if we
    should ignore it and move on. Be aware that since this is called for
    each message sent on IRC, it should be cheap to execute and unlikely to
    throw exceptions.

    By default, *data* must be a command, and
    :py:attr:`data.command <earwigbot.irc.data.Data.command>` must either
    appear in :py:attr:`self.commands <commands>` (when that list is
    non-empty) or equal :py:attr:`self.name <name>` (when it is empty).
    Override this only if you need different matching rules.
    """
    triggers = self.commands
    if not triggers:
        # No explicit trigger list: fall back to the command's own name.
        return data.is_command and data.command == self.name
    return data.is_command and data.command in triggers
def process(self, data):
    """Main entry point for doing a command.

    Handle an activity (usually a message) on IRC. By the time this runs,
    :py:meth:`check` (called automatically by the command handler) has
    already decided that *data* is something we should respond to.
    Subclasses place the body of their command here; the base
    implementation does nothing.
    """
    return None
@@ -0,0 +1,142 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import re | |||
from earwigbot.commands import Command | |||
class Access(Command):
    """Control and get info on who can access the bot."""
    name = "access"
    commands = ["access", "permission", "permissions", "perm", "perms"]

    def process(self, data):
        """Dispatch to the handler for the subcommand in ``data.args[0]``.

        Replies with the list of subcommands when none is given, and with an
        error when the subcommand is not recognized.
        """
        if not data.args:
            self.reply(data, "Subcommands are self, list, add, remove.")
            return
        permdb = self.config.irc["permissions"]
        handlers = {
            "self": self.do_self,
            "list": self.do_list,
            "add": self.do_add,
            "remove": self.do_remove,
        }
        handler = handlers.get(data.args[0])
        if handler is None:
            msg = "Unknown subcommand \x0303{0}\x0F.".format(data.args[0])
            self.reply(data, msg)
            return
        handler(data, permdb)

    def do_self(self, data, permdb):
        """Tell the caller which access rule (owner, then admin) they match."""
        # is_owner()/is_admin() return the matching rule (it is formatted
        # into the reply), so evaluate each at most once.
        owner_rule = permdb.is_owner(data)
        if owner_rule:
            msg = "You are a bot owner (matching rule \x0302{0}\x0F)."
            self.reply(data, msg.format(owner_rule))
            return
        admin_rule = permdb.is_admin(data)
        if admin_rule:
            msg = "You are a bot admin (matching rule \x0302{0}\x0F)."
            self.reply(data, msg.format(admin_rule))
            return
        self.reply(data, "You do not match any bot access rules.")

    def do_list(self, data, permdb):
        """List the owner or admin rules, or a summary count of both."""
        if len(data.args) > 1:
            if data.args[1] in ["owner", "owners"]:
                name, rules = "owners", permdb.data.get(permdb.OWNER)
            elif data.args[1] in ["admin", "admins"]:
                name, rules = "admins", permdb.data.get(permdb.ADMIN)
            else:
                msg = "Unknown access level \x0302{0}\x0F."
                self.reply(data, msg.format(data.args[1]))
                return
            if rules:
                msg = "Bot {0}: {1}.".format(name, ", ".join(map(str, rules)))
            else:
                msg = "No bot {0}.".format(name)
            self.reply(data, msg)
        else:
            owners = len(permdb.data.get(permdb.OWNER, []))
            admins = len(permdb.data.get(permdb.ADMIN, []))
            msg = "There are {0} bot owners and {1} bot admins. Use '!{2} list owners' or '!{2} list admins' for details."
            self.reply(data, msg.format(owners, admins, data.command))

    def do_add(self, data, permdb):
        """Add the user described by *data* as an owner or admin rule."""
        user = self.get_user_from_args(data, permdb)
        if not user:
            return  # get_user_from_args() has already replied with the error
        nick, ident, host = user
        if data.args[1] in ["owner", "owners"]:
            name, level, adder = "owner", permdb.OWNER, permdb.add_owner
        else:
            name, level, adder = "admin", permdb.ADMIN, permdb.add_admin
        if permdb.has_exact(level, nick, ident, host):
            rule = "{0}!{1}@{2}".format(nick, ident, host)
            msg = "\x0302{0}\x0F is already a bot {1}.".format(rule, name)
        else:
            rule = adder(nick, ident, host)
            msg = "Added bot {0} \x0302{1}\x0F.".format(name, rule)
        self.reply(data, msg)

    def do_remove(self, data, permdb):
        """Remove the owner or admin rule described by *data*, if present."""
        user = self.get_user_from_args(data, permdb)
        if not user:
            return  # get_user_from_args() has already replied with the error
        nick, ident, host = user
        if data.args[1] in ["owner", "owners"]:
            name, rmver = "owner", permdb.remove_owner
        else:
            name, rmver = "admin", permdb.remove_admin
        rule = rmver(nick, ident, host)
        if rule:
            msg = "Removed bot {0} \x0302{1}\x0F.".format(name, rule)
        else:
            rule = "{0}!{1}@{2}".format(nick, ident, host)
            msg = "No bot {0} matching \x0302{1}\x0F.".format(name, rule)
        self.reply(data, msg)

    def get_user_from_args(self, data, permdb):
        """Validate an add/remove request and extract its target user.

        Return a ``(nick, ident, host)`` tuple on success. On failure (the
        caller is not a bot owner, the access level is missing or unknown,
        or no user is given) reply with an error and return ``None``.
        """
        if not permdb.is_owner(data):
            # Shared by "add" and "remove", so the wording covers both.
            msg = "You must be a bot owner to add or remove users from the access list."
            self.reply(data, msg)
            return
        levels = ["owner", "owners", "admin", "admins"]
        if len(data.args) == 1 or data.args[1] not in levels:
            msg = "Please specify an access level ('owners' or 'admins')."
            self.reply(data, msg)
            return
        if len(data.args) == 2:
            self.no_arg_error(data)
            return
        kwargs = data.kwargs
        if "nick" in kwargs or "ident" in kwargs or "host" in kwargs:
            # Keyword form: any part that is not given becomes a wildcard.
            nick = kwargs.get("nick", "*")
            ident = kwargs.get("ident", "*")
            host = kwargs.get("host", "*")
            return nick, ident, host
        # Positional form: a single "nick!ident@host" argument.
        user = re.match(r"(.*?)!(.*?)@(.*?)$", data.args[2])
        if not user:
            self.no_arg_error(data)
            return
        return user.group(1), user.group(2), user.group(3)

    def no_arg_error(self, data):
        """Reply with the usage hint for specifying a user."""
        msg = 'Please specify a user, either as "\x0302nick\x0F!\x0302ident\x0F@\x0302host\x0F"'
        msg += ' or "nick=\x0302nick\x0F, ident=\x0302ident\x0F, host=\x0302host\x0F".'
        self.reply(data, msg)
@@ -0,0 +1,83 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import re | |||
import urllib | |||
from earwigbot.commands import Command | |||
class Calc(Command):
    """A somewhat advanced calculator: see http://futureboy.us/fsp/frink.fsp
    for details."""
    name = "calc"

    def process(self, data):
        """Send the user's expression to Frink and reply with the result."""
        if not data.args:
            self.reply(data, "What do you want me to calculate?")
            return

        query = self.cleanup(' '.join(data.args))

        url = "http://futureboy.us/fsp/frink.fsp?fromVal={0}"
        url = url.format(urllib.quote(query))
        response = urllib.urlopen(url)
        try:
            result = response.read()
        finally:
            # Always release the connection, even if read() fails.
            response.close()

        r_result = re.compile(r'(?i)<A NAME=results>(.*?)</A>')
        r_tag = re.compile(r'<\S+.*?>')

        match = r_result.search(result)
        if not match:
            self.reply(data, "Calculation error.")
            return

        result = match.group(1)
        result = r_tag.sub("", result)  # strip span.warning tags
        # Un-escape the HTML entity for ">" left in the scraped result.
        result = result.replace("&gt;", ">")
        result = result.replace("(undefined symbol)", "(?) ")
        result = result.strip()

        if not result:
            result = '?'
        elif " -> " in query:
            # cleanup() has already rewritten "x in unit" as "x -> unit", so
            # the conversion target must be looked up after the arrow; append
            # it to the result as the unit.
            result += " " + query.split(" -> ", 1)[1]

        res = "%s = %s" % (query, result)
        self.reply(data, res)

    def cleanup(self, query):
        """Rewrite common shorthand in *query* into notation sent to Frink.

        Each ``(pattern, replacement)`` pair is applied in order with
        :py:func:`re.sub`; the stripped result is returned.
        """
        fixes = [
            (' in ', ' -> '),
            (' over ', ' / '),
            (u'£', 'GBP '),
            (u'€', 'EUR '),
            (r'\$', 'USD '),
            (r'\bKB\b', 'kilobytes'),
            (r'\bMB\b', 'megabytes'),
            (r'\bGB\b', 'gigabytes'),
            ('kbps', '(kilobits / second)'),
            ('mbps', '(megabits / second)'),
        ]

        for original, fix in fixes:
            query = re.sub(original, fix, query)
        return query.strip()
@@ -0,0 +1,91 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from earwigbot.commands import Command | |||
class ChanOps(Command):
    """Voice, devoice, op, or deop users in the channel, or join or part from
    other channels."""
    name = "chanops"
    commands = ["chanops", "voice", "devoice", "op", "deop", "join", "part"]

    def process(self, data):
        """Handle one of the channel-operation commands."""
        requested = data.command
        if requested == "chanops":
            msg = "Available commands are !voice, !devoice, !op, !deop, !join, and !part."
            self.reply(data, msg)
            return

        # Anyone may devoice/deop themselves; every other request requires
        # the caller to be a bot admin.
        on_self = requested in ["devoice", "deop"] and (
            not data.args or data.args[0] == data.nick)
        if not on_self and not self.config.irc["permissions"].is_admin(data):
            self.reply(data, "You must be a bot admin to use this command.")
            return

        if requested == "join":
            self.do_join(data)
            return
        if requested == "part":
            self.do_part(data)
            return

        # If it is just !op/!devoice/whatever without arguments, assume
        # they want to do this to themselves:
        target = data.args[0] if data.args else data.nick
        verb = requested.upper()
        self.say("ChanServ", " ".join((verb, data.chan, target)))
        template = "{0} requested {1} on {2} in {3}"
        self.logger.info(template.format(data.nick, verb, target, data.chan))

    def do_join(self, data):
        """Join the channel named by the first argument (adding "#" if needed)."""
        if not data.args:
            msg = "You must specify a channel to join or part from."
            self.reply(data, msg)
            return

        channel = data.args[0]
        if not channel.startswith("#"):
            channel = "#" + channel

        self.join(channel)
        self.logger.info("{0} requested JOIN to {1}".format(data.nick, channel))

    def do_part(self, data):
        """Part from the given (or current) channel, with an optional reason."""
        channel, reason = data.chan, None
        args = data.args
        if args and args[0].startswith("#"):
            # "!part #channel reason for parting"
            channel = args[0]
            remainder = args[1:]
            if remainder:
                reason = " ".join(remainder)
        elif args:
            # "!part reason for parting"; assume current channel
            reason = " ".join(args)

        msg = "Requested by {0}".format(data.nick)
        log = "{0} requested PART from {1}".format(data.nick, channel)
        if reason:
            msg += ": {0}".format(reason)
            log += ' ("{0}")'.format(reason)
        self.part(channel, msg)
        self.logger.info(log)
@@ -0,0 +1,79 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import hashlib | |||
from Crypto.Cipher import Blowfish | |||
from earwigbot.commands import Command | |||
class Crypt(Command):
    """Provides hash functions with !hash (!hash list for supported algorithms)
    and Blowfish encryption with !encrypt and !decrypt."""
    name = "crypt"
    commands = ["crypt", "hash", "encrypt", "decrypt"]

    def process(self, data):
        """Route !crypt, !hash, !encrypt and !decrypt to their handlers."""
        if data.command == "crypt":
            msg = "Available commands are !hash, !encrypt, and !decrypt."
            self.reply(data, msg)
            return

        if not data.args:
            msg = "What do you want me to {0}?".format(data.command)
            self.reply(data, msg)
            return

        if data.command == "hash":
            self._do_hash(data)
        else:
            self._do_cipher(data)

    def _do_hash(self, data):
        """Reply with a hex digest, or with the list of known algorithms."""
        algo = data.args[0]
        if algo == "list":
            algos = ', '.join(hashlib.algorithms)
            msg = algos.join(("Supported algorithms: ", "."))
            self.reply(data, msg)
        elif algo in hashlib.algorithms:
            string = ' '.join(data.args[1:])
            result = getattr(hashlib, algo)(string).hexdigest()
            self.reply(data, result)
        else:
            msg = "Unknown algorithm: '{0}'.".format(algo)
            self.reply(data, msg)

    def _do_cipher(self, data):
        """Blowfish-encrypt or -decrypt the text after the key argument."""
        key = data.args[0]
        text = " ".join(data.args[1:])

        if not text:
            msg = "A key was provided, but text to {0} was not."
            self.reply(data, msg.format(data.command))
            return

        # The cipher key is the SHA-256 digest of the user-supplied key.
        cipher = Blowfish.new(hashlib.sha256(key).digest())
        try:
            if data.command == "encrypt":
                if len(text) % 8:
                    # Pad with NULs up to a multiple of 8 characters.
                    pad = 8 - len(text) % 8
                    text = text.ljust(len(text) + pad, "\x00")
                result = cipher.encrypt(text).encode("hex")
            else:
                # Drop the NUL padding that encrypt added.
                result = cipher.decrypt(text.decode("hex")).rstrip("\x00")
        except (TypeError, ValueError) as error:
            # In Python 2, decoding malformed hex raises TypeError, which the
            # previous `except ValueError` did not catch. ValueError is kept
            # for errors raised by the cipher itself.
            result = str(error)
        self.reply(data, result)
@@ -0,0 +1,68 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import platform | |||
import time | |||
from earwigbot import __version__ | |||
from earwigbot.commands import Command | |||
class CTCP(Command):
    """Not an actual command; this module implements responses to the CTCP
    requests PING, TIME, and VERSION."""
    name = "ctcp"
    hooks = ["msg_private"]

    def check(self, data):
        """Return True for ``!ctcp`` or for a CTCP PING/TIME/VERSION request."""
        if data.is_command and data.command == "ctcp":
            return True

        commands = ["PING", "TIME", "VERSION"]
        msg = data.line[3]
        # A CTCP request arrives as ":\x01COMMAND[ args]\x01".
        if msg[:2] == ":\x01" and msg[2:].rstrip("\x01") in commands:
            return True
        return False

    def process(self, data):
        """Answer a CTCP request with a NOTICE to the requesting nick."""
        if data.is_command:
            # "!ctcp" matches check() only so the command is recognized;
            # there is nothing to do for it.
            return

        target = data.nick
        command = data.line[3][1:].strip("\x01")

        if command == "PING":
            # The last word still carries the request's closing \x01; strip
            # it so the reply does not end with a doubled delimiter.
            msg = " ".join(data.line[4:]).rstrip("\x01")
            if msg:
                self.notice(target, "\x01PING {0}\x01".format(msg))
            else:
                self.notice(target, "\x01PING\x01")

        elif command == "TIME":
            ts = time.strftime("%a, %d %b %Y %H:%M:%S %Z", time.localtime())
            self.notice(target, "\x01TIME {0}\x01".format(ts))

        elif command == "VERSION":
            # $1 and $2 in the (configurable) template are replaced with the
            # bot version and the Python version respectively.
            default = "EarwigBot - $1 - Python/$2 https://github.com/earwig/earwigbot"
            vers = self.config.irc.get("version", default)
            vers = vers.replace("$1", __version__)
            vers = vers.replace("$2", platform.python_version())
            self.notice(target, "\x01VERSION {0}\x01".format(vers))
@@ -0,0 +1,181 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import re | |||
from earwigbot import exceptions | |||
from earwigbot.commands import Command | |||
class Dictionary(Command):
    """Define words and stuff."""
    name = "dictionary"
    commands = ["dict", "dictionary", "define"]

    # Abbreviation shown to the user -> section heading used in the entry.
    _PARTS_OF_SPEECH = {
        "v.": "Verb",
        "n.": "Noun",
        "pron.": "Pronoun",
        "adj.": "Adjective",
        "adv.": "Adverb",
        "prep.": "Preposition",
        "conj.": "Conjunction",
        "inter.": "Interjection",
        "symbol": "Symbol",
        "suffix": "Suffix",
        "initialism": "Initialism",
        "phrase": "Phrase",
        "proverb": "Proverb",
        "prop. n.": "Proper noun",
        "abbr.": "Abbreviation",
        "punct.": "Punctuation mark",
    }

    def process(self, data):
        """Look up the term given in the arguments and reply with it."""
        if not data.args:
            self.reply(data, "What do you want me to define?")
            return

        term = " ".join(data.args)
        lang = self.bot.wiki.get_site().lang
        try:
            defined = self.define(term, lang)
        except exceptions.APIError:
            msg = "Cannot find a {0}-language Wiktionary."
            self.reply(data, msg.format(lang))
        else:
            self.reply(data, defined.encode("utf8"))

    def define(self, term, lang, tries=2):
        """Return a one-line definition of *term* from the *lang* Wiktionary.

        If the page is missing or invalid, retry with the lowercased and then
        the capitalized term, at most *tries* more times.
        """
        try:
            site = self.bot.wiki.get_site(project="wiktionary", lang=lang)
        except exceptions.SiteNotFoundError:
            site = self.bot.wiki.add_site(project="wiktionary", lang=lang)

        page = site.get_page(term, follow_redirects=True)
        try:
            entry = page.get()
        except (exceptions.PageNotFoundError, exceptions.InvalidPageError):
            if term.lower() != term and tries:
                return self.define(term.lower(), lang, tries - 1)
            if term.capitalize() != term and tries:
                return self.define(term.capitalize(), lang, tries - 1)
            return "No definition found."

        level, languages = self.get_languages(entry)
        if not languages:
            return u"Couldn't parse {0}!".format(page.url)

        result = []
        for language, section in sorted(languages.items()):
            definition = self.get_definition(section, level)
            result.append(u"({0}) {1}".format(language, definition))
        return u"; ".join(result)

    def get_languages(self, entry, level=2):
        """Split *entry* into its per-language sections.

        Language headings are looked for at heading depth *level* (a line
        like ``==English==`` for level 2). If none are found at level 2,
        level 3 is tried as well.

        Return ``(level, languages)``, where *languages* maps each heading
        to the text following it, or is ``None`` when no heading was found.
        """
        blocks = "=" * level
        regex = r"(?:\A|\n){0}\s*([a-zA-Z0-9_ ]*?)\s*{0}(?:\Z|\n)"
        split = re.split(regex.format(blocks), entry)

        # With one capturing group, re.split() yields
        # [preamble, name1, body1, name2, body2, ...].
        languages = dict(zip(split[1::2], split[2::2]))
        if not languages:
            if level == 2:
                return self.get_languages(entry, level=3)
            return level, None
        return level, languages

    def get_definition(self, section, level):
        """Return the definitions found in one language *section*.

        Part-of-speech headings are expected one level below the language
        heading (i.e. at depth ``level + 1``). Each body that yields a
        definition is prefixed with its bolded abbreviation; the pieces are
        joined with ``"; "``.
        """
        blocks = "=" * (level + 1)
        defs = []
        for part, basename in self._PARTS_OF_SPEECH.items():
            # The heading may be plain text or a {{template}} in either case.
            fullnames = [basename, r"\{\{" + basename + r"\}\}",
                         r"\{\{" + basename.lower() + r"\}\}"]
            for fullname in fullnames:
                # Capture everything up to the next heading of the same
                # depth, or to the end of the section.
                regex = r"{0}\s*{1}\s*{0}(.*?)(?:(?:{0})|\Z)"
                regex = regex.format(blocks, fullname)
                for body in re.findall(regex, section, re.DOTALL):
                    definition = self.parse_body(body)
                    if definition:
                        msg = u"\x02{0}\x0F {1}"
                        defs.append(msg.format(part, definition))

        return "; ".join(defs)

    def parse_body(self, body):
        """Extract the numbered senses from a part-of-speech *body*.

        Only lines starting with a single ``#`` are used. Markup is stripped
        and each sense is capitalized. Return ``None`` if there are no
        senses, the sense itself if there is one, or the senses numbered
        "1. ... 2. ..." otherwise.
        """
        substitutions = [
            (r"<!--(.*?)-->", ""),
            (r"<ref>(.*?)</ref>", ""),
            (r"\[\[[^\]|]*?\|([^\]|]*?)\]\]", r"\1"),
            (r"\{\{unsupported\|(.*?)\}\}", r"\1"),
            (r"\{\{(.*?) of\|([^}|]*?)(\|(.*?))?\}\}", r"\1 of \2."),
            (r"\{\{w\|(.*?)\}\}", r"\1"),
            (r"\{\{surname(.*?)\}\}", r"A surname."),
            (r"\{\{given name\|([^}|]*?)(\|(.*?))?\}\}", r"A \1 given name."),
        ]

        senses = []
        for line in body.splitlines():
            line = line.strip()
            # "#" followed by ":", "*" or "#" is skipped.
            if re.match(r"#\s*[^:*#]", line):
                for regex, repl in substitutions:
                    line = re.sub(regex, repl, line)
                line = self.strip_templates(line)
                # Drop the leading "#" and the bold/italic/link markup.
                line = line[1:].replace("'''", "").replace("''", "")
                line = line.replace("[[", "").replace("]]", "")
                sense = line.strip()
                if sense:
                    senses.append(sense[0].upper() + sense[1:])

        if not senses:
            return None
        if len(senses) == 1:
            return senses[0]

        result = []  # Number the senses incrementally
        for i, sense in enumerate(senses):
            result.append(u"{0}. {1}".format(i + 1, sense))
        return " ".join(result)

    def strip_templates(self, line):
        """Return *line* with every ``{{...}}`` template removed.

        Nesting is tracked with a depth counter; only characters seen while
        the depth is exactly zero are kept.
        """
        kept = []
        depth = 0
        index, length = 0, len(line)
        # Scan by index instead of repeatedly popping from the front of a
        # list, which was quadratic in the length of the line.
        while index < length:
            pair = line[index:index + 2]
            if pair == "{{":
                depth += 1
                index += 2
            elif pair == "}}":
                depth -= 1
                index += 2
            else:
                if depth == 0:
                    kept.append(line[index])
                index += 1
        return "".join(kept)
@@ -0,0 +1,53 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from urllib import quote_plus | |||
from earwigbot import exceptions | |||
from earwigbot.commands import Command | |||
class Editcount(Command):
    """Return a user's edit count."""
    name = "editcount"
    commands = ["ec", "editcount"]

    def process(self, data):
        """Reply with the edit count of the named user (default: the caller)."""
        # With no arguments, look up the caller's own nick.
        name = ' '.join(data.args) if data.args else data.nick

        site = self.bot.wiki.get_site()
        user = site.get_user(name)

        try:
            count = user.editcount
        except exceptions.UserNotFoundError:
            not_found = "The user \x0302{0}\x0F does not exist."
            self.reply(data, not_found.format(name))
            return

        # Link to the edit counter tool, with the username made URL-safe.
        encoded_name = quote_plus(user.name.encode("utf8"))
        template = "http://toolserver.org/~tparis/pcount/index.php?name={0}&lang={1}&wiki={2}"
        link = template.format(encoded_name, site.lang, site.project)
        summary = "\x0302{0}\x0F has {1} edits ({2})."
        self.reply(data, summary.format(name, count, link))
@@ -0,0 +1,71 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import re | |||
from earwigbot.commands import Command | |||
class Help(Command):
    """Displays help information."""
    name = "help"

    def check(self, data):
        """Match ``!help`` and bare triggers that consist of the bot's nick."""
        if not data.is_command:
            return False
        if data.command == "help":
            return True
        # Triggered by our own nick with no command after it.
        if not data.command and data.trigger == data.my_nick:
            return True
        return False

    def process(self, data):
        """Pick the greeting, per-command help, or the general help message."""
        if not data.command:
            handler = self.do_hello
        elif data.args:
            handler = self.do_command_help
        else:
            handler = self.do_main_help
        handler(data)

    def do_main_help(self, data):
        """Give the user a general help message with a list of all commands."""
        names = sorted(command.name for command in self.bot.commands)
        template = "Hi, I'm a bot! I have {0} commands loaded: {1}. You can get help for any command with '!help <command>'."
        self.reply(data, template.format(len(names), ', '.join(names)))

    def do_command_help(self, data):
        """Give the user help for a specific command."""
        target = data.args[0]

        for command in self.bot.commands:
            matches = command.name == target or target in command.commands
            if matches and command.__doc__:
                # Flatten the docstring onto one line: drop newlines, then
                # collapse each run of whitespace into a single space.
                doc = re.sub("\s\s+", " ", command.__doc__.replace("\n", ""))
                template = 'Help for command \x0303{0}\x0F: "{1}"'
                self.reply(data, template.format(target, doc))
                return

        self.reply(data, "Sorry, no help for \x0303{0}\x0F.".format(target))

    def do_hello(self, data):
        """Answer a bare mention of the bot in the channel it came from."""
        greeting = "Yes, {0}?".format(data.nick)
        self.say(data.chan, greeting)
@@ -0,0 +1,101 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from earwigbot import exceptions | |||
from earwigbot.commands import Command | |||
class Lag(Command):
    """Return the replag for a specific database on the Toolserver."""
    name = "lag"
    commands = ["lag", "replag", "maxlag"]

    def process(self, data):
        """Reply with replag, maxlag, or both, depending on the trigger used.

        ``!replag`` and ``!maxlag`` report one figure each; any other alias
        reports both, separated by a semicolon.
        """
        site = self.get_site(data)
        if not site:
            # Every failing path in get_site() has already replied.
            return

        if data.command == "replag":
            base = "\x0302{0}\x0F: {1}."
            msg = base.format(site.name, self.get_replag(site))
        elif data.command == "maxlag":
            base = "\x0302{0}\x0F: {1}."
            msg = base.format(site.name, self.get_maxlag(site).capitalize())
        else:
            base = "\x0302{0}\x0F: {1}; {2}."
            msg = base.format(site.name, self.get_replag(site),
                              self.get_maxlag(site))
        self.reply(data, msg)

    def get_replag(self, site):
        """Return a sentence fragment describing *site*'s replag."""
        return "Toolserver replag is {0}".format(self.time(site.get_replag()))

    def get_maxlag(self, site):
        """Return a sentence fragment describing *site*'s maxlag."""
        return "database maxlag is {0}".format(self.time(site.get_maxlag()))

    def get_site(self, data):
        """Work out which site the user is asking about.

        Resolution order: ``project``/``lang`` keyword arguments, then no
        arguments (the default site), then a single positional argument in
        one of the forms ``lang.project``, ``project:lang`` or a plain site
        name. Returns the site, or ``None`` after replying with an error.
        """
        kwargs = data.kwargs
        if kwargs and "project" in kwargs and "lang" in kwargs:
            project, lang = kwargs["project"], kwargs["lang"]
            return self.get_site_from_proj_and_lang(data, project, lang)

        if not data.args:
            return self.bot.wiki.get_site()

        if len(data.args) > 1:
            name = " ".join(data.args)
            self.reply(data, "Unknown site: \x0302{0}\x0F.".format(name))
            return

        name = data.args[0]
        if "." in name:
            lang, project = name.split(".")[:2]
        elif ":" in name:
            project, lang = name.split(":")[:2]
        else:
            try:
                return self.bot.wiki.get_site(name)
            except exceptions.SiteNotFoundError:
                msg = "Unknown site: \x0302{0}\x0F.".format(name)
                self.reply(data, msg)
                return
        return self.get_site_from_proj_and_lang(data, project, lang)

    def get_site_from_proj_and_lang(self, data, project, lang):
        """Fetch the site for *project*/*lang*, adding it if it is unknown.

        Returns the site, or ``None`` after replying when adding it fails
        with an ``APIError``.
        """
        try:
            site = self.bot.wiki.get_site(project=project, lang=lang)
        except exceptions.SiteNotFoundError:
            try:
                site = self.bot.wiki.add_site(project=project, lang=lang)
            except exceptions.APIError:
                msg = "Site \x0302{0}:{1}\x0F not found."
                self.reply(data, msg.format(project, lang))
                return
        return site

    def time(self, seconds):
        """Format a duration in seconds as e.g. ``"1 hour, 2 minutes"``.

        A year is counted as 365 days. Units whose count is zero are left
        out; a zero duration is reported as ``"0 seconds"``.
        """
        parts = [("year", 31536000), ("day", 86400), ("hour", 3600),
                 ("minute", 60), ("second", 1)]
        # Work on whole seconds with divmod() so the result does not depend
        # on "/" performing integer division.
        remaining = int(seconds)
        msg = []
        for name, size in parts:
            num, remaining = divmod(remaining, size)
            if num:
                chunk = "{0} {1}".format(num, name if num == 1 else name + "s")
                msg.append(chunk)
        return ", ".join(msg) if msg else "0 seconds"
@@ -0,0 +1,62 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from earwigbot.commands import Command | |||
class Langcode(Command):
    """Convert a language code into its name and a list of WMF sites in that
    language, or a name into its code."""
    name = "langcode"
    commands = ["langcode", "lang", "language"]

    def process(self, data):
        """Look the first argument up in the site matrix and reply.

        The argument is tried as a language code first and then as an
        English or local language name; the first matching matrix entry
        wins.
        """
        if not data.args:
            self.reply(data, "Please specify a language code.")
            return

        code = data.args[0]
        lcase = code.lower()
        wiki = self.bot.wiki.get_site()
        matrix = wiki.api_query(action="sitematrix")["sitematrix"]
        # Drop the two keys that are removed before iterating languages.
        del matrix["count"]
        del matrix["specials"]

        for entry in matrix.itervalues():
            if not entry["name"]:
                continue
            name = entry["name"].encode("utf8")
            localname = entry["localname"].encode("utf8")

            by_code = entry["code"] == lcase
            if not by_code and lcase not in (name.lower(), localname.lower()):
                continue

            if name != localname:
                name = name + " ({0})".format(localname)
            sites = ", ".join([s["url"] for s in entry["site"]])
            if by_code:
                msg = "\x0302{0}\x0F is {1} ({2})".format(code, name, sites)
            else:
                msg = "{0} is \x0302{1}\x0F ({2})"
                msg = msg.format(name, entry["code"], sites)
            self.reply(data, msg)
            return

        self.reply(data, "Language \x0302{0}\x0F not found.".format(code))
@@ -0,0 +1,79 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import re | |||
from earwigbot.commands import Command | |||
class Link(Command):
    """Convert a Wikipedia page name into a URL."""
    name = "link"

    # Raw strings keep the regex escapes from being read as (invalid) Python
    # string escapes; compiling once also removes the duplicated literal.
    _HAS_LINK = re.compile(r"(\[\[(.*?)\]\])|(\{\{(.*?)\}\})")
    _TEMPLATE_PARAM = re.compile(r"\{\{\{(.*?)\}\}\}")
    _WIKILINK = re.compile(r"(\[\[(.*?)(\||\]\]))")
    _TEMPLATE = re.compile(r"(\{\{(.*?)(\||\}\}))")

    def setup(self):
        """Create the per-channel store of the last message with a link."""
        self.last = {}

    def check(self, data):
        """Remember linky messages; trigger only on the ``link`` command."""
        if self._HAS_LINK.search(data.msg):
            self.last[data.chan] = data.msg  # Store most recent link
        return data.is_command and data.command == self.name

    def process(self, data):
        """Reply with URLs for the requested links.

        Links are taken from the message itself when it contains any.
        Otherwise, with no arguments, the last linky message seen in the
        channel is used; with arguments, they are joined into one page name.
        """
        self.site = self.bot.wiki.get_site()

        if self._HAS_LINK.search(data.msg):
            links = u" , ".join(self.parse_line(data.msg))
            self.reply(data, links.encode("utf8"))

        elif data.command == "link":
            if not data.args:
                if data.chan in self.last:
                    previous = self.last[data.chan]
                    links = u" , ".join(self.parse_line(previous))
                    self.reply(data, links.encode("utf8"))
                else:
                    self.reply(data, "What do you want me to link to?")
                return
            pagename = " ".join(data.args)
            link = self.site.get_page(pagename).url.encode("utf8")
            self.reply(data, link)

    def parse_line(self, line):
        """Return a list of links within a line of text.

        URLs for ``[[links]]`` come first, followed by URLs for
        ``{{templates}}`` (looked up under the ``Template:`` prefix). Uses
        ``self.site``, which ``process()`` sets before calling this.
        """
        # Destroy {{{template parameters}}} so they are not read as templates:
        line = self._TEMPLATE_PARAM.sub("", line)

        # findall() yields one tuple per match; the target is the 2nd group.
        results = [self.site.get_page(match[1]).url
                   for match in self._WIKILINK.findall(line)]
        results += [self.site.get_page("Template:" + match[1]).url
                    for match in self._TEMPLATE.findall(line)]
        return results
@@ -0,0 +1,319 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from datetime import datetime | |||
from os import path | |||
import re | |||
import sqlite3 as sqlite | |||
from threading import Lock | |||
from earwigbot.commands import Command | |||
class Notes(Command):
    """A mini IRC-based wiki for storing notes, tips, and reminders."""
    name = "notes"
    commands = ["notes", "note", "about"]
    version = 2

    # Format in which do_edit() stores revision timestamps; do_info() parses
    # it back in order to sort revisions chronologically.
    _TIMESTAMP_FORMAT = "%b %d, %Y %H:%M:%S"

    def setup(self):
        """Locate the database file and create the lock guarding it."""
        self._dbfile = path.join(self.config.root_dir, "notes.db")
        self._db_access_lock = Lock()

    def process(self, data):
        """Dispatch to the subcommand named by the first argument."""
        commands = {
            "help": self.do_help,
            "list": self.do_list,
            "read": self.do_read,
            "edit": self.do_edit,
            "info": self.do_info,
            "rename": self.do_rename,
            "delete": self.do_delete,
        }

        if not data.args:
            msg = "\x0302The Earwig Mini-Wiki\x0F: running v{0}. Subcommands are: {1}. You can get help on any with '!{2} help subcommand'."
            cmnds = ", ".join(commands)
            self.reply(data, msg.format(self.version, cmnds, data.command))
            return
        command = data.args[0].lower()
        if command in commands:
            commands[command](data)
        else:
            msg = "Unknown subcommand: \x0303{0}\x0F.".format(command)
            self.reply(data, msg)

    def do_help(self, data):
        """Get help on a subcommand."""
        info = {
            "help": "Get help on other subcommands.",
            "list": "List existing entries.",
            "read": "Read an existing entry ('!notes read [name]').",
            "edit": """Modify or create a new entry ('!notes edit name
                       [entry content]...'). If modifying, you must be the
                       entry author or a bot admin.""",
            "info": """Get information on an existing entry ('!notes info
                       [name]').""",
            "rename": """Rename an existing entry ('!notes rename [old_name]
                         [new_name]'). You must be the entry author or a bot
                         admin.""",
            "delete": """Delete an existing entry ('!notes delete [name]'). You
                         must be the entry author or a bot admin.""",
        }

        try:
            # Lowercased to match how process() treats subcommand names.
            command = data.args[1].lower()
        except IndexError:
            self.reply(data, "Please specify a subcommand to get help on.")
            return
        try:
            # Collapse the line breaks and indentation of the literals above.
            help_ = re.sub(r"\s\s+", " ", info[command].replace("\n", ""))
            self.reply(data, "\x0303{0}\x0F: ".format(command) + help_)
        except KeyError:
            msg = "Unknown subcommand: \x0303{0}\x0F.".format(command)
            self.reply(data, msg)

    def do_list(self, data):
        """Show a list of entries in the notes database."""
        query = "SELECT entry_title FROM entries"
        # The lock is taken first so that the connection's commit-on-exit
        # still runs while the lock is held.
        with self._db_access_lock, sqlite.connect(self._dbfile) as conn:
            try:
                entries = conn.execute(query).fetchall()
            except sqlite.OperationalError:
                # Raised when the tables do not exist yet.
                entries = []

        if entries:
            entries = [entry[0] for entry in entries]
            self.reply(data, "Entries: {0}".format(", ".join(entries)))
        else:
            self.reply(data, "No entries in the database.")

    def do_read(self, data):
        """Read an entry from the notes database."""
        query = """SELECT entry_title, rev_content FROM entries
                   INNER JOIN revisions ON entry_revision = rev_id
                   WHERE entry_slug = ?"""
        try:
            slug = self.slugify(data.args[1])
        except IndexError:
            self.reply(data, "Please specify an entry to read from.")
            return

        with self._db_access_lock, sqlite.connect(self._dbfile) as conn:
            try:
                title, content = conn.execute(query, (slug,)).fetchone()
            except (sqlite.OperationalError, TypeError):
                # TypeError: fetchone() returned None (no such entry).
                title, content = slug, None

        if content:
            self.reply(data, "\x0302{0}\x0F: {1}".format(title, content))
        else:
            self.reply(data, "Entry \x0302{0}\x0F not found.".format(title))

    def do_edit(self, data):
        """Edit an entry in the notes database.

        Creates the entry (and, on first use, the tables) when it does not
        exist. Only the user behind the entry's current revision or a bot
        admin may modify an existing entry.
        """
        query1 = """SELECT entry_id, entry_title, user_host FROM entries
                    INNER JOIN revisions ON entry_revision = rev_id
                    INNER JOIN users ON rev_user = user_id
                    WHERE entry_slug = ?"""
        query2 = "INSERT INTO revisions VALUES (?, ?, ?, ?, ?)"
        query3 = "INSERT INTO entries VALUES (?, ?, ?, ?)"
        query4 = "UPDATE entries SET entry_revision = ? WHERE entry_id = ?"
        try:
            slug = self.slugify(data.args[1])
        except IndexError:
            self.reply(data, "Please specify an entry to edit.")
            return
        content = " ".join(data.args[2:]).strip()
        if not content:
            self.reply(data, "Please give some content to put in the entry.")
            return

        with self._db_access_lock, sqlite.connect(self._dbfile) as conn:
            create = True
            try:
                id_, title, author = conn.execute(query1, (slug,)).fetchone()
                create = False
            except sqlite.OperationalError:
                # No tables yet: build them; this becomes entry number 1.
                id_, title, author = 1, data.args[1], data.host
                self.create_db(conn)
            except TypeError:
                # fetchone() returned None: the entry is new.
                id_ = self.get_next_entry(conn)
                title, author = data.args[1], data.host
            permdb = self.config.irc["permissions"]
            if author != data.host and not permdb.is_admin(data):
                msg = "You must be an author or a bot admin to edit this entry."
                self.reply(data, msg)
                return
            revid = self.get_next_revision(conn)
            userid = self.get_user(conn, data.host)
            now = datetime.utcnow().strftime(self._TIMESTAMP_FORMAT)
            conn.execute(query2, (revid, id_, userid, now, content))
            if create:
                conn.execute(query3, (id_, slug, title, revid))
            else:
                conn.execute(query4, (revid, id_))

        self.reply(data, "Entry \x0302{0}\x0F updated.".format(title))

    def do_info(self, data):
        """Get info on an entry in the notes database.

        Reports the number of revisions, the earliest and (when there is
        more than one revision) latest edit times, and the distinct author
        hosts.
        """
        query = """SELECT entry_title, rev_timestamp, user_host FROM entries
                   INNER JOIN revisions ON entry_id = rev_entry
                   INNER JOIN users ON rev_user = user_id
                   WHERE entry_slug = ?"""
        try:
            slug = self.slugify(data.args[1])
        except IndexError:
            self.reply(data, "Please specify an entry to get info on.")
            return

        with self._db_access_lock, sqlite.connect(self._dbfile) as conn:
            try:
                info = conn.execute(query, (slug,)).fetchall()
            except sqlite.OperationalError:
                info = []

        if info:
            title = info[0][0]
            times = [datum[1] for datum in info]
            # Timestamps are stored as text beginning with a month name, so
            # plain string ordering is alphabetical, not chronological.
            # Parse them to sort; fall back to string order if any stored
            # value does not match the expected format.
            fmt = self._TIMESTAMP_FORMAT
            try:
                times.sort(key=lambda stamp: datetime.strptime(stamp, fmt))
            except (TypeError, ValueError):
                times.sort()
            earliest = times[0]
            msg = "\x0302{0}\x0F: {1} edits since {2}"
            msg = msg.format(title, len(info), earliest)
            if len(times) > 1:
                latest = times[-1]
                msg += "; last edit on {0}".format(latest)
            names = [datum[2] for datum in info]
            msg += "; authors: {0}.".format(", ".join(list(set(names))))
            self.reply(data, msg)
        else:
            title = data.args[1]
            self.reply(data, "Entry \x0302{0}\x0F not found.".format(title))

    def do_rename(self, data):
        """Rename an entry in the notes database.

        Refuses to rename onto a name whose slug already belongs to a
        different entry, since lookups are done by slug.
        """
        query1 = """SELECT entry_id, user_host FROM entries
                    INNER JOIN revisions ON entry_revision = rev_id
                    INNER JOIN users ON rev_user = user_id
                    WHERE entry_slug = ?"""
        query2 = """UPDATE entries SET entry_slug = ?, entry_title = ?
                    WHERE entry_id = ?"""
        query3 = "SELECT entry_id FROM entries WHERE entry_slug = ?"
        try:
            slug = self.slugify(data.args[1])
        except IndexError:
            self.reply(data, "Please specify an entry to rename.")
            return
        try:
            newtitle = data.args[2]
        except IndexError:
            self.reply(data, "Please specify a new name for the entry.")
            return
        if newtitle == data.args[1]:
            self.reply(data, "The old and new names are identical.")
            return

        with self._db_access_lock, sqlite.connect(self._dbfile) as conn:
            try:
                id_, author = conn.execute(query1, (slug,)).fetchone()
            except (sqlite.OperationalError, TypeError):
                msg = "Entry \x0302{0}\x0F not found.".format(data.args[1])
                self.reply(data, msg)
                return
            permdb = self.config.irc["permissions"]
            if author != data.host and not permdb.is_admin(data):
                msg = "You must be an author or a bot admin to rename this entry."
                self.reply(data, msg)
                return
            newslug = self.slugify(newtitle)
            # Changing only case or separators keeps the entry's own slug,
            # which is fine; only a clash with another entry is an error.
            holders = conn.execute(query3, (newslug,)).fetchall()
            if any(row[0] != id_ for row in holders):
                msg = "Entry \x0302{0}\x0F already exists."
                self.reply(data, msg.format(newtitle))
                return
            conn.execute(query2, (newslug, newtitle, id_))

        msg = "Entry \x0302{0}\x0F renamed to \x0302{1}\x0F."
        self.reply(data, msg.format(data.args[1], newtitle))

    def do_delete(self, data):
        """Delete an entry from the notes database."""
        query1 = """SELECT entry_id, user_host FROM entries
                    INNER JOIN revisions ON entry_revision = rev_id
                    INNER JOIN users ON rev_user = user_id
                    WHERE entry_slug = ?"""
        query2 = "DELETE FROM entries WHERE entry_id = ?"
        query3 = "DELETE FROM revisions WHERE rev_entry = ?"
        try:
            slug = self.slugify(data.args[1])
        except IndexError:
            self.reply(data, "Please specify an entry to delete.")
            return

        with self._db_access_lock, sqlite.connect(self._dbfile) as conn:
            try:
                id_, author = conn.execute(query1, (slug,)).fetchone()
            except (sqlite.OperationalError, TypeError):
                msg = "Entry \x0302{0}\x0F not found.".format(data.args[1])
                self.reply(data, msg)
                return
            permdb = self.config.irc["permissions"]
            if author != data.host and not permdb.is_admin(data):
                msg = "You must be an author or a bot admin to delete this entry."
                self.reply(data, msg)
                return
            conn.execute(query2, (id_,))
            conn.execute(query3, (id_,))

        self.reply(data, "Entry \x0302{0}\x0F deleted.".format(data.args[1]))

    def slugify(self, name):
        """Convert *name* into an identifier for storing in the database."""
        return name.lower().replace("_", "").replace("-", "")

    def create_db(self, conn):
        """Initialize the notes database with its necessary tables."""
        script = """
            CREATE TABLE entries (entry_id, entry_slug, entry_title,
                                  entry_revision);
            CREATE TABLE users (user_id, user_host);
            CREATE TABLE revisions (rev_id, rev_entry, rev_user, rev_timestamp,
                                    rev_content);
        """
        conn.executescript(script)

    def get_next_entry(self, conn):
        """Get the next entry ID."""
        query = "SELECT MAX(entry_id) FROM entries"
        later = conn.execute(query).fetchone()[0]
        return later + 1 if later else 1

    def get_next_revision(self, conn):
        """Get the next revision ID."""
        query = "SELECT MAX(rev_id) FROM revisions"
        later = conn.execute(query).fetchone()[0]
        return later + 1 if later else 1

    def get_user(self, conn, host):
        """Get the user ID corresponding to a hostname, or make one."""
        query1 = "SELECT user_id FROM users WHERE user_host = ?"
        query2 = "SELECT MAX(user_id) FROM users"
        query3 = "INSERT INTO users VALUES (?, ?)"
        user = conn.execute(query1, (host,)).fetchone()
        if user:
            return user[0]
        last = conn.execute(query2).fetchone()[0]
        later = last + 1 if last else 1
        conn.execute(query3, (later, host))
        return later
@@ -0,0 +1,68 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from earwigbot.commands import Command | |||
class Quit(Command):
    """Quit, restart, or reload components from the bot. Only the owners can
    run this command."""
    name = "quit"
    commands = ["quit", "restart", "reload"]

    def process(self, data):
        """Check that the caller is an owner, then run the chosen action."""
        if not self.config.irc["permissions"].is_owner(data):
            self.reply(data, "You must be a bot owner to use this command.")
            return
        if data.command == "quit":
            self.do_quit(data)
        elif data.command == "restart":
            self.do_restart(data)
        else:
            self.do_reload(data)

    def do_quit(self, data):
        """Stop the bot, requiring its nick as confirmation.

        When the command was triggered by the bot's nick, all arguments form
        the quit reason. Otherwise the first argument must be the bot's nick
        and the remaining arguments form the reason.
        """
        args = data.args
        if data.trigger == data.my_nick:
            reason = " ".join(args)
        else:
            # Lowercase both sides so the confirmation is case-insensitive.
            # NOTE(review): data.my_nick may already be normalised upstream;
            # lowercasing it again is harmless in that case.
            if not args or args[0].lower() != data.my_nick.lower():
                self.reply(data, "To confirm this action, the first argument must be my name.")
                return
            reason = " ".join(args[1:])

        if reason:
            self.bot.stop("Stopped by {0}: {1}".format(data.nick, reason))
        else:
            self.bot.stop("Stopped by {0}".format(data.nick))

    def do_restart(self, data):
        """Restart the bot, passing along any reason given as arguments."""
        if data.args:
            msg = " ".join(data.args)
            self.bot.restart("Restarted by {0}: {1}".format(data.nick, msg))
        else:
            self.bot.restart("Restarted by {0}".format(data.nick))

    def do_reload(self, data):
        """Reload IRC commands and bot tasks, then confirm to the caller."""
        self.logger.info("{0} requested command/task reload".format(data.nick))
        self.bot.commands.load()
        self.bot.tasks.load()
        self.reply(data, "IRC commands and bot tasks reloaded.")
@@ -0,0 +1,72 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import time | |||
from earwigbot import exceptions | |||
from earwigbot.commands import Command | |||
class Registration(Command):
    """Return when a user registered."""
    name = "registration"
    commands = ["registration", "reg", "age"]

    def process(self, data):
        """Reply with the registration date and account age of a user.

        The user is named by the arguments joined with spaces, or is the
        caller's own nick when no arguments are given.
        """
        # Function-scope import: only this method needs calendar.
        import calendar

        if not data.args:
            name = data.nick
        else:
            name = ' '.join(data.args)

        site = self.bot.wiki.get_site()
        user = site.get_user(name)

        try:
            reg = user.registration
        except exceptions.UserNotFoundError:
            msg = "The user \x0302{0}\x0F does not exist."
            self.reply(data, msg.format(name))
            return

        date = time.strftime("%b %d, %Y at %H:%M:%S UTC", reg)
        # The reply labels the registration time as UTC, so convert it with
        # timegm(). mktime() would read the tuple as local time and can be
        # off by an hour depending on its DST flag.
        age = self.get_diff(calendar.timegm(reg), time.time())

        if user.gender == "male":
            gender = "He's"
        elif user.gender == "female":
            gender = "She's"
        else:
            gender = "They're"  # Singular they?

        msg = "\x0302{0}\x0F registered on {1}. {2} {3} old."
        self.reply(data, msg.format(name, date, gender, age))

    def get_diff(self, t1, t2):
        """Describe the time from *t1* to *t2* (both in seconds) in words.

        A year is counted as 365 days. Units whose count is zero are left
        out; a difference under one second gives ``"0 seconds"``.
        """
        parts = [("year", 31536000), ("day", 86400), ("hour", 3600),
                 ("minute", 60), ("second", 1)]
        # divmod() on whole seconds keeps the counts integral without
        # relying on "/" performing integer division.
        remaining = int(t2 - t1)
        msg = []
        for name, size in parts:
            num, remaining = divmod(remaining, size)
            if num:
                chunk = "{0} {1}".format(num, name if num == 1 else name + "s")
                msg.append(chunk)
        return ", ".join(msg) if msg else "0 seconds"
@@ -0,0 +1,62 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from threading import Timer | |||
import time | |||
from earwigbot.commands import Command | |||
class Remind(Command):
    """Set a message to be repeated to you in a certain amount of time."""
    name = "remind"
    commands = ["remind", "reminder"]

    def process(self, data):
        """Validate the delay and message, confirm, and start a timer.

        The first argument is the delay in whole seconds and the rest is the
        message. When the timer fires, the message is sent back through
        ``self.reply``.
        """
        if not data.args:
            usage = "Please specify a time (in seconds) and a message in the following format: !remind <time> <msg>."
            self.reply(data, usage)
            return

        try:
            delay = int(data.args[0])
        except ValueError:
            self.reply(data,
                       "The time must be given as an integer, in seconds.")
            return

        text = ' '.join(data.args[1:])
        if not text:
            self.reply(
                data,
                "What message do you want me to give you when time is up?")
            return

        clock_format = "%b %d %H:%M:%S"
        due = time.localtime(time.time() + delay)
        due_plain = time.strftime(clock_format, due)
        due_with_zone = time.strftime(clock_format + " %Z", due)

        confirmation = 'Set reminder for "{0}" in {1} seconds (ends {2}).'
        self.reply(data, confirmation.format(text, delay, due_with_zone))

        # Daemon thread: a pending reminder must not keep the process alive.
        timer = Timer(delay, self.reply, args=(data, text))
        timer.name = "reminder " + due_plain
        timer.daemon = True
        timer.start()
@@ -0,0 +1,52 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from earwigbot import exceptions | |||
from earwigbot.commands import Command | |||
class Rights(Command):
    """Retrieve a list of rights for a given username."""
    name = "rights"
    commands = ["rights", "groups", "permissions", "privileges"]

    def process(self, data):
        """Reply with the groups of a user, leaving out the ``*`` group.

        The user is named by the arguments joined with spaces, or is the
        caller's own nick when no arguments are given.
        """
        if not data.args:
            name = data.nick
        else:
            name = ' '.join(data.args)

        site = self.bot.wiki.get_site()
        user = site.get_user(name)

        try:
            groups = user.groups
        except exceptions.UserNotFoundError:
            msg = "The user \x0302{0}\x0F does not exist."
            self.reply(data, msg.format(name))
            return

        # Filter into a new list rather than calling remove() on the object
        # handed back by user.groups, so that object is left untouched.
        # '*' is the group given to everyone.
        rights = [group for group in groups if group != "*"]
        msg = "The rights for \x0302{0}\x0F are {1}."
        self.reply(data, msg.format(name, ', '.join(rights)))
@@ -0,0 +1,37 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import random | |||
from earwigbot.commands import Command | |||
class Test(Command):
    """Test the bot!"""
    name = "test"

    def process(self, data):
        """Greet the caller in the channel with one of two phrases.

        The phrase is picked by a random coin flip; the caller's nick is
        wrapped in IRC bold markers.
        """
        bold_nick = "".join(("\x02", data.nick, "\x0F"))  # Wrap nick in bold
        if random.randint(0, 1):
            greeting = "Hey {0}!"
        else:
            greeting = "'Sup {0}?"
        self.say(data.chan, greeting.format(bold_nick))
@@ -0,0 +1,143 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import threading | |||
import re | |||
from earwigbot.commands import Command | |||
class Threads(Command):
    """Manage wiki tasks from IRC, and check on thread status."""
    name = "threads"
    commands = ["tasks", "task", "threads", "tasklist"]

    def process(self, data):
        """Check that the caller is a bot owner, then run the subcommand.

        Recognized first arguments are ``list``, ``start``, and
        ``listall``/``all``. With no arguments, ``!tasklist`` behaves like
        ``list`` and every other alias replies with a usage hint.
        """
        # The do_*() helpers take no arguments and read the request here.
        self.data = data

        if not self.config.irc["permissions"].is_owner(data):
            msg = "You must be a bot owner to use this command."
            self.reply(data, msg)
            return

        if not data.args:
            if data.command == "tasklist":
                self.do_list()
            else:
                msg = "No arguments provided. Maybe you wanted '!{0} list', '!{0} start', or '!{0} listall'?"
                self.reply(data, msg.format(data.command))
            return

        if data.args[0] == "list":
            self.do_list()
        elif data.args[0] == "start":
            self.do_start()
        elif data.args[0] in ["listall", "all"]:
            self.do_listall()
        else:  # They asked us to do something we don't know
            msg = "Unknown argument: \x0303{0}\x0F.".format(data.args[0])
            self.reply(data, msg)

    def do_list(self):
        """With !tasks list (or abbreviation !tasklist), list all running
        threads. This includes the main threads, like the irc frontend and the
        watcher, and task threads."""
        threads = threading.enumerate()
        normal_threads = []
        daemon_threads = []

        for thread in threads:
            tname = thread.name
            if tname == "MainThread":
                t = "\x0302MainThread\x0F (id {0})"
                normal_threads.append(t.format(thread.ident))
            elif tname in self.config.components:
                t = "\x0302{0}\x0F (id {1})"
                normal_threads.append(t.format(tname, thread.ident))
            elif tname.startswith("reminder"):
                tname = tname.replace("reminder ", "")
                t = "\x0302reminder\x0F (until {0})"
                normal_threads.append(t.format(tname))
            else:
                # Command/task threads are expected to be named
                # "<name> (<start time>)".
                match = re.match(r"^(.*?) \((.*?)\)$", tname)
                if match:
                    tname, start_time = match.groups()
                    t = "\x0302{0}\x0F (id {1}, since {2})"
                    daemon_threads.append(t.format(tname, thread.ident,
                                                   start_time))
                else:
                    # Any other thread (e.g. one with a default "Thread-N"
                    # name) used to raise IndexError here and abort the
                    # whole listing; report it by name and id instead.
                    t = "\x0302{0}\x0F (id {1})"
                    normal_threads.append(t.format(tname, thread.ident))

        if daemon_threads:
            if len(daemon_threads) > 1:
                msg = "\x02{0}\x0F threads active: {1}, and \x02{2}\x0F command/task threads: {3}."
            else:
                msg = "\x02{0}\x0F threads active: {1}, and \x02{2}\x0F command/task thread: {3}."
            msg = msg.format(len(threads), ', '.join(normal_threads),
                             len(daemon_threads), ', '.join(daemon_threads))
        else:
            msg = "\x02{0}\x0F threads active: {1}, and \x020\x0F command/task threads."
            msg = msg.format(len(threads), ', '.join(normal_threads))

        self.reply(self.data, msg)

    def do_listall(self):
        """With !tasks listall or !tasks all, list all loaded tasks, and report
        whether they are currently running or idle."""
        threads = threading.enumerate()
        tasklist = []
        for task in sorted([task.name for task in self.bot.tasks]):
            # Match the whole task name, not just a prefix: a bare
            # startswith(task) would also count the threads of any other
            # task whose name merely begins with this one's.
            # NOTE(review): assumes task threads are named
            # "<task> (<start time>)", the shape do_list() parses -- confirm
            # against the code that starts task threads.
            prefix = task + " ("
            threadlist = [t for t in threads
                          if t.name == task or t.name.startswith(prefix)]
            ids = [str(t.ident) for t in threadlist]
            if not ids:
                tasklist.append("\x0302{0}\x0F (idle)".format(task))
            elif len(ids) == 1:
                t = "\x0302{0}\x0F (\x02active\x0F as id {1})"
                tasklist.append(t.format(task, ids[0]))
            else:
                t = "\x0302{0}\x0F (\x02active\x0F as ids {1})"
                tasklist.append(t.format(task, ', '.join(ids)))

        tasks = ", ".join(tasklist)
        msg = "\x02{0}\x0F tasks loaded: {1}.".format(len(tasklist), tasks)
        self.reply(self.data, msg)

    def do_start(self):
        """With !tasks start, start any loaded task by name with or without
        kwargs."""
        data = self.data

        try:
            task_name = data.args[1]
        except IndexError:  # No task name given
            self.reply(data, "What task do you want me to start?")
            return

        if task_name not in [task.name for task in self.bot.tasks]:
            # This task does not exist or hasn't been loaded:
            msg = "Task could not be found; either it doesn't exist, or it wasn't loaded correctly."
            self.reply(data, msg)
            return

        # Tell the task that it was started from IRC:
        data.kwargs["fromIRC"] = True
        self.bot.tasks.start(task_name, **data.kwargs)
        msg = "Task \x0302{0}\x0F started.".format(task_name)
        self.reply(data, msg)
@@ -0,0 +1,65 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from datetime import datetime | |||
from math import floor | |||
from time import time | |||
import pytz | |||
from earwigbot.commands import Command | |||
class Time(Command):
    """Report the current time in any timezone (UTC default), or in beats."""
    name = "time"
    commands = ["time", "beats", "swatch"]

    def process(self, data):
        """Answer with beats or with the wall-clock time in a timezone.

        Beats are reported when the command itself is ``beats``/``swatch``
        or when the first argument is one of those words. Otherwise the
        first argument is taken as a timezone name, defaulting to UTC.
        """
        beat_words = ("beats", "swatch")
        timezone = data.args[0] if data.args else "UTC"
        if data.command in beat_words or timezone in beat_words:
            self.do_beats(data)
        else:
            self.do_time(data, timezone)

    def do_beats(self, data):
        """Reply with the current time as ``@NNN``, zero-padded to 3 digits.

        The Unix timestamp is shifted forward one hour, reduced to seconds
        within the day, and divided into units of 86.4 seconds (a
        thousandth of a day); the result is rounded down.
        """
        seconds_into_day = (time() + 3600) % 86400
        beats = int(floor(seconds_into_day / 86.4))
        self.reply(data, "@{0:0>3}".format(beats))

    def do_time(self, data, timezone):
        """Reply with the current date and time in the named timezone.

        An unknown timezone name produces an error reply instead.
        """
        if not pytz:
            msg = "This command requires the 'pytz' module: http://pytz.sourceforge.net/"
            self.reply(data, msg)
            return

        try:
            tzinfo = pytz.timezone(timezone)
        except pytz.exceptions.UnknownTimeZoneError:
            self.reply(data, "Unknown timezone: {0}.".format(timezone))
            return

        # Mark the naive UTC "now" as UTC, then convert to the target zone.
        utc_now = pytz.utc.localize(datetime.utcnow())
        local_now = utc_now.astimezone(tzinfo)
        self.reply(data, local_now.strftime("%Y-%m-%d %H:%M:%S %Z"))
@@ -0,0 +1,48 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from unicodedata import normalize | |||
from earwigbot.commands import Command | |||
class Trout(Command):
    """Slap someone with a trout, or related fish."""
    name = "trout"
    commands = ["trout", "whale"]

    def setup(self):
        """Read the per-target override replies from the command's config.

        A missing key anywhere along the lookup leaves the overrides empty.
        """
        try:
            overrides = self.config.commands[self.name]["exceptions"]
        except KeyError:
            overrides = {}
        self.exceptions = overrides

    def process(self, data):
        """Slap the target, or send its configured override reply.

        The target is the joined arguments, falling back to the caller's
        nick; "yourself" is rewritten to "himself". Overrides are looked up
        by the NFKD-normalized, lowercased target.
        """
        victim = " ".join(data.args)
        if not victim:
            victim = data.nick
        if victim == "yourself":
            victim = "himself"

        folded = normalize("NFKD", victim.decode("utf8")).lower()
        if folded not in self.exceptions:
            # The command word itself ("trout" or "whale") names the animal.
            msg = "slaps \x02{0}\x0F around a bit with a large {1}."
            self.action(data.chan, msg.format(victim, data.command))
            return
        self.reply(data, self.exceptions[folded])
@@ -0,0 +1,347 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from collections import OrderedDict | |||
from getpass import getpass | |||
from hashlib import sha256 | |||
import logging | |||
import logging.handlers | |||
from os import mkdir, path | |||
import stat | |||
from Crypto.Cipher import Blowfish | |||
import bcrypt | |||
import yaml | |||
from earwigbot.config.formatter import BotFormatter | |||
from earwigbot.config.node import ConfigNode | |||
from earwigbot.config.ordered_yaml import OrderedLoader | |||
from earwigbot.config.permissions import PermissionsDB | |||
from earwigbot.config.script import ConfigScript | |||
from earwigbot.exceptions import NoConfigError | |||
__all__ = ["BotConfig"] | |||
class BotConfig(object): | |||
""" | |||
**EarwigBot: YAML Config File Manager** | |||
This handles all tasks involving reading and writing to our config file, | |||
including encrypting and decrypting passwords and making a new config file | |||
from scratch at the inital bot run. | |||
BotConfig has a few attributes and methods, including the following: | |||
- :py:attr:`root_dir`: bot's working directory; contains | |||
:file:`config.yml`, :file:`logs/` | |||
- :py:attr:`path`: path to the bot's config file | |||
- :py:attr:`components`: enabled components | |||
- :py:attr:`wiki`: information about wiki-editing | |||
- :py:attr:`irc`: information about IRC | |||
- :py:attr:`commands`: information about IRC commands | |||
- :py:attr:`tasks`: information for bot tasks | |||
- :py:attr:`metadata`: miscellaneous information | |||
- :py:meth:`schedule`: tasks scheduled to run at a given time | |||
BotConfig also has some methods used in config loading: | |||
- :py:meth:`load`: loads (or reloads) and parses our config file | |||
- :py:meth:`decrypt`: decrypts an object in the config tree | |||
""" | |||
def __init__(self, bot, root_dir, level): | |||
self._bot = bot | |||
self._root_dir = root_dir | |||
self._logging_level = level | |||
self._config_path = path.join(self.root_dir, "config.yml") | |||
self._log_dir = path.join(self.root_dir, "logs") | |||
perms_file = path.join(self.root_dir, "permissions.db") | |||
self._permissions = PermissionsDB(perms_file) | |||
self._decryption_cipher = None | |||
self._data = None | |||
self._components = ConfigNode() | |||
self._wiki = ConfigNode() | |||
self._irc = ConfigNode() | |||
self._commands = ConfigNode() | |||
self._tasks = ConfigNode() | |||
self._metadata = ConfigNode() | |||
self._nodes = [self._components, self._wiki, self._irc, self._commands, | |||
self._tasks, self._metadata] | |||
self._decryptable_nodes = [ # Default nodes to decrypt | |||
(self._wiki, ("password",)), | |||
(self._wiki, ("search", "credentials", "key")), | |||
(self._wiki, ("search", "credentials", "secret")), | |||
(self._irc, ("frontend", "nickservPassword")), | |||
(self._irc, ("watcher", "nickservPassword")), | |||
] | |||
def __repr__(self): | |||
"""Return the canonical string representation of the BotConfig.""" | |||
res = "BotConfig(root_dir={0!r}, level={1!r})" | |||
return res.format(self.root_dir, self.logging_level) | |||
def __str__(self): | |||
"""Return a nice string representation of the BotConfig.""" | |||
return "<BotConfig at {0}>".format(self.root_dir) | |||
def _handle_missing_config(self): | |||
print "Config file missing or empty:", self._config_path | |||
msg = "Would you like to create a config file now? [Y/n] " | |||
choice = raw_input(msg) | |||
if choice.lower().startswith("n"): | |||
raise NoConfigError() | |||
else: | |||
try: | |||
ConfigScript(self).make_new() | |||
except KeyboardInterrupt: | |||
raise NoConfigError() | |||
def _load(self): | |||
"""Load data from our JSON config file (config.yml) into self._data.""" | |||
filename = self._config_path | |||
with open(filename, 'r') as fp: | |||
try: | |||
self._data = yaml.load(fp, OrderedLoader) | |||
except yaml.YAMLError: | |||
print "Error parsing config file {0}:".format(filename) | |||
raise | |||
def _setup_logging(self): | |||
"""Configures the logging module so it works the way we want it to.""" | |||
log_dir = self._log_dir | |||
logger = logging.getLogger("earwigbot") | |||
logger.handlers = [] # Remove any handlers already attached to us | |||
logger.setLevel(logging.DEBUG) | |||
color_formatter = BotFormatter(color=True) | |||
formatter = BotFormatter() | |||
if self.metadata.get("enableLogging"): | |||
hand = logging.handlers.TimedRotatingFileHandler | |||
logfile = lambda f: path.join(log_dir, f) | |||
if not path.isdir(log_dir): | |||
if not path.exists(log_dir): | |||
mkdir(log_dir, stat.S_IWUSR|stat.S_IRUSR|stat.S_IXUSR) | |||
else: | |||
msg = "log_dir ({0}) exists but is not a directory!" | |||
print msg.format(log_dir) | |||
return | |||
main_handler = hand(logfile("bot.log"), "midnight", 1, 7) | |||
error_handler = hand(logfile("error.log"), "W6", 1, 4) | |||
debug_handler = hand(logfile("debug.log"), "H", 1, 6) | |||
main_handler.setLevel(logging.INFO) | |||
error_handler.setLevel(logging.WARNING) | |||
debug_handler.setLevel(logging.DEBUG) | |||
for h in (main_handler, error_handler, debug_handler): | |||
h.setFormatter(formatter) | |||
logger.addHandler(h) | |||
self._stream_handler = stream = logging.StreamHandler() | |||
stream.setLevel(self._logging_level) | |||
stream.setFormatter(color_formatter) | |||
logger.addHandler(stream) | |||
def _decrypt(self, node, nodes): | |||
"""Try to decrypt the contents of a config node. Use self.decrypt().""" | |||
try: | |||
node._decrypt(self._decryption_cipher, nodes[:-1], nodes[-1]) | |||
except ValueError: | |||
print "Error decrypting passwords:" | |||
raise | |||
@property | |||
def bot(self): | |||
"""The config's Bot object.""" | |||
return self._bot | |||
@property | |||
def root_dir(self): | |||
"""The bot's root directory containing its config file and more.""" | |||
return self._root_dir | |||
@property | |||
def logging_level(self): | |||
"""The minimum logging level for messages logged via stdout.""" | |||
return self._logging_level | |||
@logging_level.setter | |||
def logging_level(self, level): | |||
self._logging_level = level | |||
self._stream_handler.setLevel(level) | |||
@property | |||
def path(self): | |||
"""The path to the bot's config file.""" | |||
return self._config_path | |||
@property | |||
def log_dir(self): | |||
"""The directory containing the bot's logs.""" | |||
return self._log_dir | |||
@property | |||
def data(self): | |||
"""The entire config file as a decoded JSON object.""" | |||
return self._data | |||
@property | |||
def components(self): | |||
"""A dict of enabled components.""" | |||
return self._components | |||
@property | |||
def wiki(self): | |||
"""A dict of information about wiki-editing.""" | |||
return self._wiki | |||
@property | |||
def irc(self): | |||
"""A dict of information about IRC.""" | |||
return self._irc | |||
@property | |||
def commands(self): | |||
"""A dict of information for IRC commands.""" | |||
return self._commands | |||
@property | |||
def tasks(self): | |||
"""A dict of information for bot tasks.""" | |||
return self._tasks | |||
@property | |||
def metadata(self): | |||
"""A dict of miscellaneous information.""" | |||
return self._metadata | |||
def is_loaded(self): | |||
"""Return ``True`` if our config file has been loaded, or ``False``.""" | |||
return self._data is not None | |||
def is_encrypted(self): | |||
"""Return ``True`` if passwords are encrypted, otherwise ``False``.""" | |||
return self.metadata.get("encryptPasswords", False) | |||
def load(self): | |||
"""Load, or reload, our config file. | |||
First, check if we have a valid config file, and if not, notify the | |||
user. If there is no config file at all, offer to make one, otherwise | |||
exit. | |||
Data from the config file is stored in six | |||
:py:class:`~earwigbot.config.ConfigNode`\ s (:py:attr:`components`, | |||
:py:attr:`wiki`, :py:attr:`irc`, :py:attr:`commands`, :py:attr:`tasks`, | |||
:py:attr:`metadata`) for easy access (as well as the lower-level | |||
:py:attr:`data` attribute). If passwords are encrypted, we'll use | |||
:py:func:`~getpass.getpass` for the key and then decrypt them. If the | |||
config is being reloaded, encrypted items will be automatically | |||
decrypted if they were decrypted earlier. | |||
""" | |||
if not path.exists(self._config_path): | |||
self._handle_missing_config() | |||
self._load() | |||
if not self._data: | |||
self._handle_missing_config() | |||
self._load() | |||
self.components._load(self._data.get("components", OrderedDict())) | |||
self.wiki._load(self._data.get("wiki", OrderedDict())) | |||
self.irc._load(self._data.get("irc", OrderedDict())) | |||
self.commands._load(self._data.get("commands", OrderedDict())) | |||
self.tasks._load(self._data.get("tasks", OrderedDict())) | |||
self.metadata._load(self._data.get("metadata", OrderedDict())) | |||
self._setup_logging() | |||
if self.is_encrypted(): | |||
if not self._decryption_cipher: | |||
key = getpass("Enter key to decrypt bot passwords: ") | |||
self._decryption_cipher = Blowfish.new(sha256(key).digest()) | |||
signature = self.metadata["signature"] | |||
if bcrypt.hashpw(key, signature) != signature: | |||
raise RuntimeError("Incorrect password.") | |||
for node, nodes in self._decryptable_nodes: | |||
self._decrypt(node, nodes) | |||
if self.irc: | |||
self.irc["permissions"] = self._permissions | |||
self._permissions.load() | |||
def decrypt(self, node, *nodes): | |||
"""Decrypt an object in our config tree. | |||
:py:attr:`_decryption_cipher` is used as our key, retrieved using | |||
:py:func:`~getpass.getpass` in :py:meth:`load` if it wasn't already | |||
specified. If this is called when passwords are not encrypted (check | |||
with :py:meth:`is_encrypted`), nothing will happen. We'll also keep | |||
track of this node if :py:meth:`load` is called again (i.e. to reload) | |||
and automatically decrypt it. | |||
Example usage:: | |||
>>> config.decrypt(config.irc, "frontend", "nickservPassword") | |||
# decrypts config.irc["frontend"]["nickservPassword"] | |||
""" | |||
signature = (node, nodes) | |||
if signature in self._decryptable_nodes: | |||
return # Already decrypted | |||
self._decryptable_nodes.append(signature) | |||
if self.is_encrypted(): | |||
self._decrypt(node, nodes) | |||
def schedule(self, minute, hour, month_day, month, week_day): | |||
"""Return a list of tasks scheduled to run at the specified time. | |||
The schedule data comes from our config file's ``schedule`` field, | |||
which is stored as :py:attr:`self.data["schedule"] <data>`. | |||
""" | |||
# Tasks to run this turn, each as a list of either [task_name, kwargs], | |||
# or just the task_name: | |||
tasks = [] | |||
now = {"minute": minute, "hour": hour, "month_day": month_day, | |||
"month": month, "week_day": week_day} | |||
data = self._data.get("schedule", []) | |||
for event in data: | |||
do = True | |||
for key, value in now.items(): | |||
try: | |||
requirement = event[key] | |||
except KeyError: | |||
continue | |||
if requirement != value: | |||
do = False | |||
break | |||
if do: | |||
try: | |||
tasks.extend(event["tasks"]) | |||
except KeyError: | |||
pass | |||
return tasks |
@@ -0,0 +1,51 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import logging | |||
__all__ = ["BotFormatter"] | |||
class BotFormatter(logging.Formatter):
    """Log formatter for the bot, optionally coloring the level name.

    Every line looks like ``[<date> <level>] <logger name>: <message>``,
    with the level name padded to eight characters. When *color* is true,
    the padded level name is wrapped in ANSI color escape codes.
    """

    # ANSI escape sequence that starts the color for each standard level:
    _LEVEL_COLORS = {
        logging.DEBUG: "\x1b[34m",            # Blue
        logging.INFO: "\x1b[32m",             # Green
        logging.WARNING: "\x1b[33m",          # Yellow
        logging.ERROR: "\x1b[31m",            # Red
        logging.CRITICAL: "\x1b[1m\x1b[31m",  # Bold red
    }
    _COLOR_RESET = "\x1b[0m"

    def __init__(self, color=False):
        self._color = color
        if color:
            # %(lvl)s is a custom record attribute set by format_color().
            fmt = "[%(asctime)s %(lvl)s] %(name)s: %(message)s"
        else:
            fmt = "[%(asctime)s %(levelname)-8s] %(name)s: %(message)s"
        datefmt = "%Y-%m-%d %H:%M:%S"
        super(BotFormatter, self).__init__(fmt=fmt, datefmt=datefmt)

    def format(self, record):
        """Format *record*, adding the colored level name first if enabled."""
        if self._color:
            record = self.format_color(record)
        return super(BotFormatter, self).format(record)

    def format_color(self, record):
        """Set ``record.lvl`` to the padded level name and return the record.

        Standard levels are wrapped in their color codes. Any other level
        (e.g. a custom numeric level) is left uncolored; previously such a
        record got no ``lvl`` attribute at all, so formatting it failed.
        """
        padded = record.levelname.ljust(8)
        start = self._LEVEL_COLORS.get(record.levelno)
        if start is None:
            record.lvl = padded
        else:
            record.lvl = start + padded + self._COLOR_RESET
        return record
@@ -0,0 +1,104 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from collections import OrderedDict | |||
__all__ = ["ConfigNode"] | |||
class ConfigNode(object):
    """Ordered, dict-like container for one section of the config.

    Items live in an :py:class:`~collections.OrderedDict` stored as
    ``_data`` and can be reached either as keys (``node["key"]``) or as
    attributes (``node.key``). Looking up a key that is not present raises
    :py:exc:`KeyError` through either route.
    """

    def __init__(self):
        self._data = OrderedDict()

    def __repr__(self):
        # __repr__ must return a string; returning the mapping itself made
        # every repr(node) call raise TypeError.
        return repr(self._data)

    def __nonzero__(self):
        return bool(self._data)

    def __len__(self):
        return len(self._data)

    def __getitem__(self, key):
        return self._data[key]

    def __setitem__(self, key, item):
        self._data[key] = item

    def __getattr__(self, key):
        # Only called when normal attribute lookup fails.
        if key == "_data":
            # _data is not set yet (e.g. the instance was made without
            # __init__). Raise directly: falling through would look up
            # self._data again and recurse forever.
            raise AttributeError(key)
        if key.startswith("__") and key.endswith("__"):
            # Special-method probes (such as __deepcopy__) expect
            # AttributeError, not KeyError, when the hook is absent.
            raise AttributeError(key)
        return self._data[key]

    def __setattr__(self, key, item):
        if key == "_data":
            super(ConfigNode, self).__setattr__(key, item)
        else:
            # Every other attribute assignment becomes a config item.
            self._data[key] = item

    def __iter__(self):
        for key in self._data:
            yield key

    def __contains__(self, item):
        return item in self._data

    def _dump(self):
        """Return a copy of the data with nested ConfigNodes dumped too."""
        data = self._data.copy()
        for key, val in self._data.items():
            if isinstance(val, ConfigNode):
                data[key] = val._dump()
        return data

    def _load(self, data):
        """Replace our contents with a shallow copy of *data*."""
        self._data = data.copy()

    def _decrypt(self, cipher, intermediates, item):
        """Decrypt ``item`` in place, if present under *intermediates*.

        *intermediates* is the sequence of keys leading from this node to
        the mapping that holds *item*. Nothing happens if any of those
        keys, or *item* itself, is missing. The stored value is treated as
        hex-encoded ciphertext; trailing NUL bytes are stripped from the
        decrypted result.
        """
        base = self._data
        for inter in intermediates:
            try:
                base = base[inter]
            except KeyError:
                return
        if item in base:
            ciphertext = base[item].decode("hex")
            base[item] = cipher.decrypt(ciphertext).rstrip("\x00")

    def get(self, *args, **kwargs):
        return self._data.get(*args, **kwargs)

    def keys(self):
        return self._data.keys()

    def values(self):
        return self._data.values()

    def items(self):
        return self._data.items()

    def iterkeys(self):
        return self._data.iterkeys()

    def itervalues(self):
        return self._data.itervalues()

    def iteritems(self):
        return self._data.iteritems()
@@ -0,0 +1,106 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
""" | |||
Based on: | |||
* https://gist.github.com/844388 | |||
* http://pyyaml.org/attachment/ticket/161/use_ordered_dict.py | |||
with modifications. | |||
""" | |||
from collections import OrderedDict | |||
import yaml | |||
__all__ = ["OrderedLoader", "OrderedDumper"] | |||
class OrderedLoader(yaml.Loader):
    """A YAML loader that loads mappings into ordered dictionaries."""

    def __init__(self, *args, **kwargs):
        super(OrderedLoader, self).__init__(*args, **kwargs)
        # Route both plain and ordered YAML maps through our constructor.
        build_map = type(self).construct_yaml_map
        for tag in (u"tag:yaml.org,2002:map", u"tag:yaml.org,2002:omap"):
            self.add_constructor(tag, build_map)

    def construct_yaml_map(self, node):
        """Yield an empty OrderedDict, then fill it from *node*.

        The dict is yielded before it is populated, so the filling happens
        only when the generator is resumed.
        """
        ordered = OrderedDict()
        yield ordered
        ordered.update(self.construct_mapping(node))

    def construct_mapping(self, node, deep=False):
        """Build an OrderedDict from a mapping node, keeping key order.

        Raises a ``ConstructorError`` if *node* is not a mapping node or if
        one of its keys is unhashable.
        """
        if not isinstance(node, yaml.MappingNode):
            raise yaml.constructor.ConstructorError(None, None,
                "expected a mapping node, but found {0}".format(node.id),
                node.start_mark)
        self.flatten_mapping(node)

        result = OrderedDict()
        for key_node, value_node in node.value:
            key = self.construct_object(key_node, deep=deep)
            try:
                hash(key)  # Keys must be hashable to go in a dict
            except TypeError as exc:
                raise yaml.constructor.ConstructorError(
                    "while constructing a mapping", node.start_mark,
                    "found unacceptable key ({0})".format(exc),
                    key_node.start_mark)
            result[key] = self.construct_object(value_node, deep=deep)
        return result
class OrderedDumper(yaml.SafeDumper):
    """A YAML dumper that dumps ordered dictionaries into mappings."""

    def __init__(self, *args, **kwargs):
        super(OrderedDumper, self).__init__(*args, **kwargs)
        # Represent OrderedDicts with the ordinary dict representer.
        self.add_representer(OrderedDict, type(self).represent_dict)

    def represent_mapping(self, tag, mapping, flow_style=None):
        """Build a mapping node from *mapping* in its own iteration order.

        *mapping* may be anything with an ``items()`` method or an iterable
        of ``(key, value)`` pairs. When *flow_style* is not given, the
        node's flow style is the dumper's default if one is set; otherwise
        it is true only if every key and value is a scalar node with no
        explicit style.
        """
        pairs = []
        node = yaml.MappingNode(tag, pairs, flow_style=flow_style)
        if self.alias_key is not None:
            self.represented_objects[self.alias_key] = node

        if hasattr(mapping, "items"):
            mapping = list(mapping.items())

        all_plain_scalars = True
        for item_key, item_value in mapping:
            node_key = self.represent_data(item_key)
            node_value = self.represent_data(item_value)
            for child in (node_key, node_value):
                if not isinstance(child, yaml.ScalarNode) or child.style:
                    all_plain_scalars = False
            pairs.append((node_key, node_value))

        if flow_style is None:
            if self.default_flow_style is None:
                node.flow_style = all_plain_scalars
            else:
                node.flow_style = self.default_flow_style
        return node
@@ -0,0 +1,176 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from fnmatch import fnmatch | |||
import sqlite3 as sqlite | |||
from threading import Lock | |||
__all__ = ["PermissionsDB"] | |||
class PermissionsDB(object):
    """
    **EarwigBot: Permissions Database Manager**

    Controls the :file:`permissions.db` file, which stores the bot's owners and
    admins for the purposes of using certain dangerous IRC commands.

    Rules are kept both in the SQLite file and in an in-memory dict that maps
    a rank (:py:attr:`ADMIN` or :py:attr:`OWNER`) to a list of rule objects.
    """
    ADMIN = 1
    OWNER = 2

    def __init__(self, dbfile):
        self._dbfile = dbfile
        self._db_access_lock = Lock()
        self._data = {}

    def __repr__(self):
        """Return the canonical string representation of the PermissionsDB."""
        res = "PermissionsDB(dbfile={0!r})"
        return res.format(self._dbfile)

    def __str__(self):
        """Return a nice string representation of the PermissionsDB."""
        return "<PermissionsDB at {0}>".format(self._dbfile)

    def _create(self, conn):
        """Initialize the permissions database with its necessary tables."""
        query = """CREATE TABLE users (user_nick, user_ident, user_host,
                   user_rank)"""
        conn.execute(query)

    def _is_rank(self, user, rank):
        """Return the first rule of *rank* that matches *user*, else False."""
        for rule in self._data.get(rank, ()):
            if user in rule:
                return rule
        return False

    def _set_rank(self, user, rank):
        """Add a User to the database under a given rank and return it."""
        query = "INSERT INTO users VALUES (?, ?, ?, ?)"
        with self._db_access_lock:
            with sqlite.connect(self._dbfile) as conn:
                conn.execute(query, (user.nick, user.ident, user.host, rank))
            self._data.setdefault(rank, []).append(user)
            return user

    def _del_rank(self, user, rank):
        """Remove the first rule of *rank* matching *user* from the database.

        Returns the removed rule, or ``None`` if no rule matched.
        """
        query = """DELETE FROM users WHERE user_nick = ? AND user_ident = ? AND
                   user_host = ? AND user_rank = ?"""
        with self._db_access_lock:
            rules = self._data.get(rank, [])
            for rule in rules:
                if user in rule:
                    # Delete by the stored rule's own fields: *user* may have
                    # matched only through a wildcard, in which case its
                    # fields would not identify the row and the database
                    # would drift from the in-memory list.
                    args = (rule.nick, rule.ident, rule.host, rank)
                    with sqlite.connect(self._dbfile) as conn:
                        conn.execute(query, args)
                    rules.remove(rule)
                    return rule
        return None

    @property
    def data(self):
        """A dict of all entries in the permissions database."""
        return self._data

    def load(self):
        """Load permissions from an existing database, or create a new one."""
        query = "SELECT user_nick, user_ident, user_host, user_rank FROM users"
        self._data = {}
        with sqlite.connect(self._dbfile) as conn, self._db_access_lock:
            try:
                for nick, ident, host, rank in conn.execute(query):
                    rule = _User(nick, ident, host)
                    self._data.setdefault(rank, []).append(rule)
            except sqlite.OperationalError:
                # The query fails on a fresh file with no 'users' table yet
                self._create(conn)

    def has_exact(self, rank, nick="*", ident="*", host="*"):
        """Return the rule stored under *rank* with exactly these fields.

        No wildcard matching is done; ``False`` is returned if there is no
        such rule.
        """
        wanted = (nick, ident, host)
        for usr in self._data.get(rank, ()):
            if wanted == (usr.nick, usr.ident, usr.host):
                return usr
        return False

    def is_admin(self, data):
        """Return the matching rule if the user is a bot admin, else False."""
        user = _User(data.nick, data.ident, data.host)
        return self._is_rank(user, rank=self.ADMIN)

    def is_owner(self, data):
        """Return the matching rule if the user is a bot owner, else False."""
        user = _User(data.nick, data.ident, data.host)
        return self._is_rank(user, rank=self.OWNER)

    def add_admin(self, nick="*", ident="*", host="*"):
        """Add a nick/ident/host combo to the bot admins list."""
        return self._set_rank(_User(nick, ident, host), rank=self.ADMIN)

    def add_owner(self, nick="*", ident="*", host="*"):
        """Add a nick/ident/host combo to the bot owners list."""
        return self._set_rank(_User(nick, ident, host), rank=self.OWNER)

    def remove_admin(self, nick="*", ident="*", host="*"):
        """Remove a nick/ident/host combo from the bot admins list."""
        return self._del_rank(_User(nick, ident, host), rank=self.ADMIN)

    def remove_owner(self, nick="*", ident="*", host="*"):
        """Remove a nick/ident/host combo from the bot owners list."""
        return self._del_rank(_User(nick, ident, host), rank=self.OWNER)
class _User(object):
    """An IRC nick/ident/host triple, used as a user or as a matching rule.

    ``user in rule`` is true when each field of *user* matches the
    corresponding field of *rule*, which is treated as an fnmatch pattern.
    """

    def __init__(self, nick, ident, host):
        self.nick, self.ident, self.host = nick, ident, host

    def __repr__(self):
        """Return the canonical string representation of the User."""
        template = "_User(nick={0!r}, ident={1!r}, host={2!r})"
        return template.format(self.nick, self.ident, self.host)

    def __str__(self):
        """Return the User in ``nick!ident@host`` form."""
        return "{0}!{1}@{2}".format(self.nick, self.ident, self.host)

    def __contains__(self, user):
        return (fnmatch(user.nick, self.nick) and
                fnmatch(user.ident, self.ident) and
                fnmatch(user.host, self.host))
@@ -0,0 +1,446 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from collections import OrderedDict | |||
from getpass import getpass | |||
from hashlib import sha256 | |||
from os import chmod, mkdir, path | |||
import re | |||
import stat | |||
import sys | |||
from textwrap import fill, wrap | |||
from Crypto.Cipher import Blowfish | |||
import bcrypt | |||
import yaml | |||
from earwigbot import exceptions | |||
from earwigbot.config.ordered_yaml import OrderedDumper | |||
__all__ = ["ConfigScript"] | |||
# Skeleton written to a new 'rules.py' by ConfigScript._set_irc(). The body of
# process() has to stay indented, or the generated module will not compile.
RULES_TEMPLATE = """# -*- coding: utf-8 -*-
def process(bot, rc):
    \"\"\"Given a Bot() object and an RC() object, return a list of channels
    to report this event to. Also, start any wiki bot tasks within this
    function if necessary.\"\"\"
    pass
"""
class ConfigScript(object):
    """A script to guide a user through the creation of a new config file.

    Answers are collected in :py:attr:`data`, an ordered mapping with one
    entry per top-level config section, and written out as YAML by
    :py:meth:`make_new`.
    """
    WIDTH = 79  # Column at which messages and prompts are wrapped
    PROMPT = "\x1b[32m> \x1b[0m"  # "> " wrapped in ANSI color escapes
    BCRYPT_ROUNDS = 12  # Passed to bcrypt.gensalt() for the key signature

    def __init__(self, config):
        self.config = config
        self.data = OrderedDict([
            ("metadata", OrderedDict()),
            ("components", OrderedDict()),
            ("wiki", OrderedDict()),
            ("irc", OrderedDict()),
            ("commands", OrderedDict()),
            ("tasks", OrderedDict()),
            ("schedule", [])
        ])

        self._cipher = None  # Blowfish cipher; set if encryption is enabled
        self._wmf = False
        self._proj = None
        self._lang = None

    @staticmethod
    def _collapse(text):
        """Replace each run of two or more whitespace characters by a space."""
        return re.sub(r"\s\s+", " ", text)

    def _prompt_last_line(self, text):
        """Wrap a prompt, print all but its last line, and return that line."""
        lines = wrap(self._collapse(text), self.WIDTH)
        if len(lines) > 1:
            print("\n".join(lines[:-1]))
        return lines[-1]

    def _make_dir(self, name):
        """Create directory *name* under the bot's root unless it exists."""
        target = path.join(self.config.root_dir, name)
        if not path.isdir(target):
            mkdir(target)

    def _print(self, text):
        """Print *text*, whitespace-collapsed and wrapped to WIDTH columns."""
        print(fill(self._collapse(text), self.WIDTH))

    def _print_no_nl(self, text):
        """Like _print(), but without a trailing newline; flushes stdout."""
        sys.stdout.write(fill(self._collapse(text), self.WIDTH))
        sys.stdout.flush()

    def _pause(self):
        """Block until the user presses enter."""
        raw_input(self.PROMPT + "Press enter to continue: ")

    def _ask(self, text, default=None, require=True):
        """Prompt for a line of input and return it.

        An empty answer yields *default*. If *require* is true, the prompt is
        repeated until the result is truthy.
        """
        text = self.PROMPT + text
        if default:
            text += " \x1b[33m[{0}]\x1b[0m".format(default)
        last = self._prompt_last_line(text)
        while True:
            answer = raw_input(last + " ") or default
            if answer or not require:
                return answer

    def _ask_int(self, text, default):
        """Prompt like _ask(), repeating until the answer is an integer."""
        while True:
            answer = self._ask(text, default)
            try:
                return int(answer)
            except (TypeError, ValueError):
                self._print("Please enter a whole number.")

    def _ask_bool(self, text, default=True):
        """Prompt for a yes/no answer; an empty answer yields *default*."""
        text = self.PROMPT + text
        if default:
            text += " \x1b[33m[Y/n]\x1b[0m"
        else:
            text += " \x1b[33m[y/N]\x1b[0m"
        last = self._prompt_last_line(text)
        while True:
            answer = raw_input(last + " ").lower()
            if not answer:
                return default
            if answer.startswith("y"):
                return True
            if answer.startswith("n"):
                return False

    def _ask_pass(self, text, encrypt=True):
        """Prompt for a password without echo, optionally encrypting it."""
        password = getpass(self.PROMPT + text + " ")
        if encrypt:
            return self._encrypt(password)
        return password

    def _encrypt(self, password):
        """Return *password* hex-encoded and encrypted with the cipher.

        The password is returned unchanged when no cipher has been set up.
        """
        if self._cipher:
            # Pad with NULs to a multiple of eight characters before
            # encrypting
            mod = len(password) % 8
            if mod:
                password = password.ljust(len(password) + (8 - mod), "\x00")
            return self._cipher.encrypt(password).encode("hex")
        else:
            return password

    def _ask_list(self, text):
        """Prompt for several lines; a blank line ends the list."""
        print(fill(self._collapse(self.PROMPT + text), self.WIDTH))
        print("[one item per line; blank line to end]:")
        result = []
        while True:
            line = raw_input(self.PROMPT)
            if line:
                result.append(line)
            else:
                return result

    def _set_metadata(self):
        """Fill in the 'metadata' section (encryption and logging)."""
        self.data["metadata"] = OrderedDict([("version", 1)])
        self._print("""I can encrypt passwords stored in your config file in
                       addition to preventing other users on your system from
                       reading the file. Encryption is recommended if the bot
                       is to run on a public computer like the Toolserver, but
                       otherwise the need to enter a key everytime you start
                       the bot may be annoying.""")
        if self._ask_bool("Encrypt stored passwords?"):
            self.data["metadata"]["encryptPasswords"] = True
            key = getpass(self.PROMPT + "Enter an encryption key: ")
            msg = "Running {0} rounds of bcrypt...".format(self.BCRYPT_ROUNDS)
            self._print_no_nl(msg)
            signature = bcrypt.hashpw(key, bcrypt.gensalt(self.BCRYPT_ROUNDS))
            self.data["metadata"]["signature"] = signature
            self._cipher = Blowfish.new(sha256(key).digest())
            print(" done.")
        else:
            self.data["metadata"]["encryptPasswords"] = False

        self._print("""The bot can temporarily store its logs in the logs/
                       subdirectory. Error logs are kept for a month whereas
                       normal logs are kept for a week. If you disable this,
                       the bot will still print logs to stdout.""")
        enable_logging = self._ask_bool("Enable logging?")
        self.data["metadata"]["enableLogging"] = enable_logging

    def _set_components(self):
        """Fill in the 'components' section."""
        self._print("""The bot contains three separate components that can run
                       independently of each other.""")
        self._print("""- The IRC front-end runs on a normal IRC server, like
                       freenode, and expects users to interact with it through
                       commands.""")
        self._print("""- The IRC watcher runs on a wiki recent-changes server,
                       like irc.wikimedia.org, and listens for edits. Users
                       cannot interact with this component. It can detect
                       specific events and report them to "feed" channels on
                       the front-end or start bot tasks.""")
        self._print("""- The wiki task scheduler runs wiki-editing bot tasks in
                       separate threads at user-defined times through a
                       cron-like interface. Tasks which are not scheduled can
                       be started by the IRC watcher manually through the IRC
                       front-end.""")
        frontend = self._ask_bool("Enable the IRC front-end?")
        watcher = self._ask_bool("Enable the IRC watcher?")
        scheduler = self._ask_bool("Enable the wiki task scheduler?")
        self.data["components"]["irc_frontend"] = frontend
        self.data["components"]["irc_watcher"] = watcher
        self.data["components"]["wiki_scheduler"] = scheduler

    def _login(self, kwargs):
        """Try to add the site described by *kwargs* and log in to it.

        Returns the site object. Returns ``None`` instead when the user chose
        to re-enter the site information: in that case a nested _set_wiki()
        call has already filled in the whole 'wiki' section. Raises
        NoConfigError if the user cancels the setup.
        """
        self.config.wiki._load(self.data["wiki"])
        self._print_no_nl("Trying to connect to the site...")
        try:
            site = self.config.bot.wiki.add_site(**kwargs)
        except exceptions.APIError as exc:
            print(" API error!")
            print("\x1b[31m" + exc.message + "\x1b[0m")
            question = "Would you like to re-enter the site information?"
            if self._ask_bool(question):
                self._set_wiki()
                return None
            question = "This will cancel the setup process. Are you sure?"
            if self._ask_bool(question, default=False):
                raise exceptions.NoConfigError()
            self._set_wiki()
            return None
        except exceptions.LoginError as exc:
            print(" login error!")
            print("\x1b[31m" + exc.message + "\x1b[0m")
            question = "Would you like to re-enter your login information?"
            if self._ask_bool(question):
                self.data["wiki"]["username"] = self._ask("Bot username:")
                password = self._ask_pass("Bot password:", encrypt=False)
                self.data["wiki"]["password"] = password
                return self._login(kwargs)
            else:
                password = self.data["wiki"]["password"]
            question = "Would you like to re-enter the site information?"
            if self._ask_bool(question):
                self._set_wiki()
                return None
            self._print("""Moving on. You can modify the login information
                           stored in the bot's config in the future.""")
            self.data["wiki"]["password"] = None  # Clear so we don't login
            self.config.wiki._load(self.data["wiki"])
            self._print_no_nl("Trying to connect to the site...")
            site = self.config.bot.wiki.add_site(**kwargs)
            print(" success.")
            # Put the password back, encrypted like on the success path so
            # the saved file agrees with metadata.encryptPasswords:
            self.data["wiki"]["password"] = self._encrypt(password)
        else:
            print(" success.")

            # Remember to store the encrypted password:
            password = self._encrypt(self.data["wiki"]["password"])
            self.data["wiki"]["password"] = password

        return site

    def _set_wiki(self):
        """Fill in the 'wiki' section, logging in to the site on the way."""
        self._wmf = self._ask_bool("""Will this bot run on Wikimedia Foundation
                                      wikis, like Wikipedia?""")
        if self._wmf:
            msg = "Site project (e.g. 'wikipedia', 'wiktionary', 'wikimedia'):"
            self._proj = project = self._ask(msg, "wikipedia").lower()
            msg = "Site language code (e.g. 'en', 'fr', 'commons'):"
            self._lang = lang = self._ask(msg, "en").lower()
            kwargs = {"project": project, "lang": lang}
        else:
            msg = "Site base URL, without the script path and trailing slash;"
            msg += " can be protocol-insensitive (e.g. '//en.wikipedia.org'):"
            url = self._ask(msg)
            script = self._ask("Site script path:", "/w")
            kwargs = {"base_url": url, "script_path": script}

        self.data["wiki"]["username"] = self._ask("Bot username:")
        password = self._ask_pass("Bot password:", encrypt=False)
        self.data["wiki"]["password"] = password
        self.data["wiki"]["userAgent"] = "EarwigBot/$1 (Python/$2; https://github.com/earwig/earwigbot)"
        self.data["wiki"]["summary"] = "([[WP:BOT|Bot]]): $2"
        self.data["wiki"]["useHTTPS"] = True
        self.data["wiki"]["assert"] = "user"
        self.data["wiki"]["maxlag"] = 10
        self.data["wiki"]["waitTime"] = 2

        site = self._login(kwargs)
        if site is None:
            # The user restarted this step from inside _login(); the nested
            # call has completed the section, so there is nothing left to do.
            return
        self.data["wiki"]["defaultSite"] = site.name

        self.data["wiki"]["sql"] = {}
        if self._wmf:
            msg = "Will this bot run from the Wikimedia Toolserver?"
            toolserver = self._ask_bool(msg, default=False)
            if toolserver:
                args = [("host", "$1-p.rrdb.toolserver.org"), ("db", "$1_p")]
                self.data["wiki"]["sql"] = OrderedDict(args)

        self.data["wiki"]["shutoff"] = {}
        msg = "Would you like to enable an automatic shutoff page for the bot?"
        if self._ask_bool(msg):
            self._print("""The page title can contain two wildcards: $1 will be
                           substituted with the bot's username, and $2 with the
                           current task number. This can be used to implement a
                           separate shutoff page for each task.""")
            page = self._ask("Page title:", "User:$1/Shutoff")
            msg = "Page content to indicate the bot is *not* shut off:"
            disabled = self._ask(msg, "run")
            args = [("page", page), ("disabled", disabled)]
            self.data["wiki"]["shutoff"] = OrderedDict(args)

        self.data["wiki"]["search"] = {}

    def _set_irc(self):
        """Fill in the 'irc' section for the enabled IRC components.

        Ports are stored as integers whether typed in or defaulted.
        """
        if self.data["components"]["irc_frontend"]:
            frontend = self.data["irc"]["frontend"] = OrderedDict()
            msg = "Hostname of the frontend's IRC server, without 'irc://':"
            frontend["host"] = self._ask(msg, "irc.freenode.net")
            frontend["port"] = self._ask_int("Frontend port:", 6667)
            frontend["nick"] = self._ask("Frontend bot's nickname:")
            frontend["ident"] = self._ask("Frontend bot's ident:",
                                          frontend["nick"].lower())
            question = "Frontend bot's real name (gecos):"
            frontend["realname"] = self._ask(question, "EarwigBot")
            if self._ask_bool("Should the bot identify to NickServ?"):
                ns_user = self._ask("NickServ username:", frontend["nick"])
                ns_pass = self._ask_pass("Nickserv password:")
                frontend["nickservUsername"] = ns_user
                frontend["nickservPassword"] = ns_pass
            chan_question = "Frontend channels to join by default:"
            frontend["channels"] = self._ask_list(chan_question)
            self._print("""The bot keeps a database of its admins (users who
                           can use certain sensitive commands) and owners
                           (users who can quit the bot and modify its access
                           list), identified by nick, ident, and/or hostname.
                           Hostname is the most secure option since it cannot
                           be easily spoofed. If you have a cloak, this will
                           probably look like 'wikipedia/Username' or
                           'unaffiliated/nickname'.""")
            host = self._ask("Your hostname on the frontend:", require=False)
            if host:
                permdb = self.config._permissions
                permdb.load()
                permdb.add_owner(host=host)
                permdb.add_admin(host=host)
        else:
            frontend = {}

        if self.data["components"]["irc_watcher"]:
            watcher = self.data["irc"]["watcher"] = OrderedDict()
            if self._wmf:
                watcher["host"] = "irc.wikimedia.org"
                watcher["port"] = 6667
            else:
                msg = "Hostname of the watcher's IRC server, without 'irc://':"
                watcher["host"] = self._ask(msg)
                watcher["port"] = self._ask_int("Watcher port:", 6667)
            # The frontend's answers, if any, serve as the watcher's defaults
            nick = self._ask("Watcher bot's nickname:", frontend.get("nick"))
            ident = self._ask("Watcher bot's ident:", nick.lower())
            watcher["nick"] = nick
            watcher["ident"] = ident
            question = "Watcher bot's real name (gecos):"
            default = frontend.get("realname", "EarwigBot")
            watcher["realname"] = self._ask(question, default)
            watcher_ns = "Should the bot identify to NickServ?"
            if not self._wmf and self._ask_bool(watcher_ns):
                ns_user = self._ask("NickServ username:", watcher["nick"])
                ns_pass = self._ask_pass("Nickserv password:")
                watcher["nickservUsername"] = ns_user
                watcher["nickservPassword"] = ns_pass
            if self._wmf:
                chan = "#{0}.{1}".format(self._lang, self._proj)
                watcher["channels"] = [chan]
            else:
                chan_question = "Watcher channels to join by default:"
                watcher["channels"] = self._ask_list(chan_question)
            self._print("""I am now creating a blank 'rules.py' file, which
                           will determine how the bot handles messages received
                           from the IRC watcher. It contains a process()
                           function that takes a Bot object (allowing you to
                           start tasks) and an RC object (storing the message
                           from the watcher). See the documentation for
                           details.""")
            with open(path.join(self.config.root_dir, "rules.py"), "w") as fp:
                fp.write(RULES_TEMPLATE)
            self._pause()

        self.data["irc"]["version"] = "EarwigBot - $1 - Python/$2 https://github.com/earwig/earwigbot"

    def _set_commands(self):
        """Fill in the 'commands' section and create the commands/ dir."""
        msg = """Would you like to disable the default IRC commands? You can
                 fine-tune which commands are disabled later on."""
        if (not self.data["components"]["irc_frontend"] or
                self._ask_bool(msg, default=False)):
            self.data["commands"]["disable"] = True
        self._print("""I am now creating the 'commands/' directory, where you
                       can place custom IRC commands and plugins. Creating your
                       own commands is described in the documentation.""")
        self._make_dir("commands")
        self._pause()

    def _set_tasks(self):
        """Create the tasks/ directory."""
        self._print("""I am now creating the 'tasks/' directory, where you can
                       place custom bot tasks and plugins. Creating your own
                       tasks is described in the documentation.""")
        self._make_dir("tasks")
        self._pause()

    def _set_schedule(self):
        """Explain the 'schedule' section; nothing is asked or stored."""
        self._print("""The final section of your config file, 'schedule', is a
                       list of bot tasks to be started by the wiki scheduler.
                       Each entry contains cron-like time quantifiers and a
                       list of tasks. For example, the following starts the
                       'foobot' task every hour on the half-hour:""")
        # The examples are indented so that they read as valid nested YAML
        print("\x1b[33mschedule:")
        print("    - minute: 30")
        print("      tasks:")
        print("        - foobot\x1b[0m")
        self._print("""The following starts the 'barbot' task with the keyword
                       arguments 'action="baz"' every Monday at 05:00 UTC:""")
        print("\x1b[33m    - week_day: 1")
        print("      hour: 5")
        print("      tasks:")
        print('        - ["barbot", {"action": "baz"}]\x1b[0m')
        self._print("""The full list of quantifiers is minute, hour, month_day,
                       month, and week_day. See the documentation for more
                       information.""")
        self._pause()

    def _save(self):
        """Dump the collected data to the config file as block-style YAML."""
        with open(self.config.path, "w") as stream:
            yaml.dump(self.data, stream, OrderedDumper, indent=4,
                      allow_unicode=True, default_flow_style=False)

    def make_new(self):
        """Make a new config file based on the user's input."""
        try:
            # Create the file up front and make it readable and writable by
            # its owner only, before any password is written to it
            open(self.config.path, "w").close()
            chmod(self.config.path, stat.S_IRUSR|stat.S_IWUSR)
        except IOError:
            print("I can't seem to write to the config file:")
            raise
        self._set_metadata()
        self._set_components()
        self._set_wiki()
        components = self.data["components"]
        if components["irc_frontend"] or components["irc_watcher"]:
            self._set_irc()
        self._set_commands()
        self._set_tasks()
        if components["wiki_scheduler"]:
            self._set_schedule()
        self._print("""I am now saving config.yml with your settings. YAML is a
                       relatively straightforward format and you should be able
                       to update these settings in the future when necessary.
                       I will start the bot at your signal. Feel free to
                       contact me at wikipedia.earwig@gmail.com if you have any
                       questions.""")
        self._save()
        if not self._ask_bool("Start the bot now?"):
            # sys.exit() rather than exit(), which only exists when the site
            # module has been loaded
            sys.exit()
@@ -0,0 +1,256 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
""" | |||
**EarwigBot: Exceptions**

This module contains all exceptions used by EarwigBot::

    EarwigBotError
     +-- NoConfigError
     +-- IRCError
     |    +-- BrokenSocketError
     +-- WikiToolsetError
          +-- SiteNotFoundError
          +-- ServiceError
          |    +-- APIError
          |    +-- SQLError
          +-- NoServiceError
          +-- LoginError
          +-- NamespaceNotFoundError
          +-- PageNotFoundError
          +-- InvalidPageError
          +-- RedirectError
          +-- UserNotFoundError
          +-- EditError
          |    +-- PermissionsError
          |    +-- EditConflictError
          |    +-- NoContentError
          |    +-- ContentTooBigError
          |    +-- SpamDetectedError
          |    +-- FilteredError
          +-- CopyvioCheckError
               +-- UnknownSearchEngineError
               +-- UnsupportedSearchEngineError
               +-- SearchQueryError
""" | |||
class EarwigBotError(Exception):
    """Root of the exception hierarchy for errors raised by EarwigBot."""
class NoConfigError(EarwigBotError):
    """The bot has no config file, and it cannot run without one.

    Raised when the config file is missing and the user declined to have a
    new one created.
    """
class IRCError(EarwigBotError):
    """Base exception class for errors in IRC-related sections of the bot."""
class BrokenSocketError(IRCError):
    """A socket is treated as broken because no data is arriving on it.

    Raised by :py:meth:`IRCConnection._get
    <earwigbot.irc.connection.IRCConnection._get>`.
    """
class WikiToolsetError(EarwigBotError):
    """Common base class for every error raised by the Wiki Toolset."""
class SiteNotFoundError(WikiToolsetError):
    """The sites database has no entry for the requested site.

    Raised by :py:class:`~earwigbot.wiki.sitesdb.SitesDB`.
    """
class ServiceError(WikiToolsetError):
    """Common base class for failures inside a service (the API or SQL).

    :py:meth:`Site.delegate <earwigbot.wiki.site.Site.delegate>` catches this
    as the sign that a service is non-functional, so that another,
    less-preferred one can be tried.
    """
class APIError(ServiceError):
    """A connection to a site's API could not be made.

    Possible causes: the server doesn't exist, our URL is wrong or
    incomplete, or the site is having temporary problems.

    Raised by :py:meth:`Site.api_query <earwigbot.wiki.site.Site.api_query>`.
    """
class SQLError(ServiceError):
    """Something went wrong while running an SQL query.

    Raised by :py:meth:`Site.sql_query <earwigbot.wiki.site.Site.sql_query>`.
    """
class NoServiceError(WikiToolsetError):
    """None of the services able to handle a specific task is functioning.

    Raised by :py:meth:`Site.delegate <earwigbot.wiki.site.Site.delegate>`.
    """
class LoginError(WikiToolsetError):
    """Logging in failed.

    The username or password may be incorrect.

    Raised by :py:meth:`Site._login <earwigbot.wiki.site.Site._login>`.
    """
class NamespaceNotFoundError(WikiToolsetError):
    """No namespace exists with the requested name or ID.

    Raised by :py:meth:`Site.namespace_id_to_name
    <earwigbot.wiki.site.Site.namespace_id_to_name>` and
    :py:meth:`Site.namespace_name_to_id
    <earwigbot.wiki.site.Site.namespace_name_to_id>`.
    """
class PageNotFoundError(WikiToolsetError):
    """Information was requested about a page that does not exist.

    Raised by :py:class:`~earwigbot.wiki.page.Page`.
    """
class InvalidPageError(WikiToolsetError):
    """Information was requested about a page whose title is invalid.

    Raised by :py:class:`~earwigbot.wiki.page.Page`.
    """
class RedirectError(WikiToolsetError):
    """A redirect-only method was used on a malformed or non-redirect page.

    Raised by :py:meth:`Page.get_redirect_target
    <earwigbot.wiki.page.Page.get_redirect_target>`.
    """
class UserNotFoundError(WikiToolsetError):
    """Certain information was requested about a user that does not exist.

    Raised by :py:class:`~earwigbot.wiki.user.User`.
    """
class EditError(WikiToolsetError):
    """An error occurred while editing.

    Every editing error derives from this class. It is itself raised only
    for a generic error that we don't know about.

    Raised by :py:meth:`Page.edit <earwigbot.wiki.page.Page.edit>` and
    :py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
    """
class PermissionsError(EditError):
    """A permissions error occurred while editing.

    We attempted something we have no permission for, such as deleting a page
    as a non-admin, or editing a page without login information while
    AssertEdit is enabled.

    Raised by :py:meth:`Page.edit <earwigbot.wiki.page.Page.edit>` and
    :py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
    """
class EditConflictError(EditError):
    """We got an edit conflict or a (rarer) delete/recreate conflict.

    Raised by :py:meth:`Page.edit <earwigbot.wiki.page.Page.edit>` and
    :py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
    """
class NoContentError(EditError):
    """We tried to create a page or new section with no content.

    Raised by :py:meth:`Page.edit <earwigbot.wiki.page.Page.edit>` and
    :py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
    """
class ContentTooBigError(EditError):
    """The edit we tried to push exceeded the article size limit.

    Raised by :py:meth:`Page.edit <earwigbot.wiki.page.Page.edit>` and
    :py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
    """
class SpamDetectedError(EditError):
    """The spam filter refused our edit.

    Raised by :py:meth:`Page.edit <earwigbot.wiki.page.Page.edit>` and
    :py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
    """
class FilteredError(EditError):
    """The edit filter refused our edit.

    Raised by :py:meth:`Page.edit <earwigbot.wiki.page.Page.edit>` and
    :py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
    """
class CopyvioCheckError(WikiToolsetError):
    """An error occurred when checking a page for copyright violations.

    This is a base class for multiple exceptions; usually one of those will be
    raised instead of this.

    Raised by :py:meth:`Page.copyvio_check
    <earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check>` and
    :py:meth:`Page.copyvio_compare
    <earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_compare>`.
    """
class UnknownSearchEngineError(CopyvioCheckError):
    """Attempted to do a copyvio check with an unknown search engine.

    Search engines are specified in :file:`config.yml` as
    :py:attr:`config.wiki["search"]["engine"]`.

    Raised by :py:meth:`Page.copyvio_check
    <earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check>` and
    :py:meth:`Page.copyvio_compare
    <earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_compare>`.
    """
class UnsupportedSearchEngineError(CopyvioCheckError):
    """Attempted to do a copyvio check using an unavailable engine.

    This might occur if, for example, an engine requires oauth2 but the package
    couldn't be imported.

    Raised by :py:meth:`Page.copyvio_check
    <earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check>` and
    :py:meth:`Page.copyvio_compare
    <earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_compare>`.
    """
class SearchQueryError(CopyvioCheckError):
    """Some error occurred while doing a search query.

    Raised by :py:meth:`Page.copyvio_check
    <earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check>` and
    :py:meth:`Page.copyvio_compare
    <earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_compare>`.
    """
@@ -0,0 +1,27 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from earwigbot.irc.connection import * | |||
from earwigbot.irc.data import * | |||
from earwigbot.irc.frontend import * | |||
from earwigbot.irc.rc import * | |||
from earwigbot.irc.watcher import * |
@@ -0,0 +1,259 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import socket | |||
from threading import Lock | |||
from time import sleep, time | |||
from earwigbot.exceptions import BrokenSocketError | |||
__all__ = ["IRCConnection"] | |||
class IRCConnection(object):
    """Interface with an IRC server.

    Owns the socket to a single server and provides the low-level
    send/receive plumbing plus thin wrappers around common IRC commands.
    Subclasses must override :py:meth:`_process_message` to act on the lines
    read by :py:meth:`loop`.
    """

    def __init__(self, host, port, nick, ident, realname, logger):
        """Store the connection settings; no network activity happens here."""
        self._host = host
        self._port = port
        self._nick = nick
        self._ident = ident
        self._realname = realname
        self.logger = logger

        self._is_running = False
        self._send_lock = Lock()  # Serializes writes and their rate limiting

        # Timestamps (as returned by time()) consulted by keep_alive():
        self._last_recv = time()
        self._last_send = 0
        self._last_ping = 0

    def __repr__(self):
        """Return the canonical string representation of the IRCConnection."""
        res = "IRCConnection(host={0!r}, port={1!r}, nick={2!r}, ident={3!r}, realname={4!r})"
        return res.format(self.host, self.port, self.nick, self.ident,
                          self.realname)

    def __str__(self):
        """Return a nice string representation of the IRCConnection."""
        res = "<IRCConnection {0}!{1} at {2}:{3}>"
        return res.format(self.nick, self.ident, self.host, self.port)

    def _connect(self):
        """Connect to our IRC server and register with NICK/USER.

        A failed attempt is logged and retried every eight seconds until one
        succeeds. Retrying is done in a loop (not by recursion) so that the
        registration commands are sent exactly once, the recursion limit can
        never be hit, and the socket of each failed attempt is closed.
        """
        while True:
            self._sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                self._sock.connect((self.host, self.port))
            except socket.error:
                self.logger.exception("Couldn't connect to IRC server; retrying")
                self._sock.close()  # Don't leak the unusable socket
                sleep(8)
            else:
                break
        self._send("NICK {0}".format(self.nick))
        self._send("USER {0} {1} * :{2}".format(self.ident, self.host, self.realname))

    def _close(self):
        """Completely close our connection with the IRC server."""
        try:
            self._sock.shutdown(socket.SHUT_RDWR)  # Shut down connection first
        except socket.error:
            pass  # Ignore if the socket is already down
        self._sock.close()

    def _get(self, size=4096):
        """Receive (i.e. get) up to *size* bytes of data from the server.

        Raises :py:exc:`BrokenSocketError` if the socket returns no data.
        """
        data = self._sock.recv(size)
        if not data:
            # Socket isn't giving us any data, so it is dead or broken:
            raise BrokenSocketError()
        return data

    def _send(self, msg, hidelog=False):
        """Send a single line (*msg*, without line terminator) to the server.

        Sends are serialized and spaced at least 0.75 seconds apart. If the
        socket raises an error, the connection is flagged as no longer running
        instead of propagating the exception. *hidelog* suppresses the debug
        log entry for the line (e.g. for passwords).
        """
        with self._send_lock:
            time_since_last = time() - self._last_send
            if time_since_last < 0.75:
                sleep(0.75 - time_since_last)
            try:
                self._sock.sendall(msg + "\r\n")
            except socket.error:
                self._is_running = False
            else:
                if not hidelog:
                    self.logger.debug(msg)
                self._last_send = time()

    def _split(self, msgs, maxlen, maxsplits=3):
        """Split a large message into multiple messages smaller than maxlen.

        Yields at most *maxsplits* chunks; anything left over after that is
        dropped. Splitting happens on spaces, except that a single word longer
        than *maxlen* is cut mid-word.
        """
        words = msgs.split(" ")
        splits = 0
        while words and splits < maxsplits:
            splits += 1
            if len(words[0]) > maxlen:
                # The word alone is too long: emit a slice, requeue the rest.
                word = words.pop(0)
                yield word[:maxlen]
                words.insert(0, word[maxlen:])
            else:
                msg = []
                while words and len(" ".join(msg + [words[0]])) <= maxlen:
                    msg.append(words.pop(0))
                yield " ".join(msg)

    def _quit(self, msg=None):
        """Issue a quit message to the server. Doesn't close the connection."""
        if msg:
            self._send("QUIT :{0}".format(msg))
        else:
            self._send("QUIT")

    def _process_defaults(self, line):
        """Default process hooks for lines received on IRC.

        *line* is a list of whitespace-separated tokens. Records the time of
        receipt and answers server PINGs. Lines too short to carry a PING
        argument (including empty ones) are tolerated rather than raising
        ``IndexError``.
        """
        self._last_recv = time()
        if len(line) > 1 and line[0] == "PING":  # If we are pinged, pong back
            self.pong(line[1][1:])  # Drop the leading character of the argument

    def _process_message(self, line):
        """To be overridden in subclasses."""
        raise NotImplementedError()

    @property
    def host(self):
        """The hostname of the IRC server, like ``"irc.freenode.net"``."""
        return self._host

    @property
    def port(self):
        """The port of the IRC server, like ``6667``."""
        return self._port

    @property
    def nick(self):
        """Our nickname on the server, like ``"EarwigBot"``."""
        return self._nick

    @property
    def ident(self):
        """Our ident on the server, like ``"earwig"``.

        See http://en.wikipedia.org/wiki/Ident.
        """
        return self._ident

    @property
    def realname(self):
        """Our realname (gecos field) on the server."""
        return self._realname

    def say(self, target, msg, hidelog=False):
        """Send a private message to a target on the server.

        Long messages are split into pieces of at most 400 characters (see
        :py:meth:`_split`), each sent as its own PRIVMSG.
        """
        for msg in self._split(msg, 400):
            msg = "PRIVMSG {0} :{1}".format(target, msg)
            self._send(msg, hidelog)

    def reply(self, data, msg, hidelog=False):
        """Send a private message as a reply to a user on the server.

        The message goes to ``data.chan``; unless ``data.is_private`` is set,
        it is prefixed with the sender's nick in bold.
        """
        if data.is_private:
            self.say(data.chan, msg, hidelog)
        else:
            msg = "\x02{0}\x0F: {1}".format(data.nick, msg)
            self.say(data.chan, msg, hidelog)

    def action(self, target, msg, hidelog=False):
        """Send a private message to a target on the server as an action."""
        msg = "\x01ACTION {0}\x01".format(msg)
        self.say(target, msg, hidelog)

    def notice(self, target, msg, hidelog=False):
        """Send a notice to a target on the server.

        Long messages are split the same way as in :py:meth:`say`.
        """
        for msg in self._split(msg, 400):
            msg = "NOTICE {0} :{1}".format(target, msg)
            self._send(msg, hidelog)

    def join(self, chan, hidelog=False):
        """Join a channel on the server."""
        msg = "JOIN {0}".format(chan)
        self._send(msg, hidelog)

    def part(self, chan, msg=None, hidelog=False):
        """Part from a channel on the server, optionally using a message."""
        if msg:
            self._send("PART {0} :{1}".format(chan, msg), hidelog)
        else:
            self._send("PART {0}".format(chan), hidelog)

    def mode(self, target, level, msg, hidelog=False):
        """Send a mode message to the server."""
        msg = "MODE {0} {1} {2}".format(target, level, msg)
        self._send(msg, hidelog)

    def ping(self, target, hidelog=False):
        """Ping another entity on the server."""
        msg = "PING {0}".format(target)
        self._send(msg, hidelog)

    def pong(self, target, hidelog=False):
        """Pong another entity on the server."""
        msg = "PONG {0}".format(target)
        self._send(msg, hidelog)

    def loop(self):
        """Main loop for the IRC connection.

        Reads from the socket until it breaks or the connection is flagged as
        stopped, dispatching every complete line to
        :py:meth:`_process_defaults` and :py:meth:`_process_message`, then
        closes the socket.
        """
        self._is_running = True
        read_buffer = ""
        while 1:
            try:
                read_buffer += self._get()
            except BrokenSocketError:
                self._is_running = False
                break

            lines = read_buffer.split("\n")
            read_buffer = lines.pop()  # Keep the trailing partial line
            for line in lines:
                line = line.strip().split()
                if not line:
                    # A blank line has no tokens to index; skip it instead of
                    # crashing the loop with an IndexError.
                    continue
                self._process_defaults(line)
                self._process_message(line)
            if self.is_stopped():
                break

        self._close()

    def keep_alive(self):
        """Ensure that we stay connected, stopping if the connection breaks.

        If nothing has been received for over 120 seconds, ping the server
        once; if that ping has gone unanswered for over 60 seconds, stop.
        """
        now = time()
        if now - self._last_recv > 120:
            if self._last_ping < self._last_recv:
                # No ping has been sent since the last received line.
                log = "Last message was received over 120 seconds ago. Pinging."
                self.logger.debug(log)
                self.ping(self.host)
                self._last_ping = now
            elif now - self._last_ping > 60:
                self.logger.debug("No ping response in 60 seconds. Stopping.")
                self.stop()

    def stop(self, msg=None):
        """Request the IRC connection to close at earliest convenience."""
        if self._is_running:
            self._quit(msg)
            self._is_running = False

    def is_stopped(self):
        """Return whether the IRC connection has been (or is to be) closed."""
        return not self._is_running
@@ -0,0 +1,212 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import re | |||
__all__ = ["Data"] | |||
class Data(object):
    """Store data from an individual line received on IRC.

    The line is parsed once, on construction; the results are exposed through
    the read-only properties below.
    """

    def __init__(self, bot, my_nick, line, msgtype):
        """Parse *line* as a message of type *msgtype*.

        *line* is indexed and joined as a sequence of string tokens;
        *my_nick* is stored lowercased for comparisons. Only the value
        ``"PRIVMSG"`` of *msgtype* triggers message/command parsing.
        """
        self._bot = bot
        self._my_nick = my_nick.lower()
        self._line = line

        self._is_private = self._is_command = False
        self._msg = self._command = self._trigger = None
        self._args = []
        self._kwargs = {}

        self._parse(msgtype)

    def __repr__(self):
        """Return the canonical string representation of the Data."""
        res = "Data(bot={0!r}, my_nick={1!r}, line={2!r})"
        return res.format(self._bot, self.my_nick, self.line)

    def __str__(self):
        """Return a nice string representation of the Data."""
        return "<Data of {0!r}>".format(" ".join(self.line))

    def _parse(self, msgtype):
        """Parse a line from IRC into its components as instance attributes.

        Raises ``IndexError`` if the first token is not of the form
        ``:nick!ident@host`` or the line has fewer than three tokens.
        """
        sender = re.findall(r":(.*?)!(.*?)@(.*?)\Z", self.line[0])[0]
        self._nick, self._ident, self._host = sender
        self._chan = self.line[2]

        if msgtype == "PRIVMSG":
            if self.chan.lower() == self.my_nick:
                # This is a privmsg to us, so set 'chan' as the nick of the
                # sender instead of the 'channel', which is ourselves:
                self._chan = self._nick
                self._is_private = True
            # Rejoin the remaining tokens and drop the first character:
            self._msg = " ".join(self.line[3:])[1:]
            self._parse_args()
            self._parse_kwargs()

    def _parse_args(self):
        """Parse command arguments from the message.

        self.msg is converted into the string self.command and the argument
        list self.args if the message starts with a "trigger" ("!", ".", or the
        bot's name); self.is_command will be set to True, and self.trigger will
        store the trigger string. Otherwise, is_command will be set to False.

        Note that self.command and self.args are filled in from the message's
        words even when no trigger is found; only an empty message leaves
        self.command as None.
        """
        self._args = self.msg.strip().split()

        try:
            self._command = self.args.pop(0).lower()
        except IndexError:
            # Empty message: nothing to parse.
            return

        if self.command.startswith("!") or self.command.startswith("."):
            # e.g. "!command arg1 arg2"
            self._is_command = True
            self._trigger = self.command[0]
            self._command = self.command[1:]  # Strip the "!" or "."
        elif re.match(r"{0}\W*?$".format(re.escape(self.my_nick)),
                      self.command, re.U):
            # e.g. "EarwigBot, command arg1 arg2"
            self._is_command = True
            self._trigger = self.my_nick
            try:
                self._command = self.args.pop(0).lower()
            except IndexError:
                # Only our name was said, with no command after it.
                self._command = ""
        else:
            # NOTE(review): the nesting of this branch was reconstructed from
            # a whitespace-stripped source; confirm that it pairs with the
            # if/elif chain above rather than with the inner try.
            #
            # Drop a single trailing period (but leave "..." alone) from the
            # last argument, or from the command if there are no arguments:
            try:
                if self.msg[-1] == "." and self.msg[-2] != ".":
                    if self.args:
                        self.args[-1] = self.args[-1][:-1]
                    else:
                        self._command = self.command[:-1]
            except IndexError:
                pass

    def _parse_kwargs(self):
        """Parse keyword arguments embedded in self.args.

        Parse a command given as "!command key1=value1 key2=value2..." into a
        dict, self.kwargs, like {'key1': 'value1', 'key2': 'value2'...}.
        Arguments without an "=", or with an empty key or value, are skipped.
        """
        for arg in self.args:
            try:
                key, value = re.findall(r"^(.*?)\=(.*?)$", arg)[0]
            except IndexError:
                continue
            if key and value:
                self.kwargs[key] = value

    @property
    def my_nick(self):
        """Our nickname (lowercased), *not* the nickname of the sender."""
        return self._my_nick

    @property
    def line(self):
        """The full message received on IRC, including escape characters."""
        return self._line

    @property
    def chan(self):
        """Channel the message was sent from.

        This will be equal to :py:attr:`nick` if the message is a private
        message.
        """
        return self._chan

    @property
    def nick(self):
        """Nickname of the sender."""
        return self._nick

    @property
    def ident(self):
        """`Ident <http://en.wikipedia.org/wiki/Ident>`_ of the sender."""
        return self._ident

    @property
    def host(self):
        """Hostname of the sender."""
        return self._host

    @property
    def msg(self):
        """Text of the sent message, if it is a message, else ``None``."""
        return self._msg

    @property
    def is_private(self):
        """``True`` if this message was sent to us *only*, else ``False``."""
        return self._is_private

    @property
    def is_command(self):
        """Boolean telling whether or not this message is a bot command.

        A message is considered a command if and only if it begins with the
        character ``"!"``, ``"."``, or the bot's name followed by optional
        punctuation and a space (so ``EarwigBot: do something``, ``EarwigBot,
        do something``, and ``EarwigBot do something`` are all valid).
        """
        return self._is_command

    @property
    def command(self):
        """If the message is a command, this is the name of the command used.

        See :py:attr:`is_command <self.is_command>` for when a message is
        considered a command. For a message that is not a command, this holds
        the lowercased first word of the message; it is ``None`` only when no
        message text was parsed at all.
        """
        return self._command

    @property
    def trigger(self):
        """If this message is a command, this is what triggered it.

        It can be either "!" (``"!help"``), "." (``".help"``), or the bot's
        name (``"EarwigBot: help"``). Otherwise, it will be ``None``."""
        return self._trigger

    @property
    def args(self):
        """List of all arguments given to this command.

        For example, the message ``"!command arg1 arg2 arg3=val3"`` will
        produce the args ``["arg1", "arg2", "arg3=val3"]``. For a message that
        is not a command, this holds the words after the first one; it is
        empty if there are none.
        """
        return self._args

    @property
    def kwargs(self):
        """Dictionary of keyword arguments given to this command.

        For example, the message ``"!command arg1=val1 arg2=val2"`` will
        produce the kwargs ``{"arg1": "val1", "arg2": "val2"}``. This is empty
        if none of the arguments has the form ``key=value``.
        """
        return self._kwargs
@@ -0,0 +1,86 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from earwigbot.irc import IRCConnection, Data | |||
__all__ = ["Frontend"] | |||
class Frontend(IRCConnection):
    """
    **EarwigBot: IRC Frontend Component**

    This component sits on an ordinary IRC server, where users talk to the bot
    and issue commands. Each command is a "command class", i.e. a subclass of
    :py:class:`~earwigbot.commands.Command`. Command classes are imported
    automatically by :py:meth:`commands.load()
    <earwigbot.managers._ResourceManager.load>` when they live in
    :py:mod:`earwigbot.commands` or in the bot's custom command directory
    (see the :doc:`documentation </customizing>`).
    """

    def __init__(self, bot):
        """Read the frontend settings from the bot's config and connect."""
        self.bot = bot
        settings = bot.config.irc["frontend"]
        super(Frontend, self).__init__(
            settings["host"], settings["port"], settings["nick"],
            settings["ident"], settings["realname"],
            bot.logger.getChild("frontend"))
        self._connect()

    def __repr__(self):
        """Return the canonical string representation of the Frontend."""
        template = "Frontend(host={0!r}, port={1!r}, nick={2!r}, ident={3!r}, realname={4!r}, bot={5!r})"
        fields = (self.host, self.port, self.nick, self.ident, self.realname,
                  self.bot)
        return template.format(*fields)

    def __str__(self):
        """Return a nice string representation of the Frontend."""
        return "<Frontend {0}!{1} at {2}:{3}>".format(
            self.nick, self.ident, self.host, self.port)

    def _process_message(self, line):
        """Dispatch one tokenized IRC line to the matching command hooks."""
        kind = line[1]

        if kind == "JOIN":
            data = Data(self.bot, self.nick, line, msgtype="JOIN")
            self.bot.commands.call("join", data)
            return

        if kind == "PRIVMSG":
            data = Data(self.bot, self.nick, line, msgtype="PRIVMSG")
            hook = "msg_private" if data.is_private else "msg_public"
            self.bot.commands.call(hook, data)
            # The generic hook fires for private and public messages alike:
            self.bot.commands.call("msg", data)
            return

        if kind != "376":
            return

        # 376 means we have successfully connected to the server. Identify to
        # NickServ first, but only if credentials are configured:
        try:
            username = self.bot.config.irc["frontend"]["nickservUsername"]
            password = self.bot.config.irc["frontend"]["nickservPassword"]
        except KeyError:
            pass
        else:
            self.say("NickServ", "IDENTIFY {0} {1}".format(username, password),
                     hidelog=True)

        # Then join every channel listed for startup:
        for chan in self.bot.config.irc["frontend"]["channels"]:
            self.join(chan)
@@ -0,0 +1,96 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import re | |||
__all__ = ["RC"] | |||
class RC(object):
    """Store data from an event received from our IRC watcher.

    Construct with the channel and raw message text, then call
    :py:meth:`parse` to populate ``page``, ``flags``, ``url``, ``user``,
    ``comment`` and ``is_edit``; :py:meth:`prettify` requires those.
    """
    # Patterns containing regex escapes are raw strings so that sequences such
    # as "\A" and "\s" reach the regex engine untouched. re_color is left as a
    # normal string so "\x03" is the actual control character.
    re_color = re.compile("\x03([0-9]{1,2}(,[0-9]{1,2})?)?")
    # Edit events carry a diff URL; accept both http:// and https:// ones.
    re_edit = re.compile(r"\A\[\[(.*?)\]\]\s(.*?)\s(https?://.*?)\s\*\s(.*?)\s\*\s(.*?)\Z")
    # Log events have no URL, leaving two whitespace characters before "*".
    re_log = re.compile(r"\A\[\[(.*?)\]\]\s(.*?)\s\s\*\s(.*?)\s\*\s(.*?)\Z")

    pretty_edit = "\x02New {0}\x0F: \x0314[[\x0307{1}\x0314]]\x0306 * \x0303{2}\x0306 * \x0302{3}\x0306 * \x0310{4}"
    pretty_log = "\x02New {0}\x0F: \x0303{1}\x0306 * \x0302{2}\x0306 * \x0310{3}"

    def __init__(self, chan, msg):
        """Store the channel the event came from and its raw message text."""
        self.chan = chan
        self.msg = msg

    def __repr__(self):
        """Return the canonical string representation of the RC."""
        return "RC(chan={0!r}, msg={1!r})".format(self.chan, self.msg)

    def __str__(self):
        """Return a nice string representation of the RC."""
        return "<RC of {0!r} on {1}>".format(self.msg, self.chan)

    def parse(self):
        """Parse a recent change event into some variables.

        Replaces ``self.msg`` with its color-stripped form and sets
        ``self.is_edit``, ``self.flags``, ``self.page``, ``self.url``,
        ``self.user`` and ``self.comment``.

        Raises ``IndexError`` if the message matches neither the edit nor the
        log pattern.
        """
        # Strip IRC color codes; we don't want or need 'em:
        self.msg = self.re_color.sub("", self.msg).strip()
        msg = self.msg
        self.is_edit = True

        # Flags: 'M' for minor edit, 'B' for bot edit, 'create' for a user
        # creation log entry, etc:
        try:
            page, self.flags, url, user, comment = self.re_edit.findall(msg)[0]
        except IndexError:
            # We're probably missing the URL part, because it's a log entry,
            # which lacks one:
            page, flags, user, comment = self.re_log.findall(msg)[0]
            # Build a URL from the channel name minus its first character:
            url = "http://{0}.org/wiki/{1}".format(self.chan[1:], page)

            self.is_edit = False  # This is a log entry, not edit

            # Flags tends to have extra whitespace at the end when they're
            # log entries:
            self.flags = flags.strip()

        self.page, self.url, self.user, self.comment = page, url, user, comment

    def prettify(self):
        """Make a nice, colorful message to send back to the IRC front-end.

        Must be called after :py:meth:`parse`.
        """
        flags = self.flags
        if self.is_edit:
            if "N" in flags:
                event = "page"  # "New page:"
            else:
                event = "edit"  # "New edit:"
                if "B" in flags:
                    event = "bot edit"  # "New bot edit:"
                if "M" in flags:
                    event = "minor " + event  # "New minor (bot)? edit:"
            return self.pretty_edit.format(event, self.page, self.user,
                                           self.url, self.comment)

        if flags == "delete":
            event = "deletion"  # "New deletion:"
        elif flags == "protect":
            event = "protection"  # "New protection:"
        elif flags == "create":
            event = "user"  # "New user:"
        else:
            event = flags  # Works for "move", "block", etc
        return self.pretty_log.format(event, self.user, self.url, self.comment)
@@ -0,0 +1,129 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import imp | |||
import os | |||
from earwigbot.irc import IRCConnection, RC | |||
__all__ = ["Watcher"] | |||
class Watcher(IRCConnection):
    """
    **EarwigBot: IRC Watcher Component**

    The IRC watcher runs on a wiki recent-changes server and listens for
    edits. Users cannot interact with this part of the bot. When an event
    occurs, we run it through some rules stored in our working directory under
    :file:`rules.py`, which can result in wiki bot tasks being started or
    messages being sent to channels on the IRC frontend.
    """

    def __init__(self, bot):
        """Read the watcher settings from the bot's config and connect."""
        self.bot = bot
        cf = bot.config.irc["watcher"]
        base = super(Watcher, self)
        base.__init__(cf["host"], cf["port"], cf["nick"], cf["ident"],
                      cf["realname"], bot.logger.getChild("watcher"))
        self._prepare_process_hook()
        self._connect()

    def __repr__(self):
        """Return the canonical string representation of the Watcher."""
        res = "Watcher(host={0!r}, port={1!r}, nick={2!r}, ident={3!r}, realname={4!r}, bot={5!r})"
        return res.format(self.host, self.port, self.nick, self.ident,
                          self.realname, self.bot)

    def __str__(self):
        """Return a nice string representation of the Watcher."""
        res = "<Watcher {0}!{1} at {2}:{3}>"
        return res.format(self.nick, self.ident, self.host, self.port)

    def _process_message(self, line):
        """Process a single message (a list of tokens) from IRC."""
        if line[1] == "PRIVMSG":
            chan = line[2]

            # Ignore messages originating from channels not in our list, to
            # prevent someone PMing us false data:
            if chan not in self.bot.config.irc["watcher"]["channels"]:
                return

            msg = " ".join(line[3:])[1:]
            rc = RC(chan, msg)  # New RC object to store this event's data
            rc.parse()  # Parse a message into pagenames, usernames, etc.
            self._process_rc_event(rc)

        # When we've finished starting up, join all watcher channels:
        elif line[1] == "376":
            for chan in self.bot.config.irc["watcher"]["channels"]:
                self.join(chan)

    def _prepare_process_hook(self):
        """Create our RC event process hook from information in rules.py.

        This will get put in the function self._process_hook, which takes the
        Bot object and an RC object and returns a list of frontend channels to
        report this event to.

        If no ``rules`` module is found in the bot's root directory, the
        default do-nothing hook is kept silently. If the module exists but
        fails to load, or has no ``process`` attribute, the default hook is
        kept and the problem is logged.
        """
        # Set a default RC process hook that does nothing:
        self._process_hook = lambda bot, rc: ()

        path = self.bot.config.root_dir
        try:
            f, path, desc = imp.find_module("rules", [path])
        except ImportError:
            return  # No rules module; this is not an error
        try:
            module = imp.load_module("rules", f, path, desc)
        except Exception:
            # A broken rules module would otherwise disable all RC processing
            # without leaving any trace, so record the traceback:
            self.logger.exception("Couldn't load RC event rules; ignoring them")
            return
        finally:
            # find_module() returns None instead of a file object when the
            # module it found is a package:
            if f:
                f.close()

        self._process_hook_module = module
        try:
            self._process_hook = module.process
        except AttributeError:
            e = "RC event rules imported correctly, but no process(bot, rc) function was found"
            self.logger.error(e)
            return

    def _process_rc_event(self, rc):
        """Process a recent change event from IRC (or, an RC object).

        The actual processing is configurable, so we don't have that hard-coded
        here. We simply call our process hook (self._process_hook), created by
        self._prepare_process_hook() from the "rules" module, and report the
        event to each frontend channel the hook returns, provided the frontend
        exists and is not stopped. Reports are capped at 400 characters.
        """
        chans = self._process_hook(self.bot, rc)
        with self.bot.component_lock:
            frontend = self.bot.frontend
            if chans and frontend and not frontend.is_stopped():
                pretty = rc.prettify()
                if len(pretty) > 400:
                    msg = pretty[:397] + "..."
                else:
                    msg = pretty
                for chan in chans:
                    frontend.say(chan, msg)
@@ -0,0 +1,81 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
""" | |||
Implements a hierarchy of importing classes as defined in PEP 302 to load | |||
modules in a safe yet lazy manner. | |||
""" | |||
from imp import acquire_lock, release_lock | |||
import sys | |||
from types import ModuleType | |||
__all__ = ["LazyImporter"] | |||
def _getattribute(self, attr):
    """Load the real module on first attribute read, then return *attr*.

    Installed as ``__getattribute__`` on the placeholder class that
    ``_LazyModule`` builds.
    """
    _load(self)
    # _load() has put ModuleType.__getattribute__ back on the class, so this
    # lookup no longer re-enters this function:
    return self.__getattribute__(attr)
def _setattr(self, attr, value):
    """Load the real module on first attribute write, then set *attr*.

    Installed as ``__setattr__`` on the placeholder class that
    ``_LazyModule`` builds.
    """
    _load(self)
    # _load() has put ModuleType.__setattr__ back on the class, so this is
    # now an ordinary module attribute assignment:
    self.__setattr__(attr, value)
def _load(self):
    """Restore normal attribute access on *self*'s class, then reload it.

    *self* is a placeholder module object; its class is the one-off
    subclass of ``ModuleType`` created by ``_LazyModule``.
    """
    # Swap the standard hooks back in first; otherwise the attribute
    # accesses below would call _getattribute()/_setattr() again:
    type(self).__getattribute__ = ModuleType.__getattribute__
    type(self).__setattr__ = ModuleType.__setattr__
    # Python 2 built-in reload(); presumably this executes the real
    # module's code into the existing placeholder object -- confirm.
    reload(self)
class _LazyModule(type):
    """Factory for placeholder modules that load themselves on first use.

    Calling ``_LazyModule(name)`` does not return a class: ``__new__``
    returns the ``sys.modules`` entry for *name*, first creating and
    registering a placeholder module object if no entry exists yet.
    """
    def __new__(cls, name):
        # acquire_lock()/release_lock() come from ``imp``; the lock is held
        # while sys.modules is inspected and updated.
        acquire_lock()
        try:
            if name not in sys.modules:
                # Each placeholder gets its own ModuleType subclass whose
                # attribute hooks trigger loading (see _getattribute and
                # _setattr).
                attributes = {
                    "__name__": name,
                    "__getattribute__": _getattribute,
                    "__setattr__": _setattr
                }
                parents = (ModuleType,)
                klass = type.__new__(cls, "module", parents, attributes)
                sys.modules[name] = klass(name)
            # Either the module that was already imported, or the
            # placeholder registered just above:
            return sys.modules[name]
        finally:
            release_lock()
class LazyImporter(object):
    """PEP 302 finder/loader that hands out lazily-loaded modules."""

    def __init__(self):
        """Create the importer and append it to ``sys.meta_path``."""
        self._modules = {}
        sys.meta_path.append(self)

    def new(self, name):
        """Create (or fetch) the lazy module *name* and remember it."""
        placeholder = _LazyModule(name)
        self._modules[name] = placeholder
        return placeholder

    def find_module(self, fullname, path=None):
        """Return this importer if it should load *fullname*, else None.

        We only claim modules that were registered through :py:meth:`new`
        and that are not currently present in ``sys.modules``.
        """
        if fullname not in self._modules:
            return None
        if fullname in sys.modules:
            return None
        return self

    def load_module(self, fullname):
        """Return the remembered module for *fullname* and forget it.

        Raises :py:exc:`KeyError` if *fullname* was never registered.
        """
        module = self._modules.pop(fullname)
        return module
@@ -0,0 +1,269 @@ | |||
#! /usr/bin/env python | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import imp | |||
from os import listdir, path | |||
from re import sub | |||
from threading import RLock, Thread | |||
from time import gmtime, strftime | |||
from earwigbot.commands import Command | |||
from earwigbot.tasks import Task | |||
__all__ = ["CommandManager", "TaskManager"] | |||
class _ResourceManager(object): | |||
""" | |||
**EarwigBot: Resource Manager** | |||
Resources are essentially objects dynamically loaded by the bot, both | |||
packaged with it (built-in resources) and created by users (plugins, aka | |||
custom resources). Currently, the only two types of resources are IRC | |||
commands and bot tasks. These are both loaded from two locations: the | |||
:py:mod:`earwigbot.commands` and :py:mod:`earwigbot.tasks packages`, and | |||
the :file:`commands/` and :file:`tasks/` directories within the bot's | |||
working directory. | |||
This class handles the low-level tasks of (re)loading resources via | |||
:py:meth:`load`, retrieving specific resources via :py:meth:`get`, and | |||
iterating over all resources via :py:meth:`__iter__`. | |||
""" | |||
def __init__(self, bot, name, base): | |||
self.bot = bot | |||
self.logger = bot.logger.getChild(name) | |||
self._resources = {} | |||
self._resource_name = name # e.g. "commands" or "tasks" | |||
self._resource_base = base # e.g. Command or Task | |||
self._resource_access_lock = RLock() | |||
def __repr__(self): | |||
"""Return the canonical string representation of the manager.""" | |||
res = "{0}(bot={1!r}, name={2!r}, base={3!r})" | |||
return res.format(self.__class__.__name__, self.bot, | |||
self._resource_name, self._resource_base) | |||
def __str__(self): | |||
"""Return a nice string representation of the manager.""" | |||
return "<{0} of {1}>".format(self.__class__.__name__, self.bot) | |||
def __iter__(self): | |||
with self.lock: | |||
for resource in self._resources.itervalues(): | |||
yield resource | |||
def _load_resource(self, name, path, klass): | |||
"""Instantiate a resource class and add it to the dictionary.""" | |||
res_type = self._resource_name[:-1] # e.g. "command" or "task" | |||
if hasattr(klass, "name"): | |||
res_config = getattr(self.bot.config, self._resource_name) | |||
if getattr(klass, "name") in res_config.get("disable", []): | |||
log = "Skipping disabled {0} {1}" | |||
self.logger.debug(log.format(res_type, getattr(klass, "name"))) | |||
return | |||
try: | |||
resource = klass(self.bot) # Create instance of resource | |||
except Exception: | |||
e = "Error instantiating {0} class in '{1}' (from {2})" | |||
self.logger.exception(e.format(res_type, name, path)) | |||
else: | |||
self._resources[resource.name] = resource | |||
self.logger.debug("Loaded {0} {1}".format(res_type, resource.name)) | |||
def _load_module(self, name, path): | |||
"""Load a specific resource from a module, identified by name and path. | |||
We'll first try to import it using imp magic, and if that works, make | |||
instances of any classes inside that are subclasses of the base | |||
(:py:attr:`self._resource_base <_resource_base>`), add them to the | |||
resources dictionary with :py:meth:`self._load_resource() | |||
<_load_resource>`, and finally log the addition. Any problems along | |||
the way will either be ignored or logged. | |||
""" | |||
f, path, desc = imp.find_module(name, [path]) | |||
try: | |||
module = imp.load_module(name, f, path, desc) | |||
except Exception: | |||
e = "Couldn't load module '{0}' (from {1})" | |||
self.logger.exception(e.format(name, path)) | |||
return | |||
finally: | |||
f.close() | |||
for obj in vars(module).values(): | |||
if type(obj) is type: | |||
isresource = issubclass(obj, self._resource_base) | |||
if isresource and not obj is self._resource_base: | |||
self._load_resource(name, path, obj) | |||
def _load_directory(self, dir): | |||
"""Load all valid resources in a given directory.""" | |||
self.logger.debug("Loading directory {0}".format(dir)) | |||
res_config = getattr(self.bot.config, self._resource_name) | |||
disabled = res_config.get("disable", []) | |||
processed = [] | |||
for name in listdir(dir): | |||
if not name.endswith(".py") and not name.endswith(".pyc"): | |||
continue | |||
if name.startswith("_") or name.startswith("."): | |||
continue | |||
modname = sub("\.pyc?$", "", name) # Remove extension | |||
if modname in disabled: | |||
log = "Skipping disabled module {0}".format(modname) | |||
self.logger.debug(log) | |||
continue | |||
if modname not in processed: | |||
self._load_module(modname, dir) | |||
processed.append(modname) | |||
@property | |||
def lock(self): | |||
"""The resource access/modify lock.""" | |||
return self._resource_access_lock | |||
def load(self): | |||
"""Load (or reload) all valid resources into :py:attr:`_resources`.""" | |||
name = self._resource_name # e.g. "commands" or "tasks" | |||
with self.lock: | |||
self._resources.clear() | |||
builtin_dir = path.join(path.dirname(__file__), name) | |||
plugins_dir = path.join(self.bot.config.root_dir, name) | |||
if getattr(self.bot.config, name).get("disable") is True: | |||
log = "Skipping disabled builtins directory: {0}" | |||
self.logger.debug(log.format(builtin_dir)) | |||
else: | |||
self._load_directory(builtin_dir) # Built-in resources | |||
if path.exists(plugins_dir) and path.isdir(plugins_dir): | |||
self._load_directory(plugins_dir) # Custom resources, plugins | |||
else: | |||
log = "Skipping nonexistent plugins directory: {0}" | |||
self.logger.debug(log.format(plugins_dir)) | |||
if self._resources: | |||
msg = "Loaded {0} {1}: {2}" | |||
resources = ", ".join(self._resources.keys()) | |||
self.logger.info(msg.format(len(self._resources), name, resources)) | |||
else: | |||
self.logger.info("Loaded 0 {0}".format(name)) | |||
def get(self, key): | |||
"""Return the class instance associated with a certain resource. | |||
Will raise :py:exc:`KeyError` if the resource (a command or task) is | |||
not found. | |||
""" | |||
with self.lock: | |||
return self._resources[key] | |||
class CommandManager(_ResourceManager): | |||
""" | |||
Manages (i.e., loads, reloads, and calls) IRC commands. | |||
""" | |||
def __init__(self, bot):
    """Create a manager for the IRC commands of *bot*.

    Resources are registered under the name ``"commands"`` and must
    subclass ``Command``.
    """
    parent = super(CommandManager, self)
    parent.__init__(bot, "commands", Command)
def _wrap_check(self, command, data): | |||
"""Check whether a command should be called, catching errors.""" | |||
try: | |||
return command.check(data) | |||
except Exception: | |||
e = "Error checking command '{0}' with data: {1}:" | |||
self.logger.exception(e.format(command.name, data)) | |||
def _wrap_process(self, command, data): | |||
"""process() the message, catching and reporting any errors.""" | |||
try: | |||
command.process(data) | |||
except Exception: | |||
e = "Error executing command '{0}':" | |||
self.logger.exception(e.format(command.name)) | |||
def call(self, hook, data):
    """Respond to a hook type and a :py:class:`Data` object.

    Commands are tried in iteration order. The first one that listens on
    *hook* and whose check passes is processed in a new daemon thread,
    after which no further commands are tried.
    """
    for command in self:
        if hook not in command.hooks:
            continue
        if not self._wrap_check(command, data):
            continue
        worker = Thread(target=self._wrap_process, args=(command, data))
        started_at = strftime("%b %d %H:%M:%S")
        worker.name = "irc:{0} ({1})".format(command.name, started_at)
        worker.daemon = True
        worker.start()
        return
class TaskManager(_ResourceManager): | |||
""" | |||
Manages (i.e., loads, reloads, schedules, and runs) wiki bot tasks. | |||
""" | |||
def __init__(self, bot):
    """Create a manager for the wiki tasks of *bot*.

    Resources are registered under the name ``"tasks"`` and must subclass
    ``Task``.
    """
    parent = super(TaskManager, self)
    parent.__init__(bot, "tasks", Task)
def _wrapper(self, task, **kwargs): | |||
"""Wrapper for task classes: run the task and catch any errors.""" | |||
try: | |||
task.run(**kwargs) | |||
except Exception: | |||
msg = "Task '{0}' raised an exception and had to stop:" | |||
self.logger.exception(msg.format(task.name)) | |||
else: | |||
msg = "Task '{0}' finished successfully" | |||
self.logger.info(msg.format(task.name)) | |||
def start(self, task_name, **kwargs):
    """Start a given task in a new daemon thread, and return the thread.

    kwargs are passed to :py:meth:`task.run() <earwigbot.tasks.Task.run>`.
    If the task is not found, ``None`` will be returned and an error will
    be logged.
    """
    self.logger.info("Starting task '{0}' in a new thread".format(task_name))

    try:
        task = self.get(task_name)
    except KeyError:
        self.logger.error("Couldn't find task '{0}'".format(task_name))
        return None

    worker = Thread(target=self._wrapper, args=(task,), kwargs=kwargs)
    started_at = strftime("%b %d %H:%M:%S")
    worker.name = "{0} ({1})".format(task_name, started_at)
    worker.daemon = True
    worker.start()
    return worker
def schedule(self, now=None):
    """Start all tasks that are supposed to be run at a given time.

    *now* is a time tuple with ``tm_*`` attributes; when omitted (or
    falsy) the current UTC time from ``gmtime()`` is used. The bot config
    decides which tasks are due.
    """
    moment = now or gmtime()
    # Get list of tasks to run this turn:
    due = self.bot.config.schedule(moment.tm_min, moment.tm_hour,
                                   moment.tm_mday, moment.tm_mon,
                                   moment.tm_wday)

    for entry in due:
        if isinstance(entry, list):
            # They've specified kwargs, so pass those to start:
            task_name, task_kwargs = entry[0], entry[1]
            self.start(task_name, **task_kwargs)
        else:
            # Otherwise, the entry is just the task name:
            self.start(entry)
@@ -0,0 +1,143 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from earwigbot import exceptions | |||
from earwigbot import wiki | |||
__all__ = ["Task"] | |||
class Task(object):
    """
    **EarwigBot: Base Bot Task**

    This package provides built-in wiki bot "tasks" EarwigBot runs. Additional
    tasks can be installed as plugins in the bot's working directory.

    This class (import with ``from earwigbot.tasks import Task``) can be
    subclassed to create custom bot tasks.

    To run a task, use :py:meth:`bot.tasks.start(name, **kwargs)
    <earwigbot.managers.TaskManager.start>`. ``**kwargs`` get passed to the
    Task's :meth:`run` method.
    """
    # Name used to look the task up and to name its logger; subclasses set it.
    name = None
    # Task number, substituted into edit summaries and shutoff page titles.
    number = 0

    def __init__(self, bot):
        """Constructor for new tasks.

        This is called once immediately after the task class is loaded by
        the task manager (in :py:meth:`tasks.load()
        <earwigbot.managers._ResourceManager.load>`). Don't override this
        directly; if you do, remember to call
        ``super(YourTask, self).__init__(bot)`` first. Use :py:meth:`setup`
        for typical task-init/setup needs.
        """
        self.bot = bot
        self.config = bot.config
        self.logger = bot.tasks.logger.getChild(self.name)
        self.setup()

    def __repr__(self):
        """Return the canonical string representation of the Task."""
        res = "Task(name={0!r}, number={1!r}, bot={2!r})"
        return res.format(self.name, self.number, self.bot)

    def __str__(self):
        """Return a nice string representation of the Task."""
        res = "<Task {0} ({1}) of {2}>"
        return res.format(self.name, self.number, self.bot)

    def setup(self):
        """Hook called immediately after the task is loaded.

        Does nothing by default; feel free to override.
        """
        pass

    def run(self, **kwargs):
        """Main entry point to run a given task.

        This is called directly by :py:meth:`tasks.start()
        <earwigbot.managers.TaskManager.start>` and is the main way to make a
        task do stuff. *kwargs* will be any keyword arguments passed to
        :py:meth:`~earwigbot.managers.TaskManager.start`, which are entirely
        optional.
        """
        pass

    def make_summary(self, comment):
        """Make an edit summary by filling in variables in a config value.

        :py:attr:`config.wiki["summary"] <earwigbot.config.BotConfig.wiki>` is
        used, where ``$2`` is replaced by the main summary body, given by the
        *comment* argument, and ``$1`` is replaced by the task number.

        If the config value is not found, we'll just return *comment* as-is.
        """
        try:
            summary = self.bot.config.wiki["summary"]
        except KeyError:
            return comment
        return summary.replace("$1", str(self.number)).replace("$2", comment)

    def shutoff_enabled(self, site=None):
        """Return whether on-wiki shutoff is enabled for this task.

        We check a certain page for certain content. This is determined by
        our config file: :py:attr:`config.wiki["shutoff"]["page"]
        <earwigbot.config.BotConfig.wiki>` is used as the title, with any
        embedded ``$1`` replaced by our username and ``$2`` replaced by the
        task number; and :py:attr:`config.wiki["shutoff"]["disabled"]
        <earwigbot.config.BotConfig.wiki>` is used as the content.

        If the page has that exact content or the page does not exist, then
        shutoff is "disabled", meaning the bot is supposed to run normally, and
        we return ``False``. If the page's content is something other than
        what we expect, shutoff is enabled, and we return ``True``.

        If a site is not provided, we'll try to use :py:attr:`self.site <site>`
        if it's set. Otherwise, we'll use our default site.
        """
        if not site:
            # An unset *or empty* self.site falls through to the default
            # site instead of being used as-is:
            site = getattr(self, "site", None) or self.bot.wiki.get_site()

        try:
            cfg = self.config.wiki["shutoff"]
        except KeyError:
            return False  # Shutoff checking isn't configured at all
        title = cfg.get("page", "User:$1/Shutoff/Task $2")
        username = site.get_user().name
        title = title.replace("$1", username).replace("$2", str(self.number))
        page = site.get_page(title)

        try:
            content = page.get()
        except exceptions.PageNotFoundError:
            return False
        if content == cfg.get("disabled", "run"):
            return False

        self.logger.warning("Emergency task shutoff has been enabled!")
        return True
@@ -0,0 +1,329 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import re | |||
from earwigbot import exceptions | |||
from earwigbot.tasks import Task | |||
from earwigbot.wiki import constants | |||
class WikiProjectTagger(Task): | |||
"""A task to tag talk pages with WikiProject banners. | |||
Usage: :command:`earwigbot -t wikiproject_tagger PATH | |||
--banner BANNER (--category CAT | --file FILE) [--summary SUM] | |||
[--append TEXT] [--autoassess] [--nocreate] [--recursive NUM] | |||
[--site SITE]` | |||
.. glossary:: | |||
``--banner BANNER`` | |||
the page name of the banner to add, without a namespace (unless the | |||
namespace is something other than ``Template``) so | |||
``--banner WikiProject Biography`` for ``{{WikiProject Biography}}`` | |||
``--category CAT`` or ``--file FILE`` | |||
determines which pages to tag; either all pages in a category (to | |||
include subcategories as well, see ``--recursive``) or all | |||
pages/categories in a file (utf-8 encoded and path relative to the | |||
current directory) | |||
``--summary SUM`` | |||
an optional edit summary to use; defaults to | |||
``"Adding WikiProject banner {{BANNER}}."`` | |||
``--append TEXT`` | |||
optional text to append to the banner (after an autoassessment, if | |||
any), like ``|importance=low`` | |||
``--autoassess`` | |||
try to assess each article's class automatically based on the class of | |||
other banners on the same page | |||
``--nocreate`` | |||
don't create new talk pages with just a banner if the page doesn't | |||
already exist | |||
``--recursive NUM`` | |||
recursively go through subcategories up to a maximum depth of ``NUM``, | |||
or if ``NUM`` isn't provided, go infinitely (this can be dangerous) | |||
``--site SITE`` | |||
the ID of the site to tag pages on, defaulting to the... default site | |||
""" | |||
# Task name; this is the value given after ``-t`` in the usage line of the
# class docstring.
name = "wikiproject_tagger"

# Regexes for template names that should always go above the banner, based
# on [[Wikipedia:Talk page layout]]. add_banner() tests each pattern with
# re.match() against the template name after lowercasing it and replacing
# underscores with spaces, so patterns are written in lowercase and use
# " ?" where a space is optional:
TOP_TEMPS = [
    r"skip ?to ?(toc|talk|toctalk)$",
    r"ga ?nominee$",
    r"(user ?)?talk ?(header|page|page ?header)$",
    r"community ?article ?probation$",
    r"censor(-nudity)?$",
    r"blp(o| ?others?)?$",
    r"controvers(ial2?|y)$",
    r"(not ?(a ?)?)?forum$",
    r"tv(episode|series)talk$",
    r"recurring ?themes$",
    r"faq$",
    r"(round ?in ?)?circ(les|ular)$",
    r"ar(ti|it)cle ?(history|milestones)$",
    r"failed ?ga$",
    r"old ?prod( ?full)?$",
    r"(old|previous) ?afd$",
    r"((wikiproject|wp) ?)?bio(graph(y|ies))?$",
]
def _upperfirst(self, text): | |||
"""Try to uppercase the first letter of a string.""" | |||
try: | |||
return text[0].upper() + text[1:] | |||
except IndexError: | |||
return text | |||
def run(self, **kwargs):
    """Main entry point for the bot task.

    Requires a ``banner`` kwarg plus at least one of ``category`` or
    ``file``; otherwise an error is logged and nothing is done. The
    remaining kwargs (``site``, ``summary``, ``append``, ``autoassess``,
    ``nocreate``, ``recursive``) are optional. Stops quietly if on-wiki
    shutoff is found to be enabled while tagging.
    """
    if not ("file" in kwargs or "category" in kwargs):
        self.logger.error("No pages to tag; I need either a 'category' or a 'file' passed as kwargs")
        return
    if "banner" not in kwargs:
        self.logger.error("Needs a banner to add passed as the 'banner' kwarg")
        return

    site = self.bot.wiki.get_site(name=kwargs.get("site"))
    banner, names = self.get_names(site, kwargs["banner"])
    if not names:
        return  # get_names() found no usable banner

    job = _Job(
        banner,
        names,
        kwargs.get("summary", "Adding WikiProject banner $3."),
        kwargs.get("append"),
        kwargs.get("autoassess", False),
        kwargs.get("nocreate", False),
    )
    depth = kwargs.get("recursive", 0)

    try:
        self.run_job(kwargs, site, job, depth)
    except _ShutoffEnabled:
        return
def run_job(self, kwargs, site, job, recursive):
    """Run a tagging *job* on a given *site*.

    If *kwargs* has a ``category``, every page in it is processed. If it
    has a ``file``, each non-blank line is taken as a page title
    (optionally wrapped in ``[[...]]``); category titles are processed as
    categories and everything else as single pages. *recursive* is passed
    through to :py:meth:`process_category`.
    """
    if "category" in kwargs:
        title = kwargs["category"]
        title = self.guess_namespace(site, title, constants.NS_CATEGORY)
        self.process_category(site.get_page(title), job, recursive)

    if "file" in kwargs:
        with open(kwargs["file"], "r") as fileobj:
            for line in fileobj:
                # Lines read from a file keep their trailing newline, so
                # strip before testing for "]]" or using the text as a
                # page title:
                line = line.decode("utf8").strip()
                if not line:
                    continue
                if line.startswith("[[") and line.endswith("]]"):
                    line = line[2:-2]
                page = site.get_page(line)
                if page.namespace == constants.NS_CATEGORY:
                    self.process_category(page, job, recursive)
                else:
                    self.process_page(page, job)
def guess_namespace(self, site, title, assumed):
    """If the given *title* does not have an explicit namespace, guess it.

    For example, when transcluding templates, the namespace is guessed to
    be ``NS_TEMPLATE`` unless one is explicitly declared (so ``{{foo}}`` ->
    ``[[Template:Foo]]``, but ``{{:foo}}`` -> ``[[Foo]]``).

    *assumed* is the namespace ID to prepend when the text before the
    first colon (if any) is not a namespace known to *site*.
    """
    prefix, colon, _ = title.partition(":")
    has_namespace = False
    if colon:
        try:
            site.namespace_name_to_id(prefix)
        except exceptions.NamespaceNotFoundError:
            pass  # The colon is just part of the page name
        else:
            has_namespace = True

    if has_namespace:
        return title
    return u":".join((site.namespace_id_to_name(assumed), title))
def get_names(self, site, banner):
    """Return all possible aliases for a given *banner* template.

    Returns a ``(banner, names)`` tuple. *banner* has any explicit
    namespace prefix removed; *names* lists the banner, its full title,
    and every redirect to it reported by the API (at most 500), or is
    ``None`` if the banner page does not exist.
    """
    title = self.guess_namespace(site, banner, constants.NS_TEMPLATE)
    if title == banner:
        # A namespace was given explicitly; keep only the bare page name
        banner = banner.split(":", 1)[1]

    page = site.get_page(title)
    if page.exists != page.PAGE_EXISTS:
        self.logger.error(u"Banner [[{0}]] does not exist".format(title))
        return banner, None

    names = [self._upperfirst(banner)]
    if banner != title:
        names.append(self._upperfirst(title))

    response = site.api_query(action="query", list="backlinks", bllimit=500,
                              blfilterredir="redirects", bltitle=title)
    for redirect in response["query"]["backlinks"]:
        alias = redirect["title"]
        names.append(alias)
        if redirect["ns"] == constants.NS_TEMPLATE:
            # Also accept the alias without its "Template:" prefix
            names.append(alias.split(":", 1)[1])

    summary = u"Found {0} aliases for banner [[{1}]]".format(len(names), title)
    self.logger.debug(summary)
    return banner, names
def process_category(self, page, job, recursive):
    """Try to tag all pages in the given category.

    Non-category members are handed to :py:meth:`process_page`.
    Subcategories are only entered when *recursive* is truthy: ``True``
    means unlimited depth, while a positive integer is the number of
    further levels to descend (it is decremented for each level).
    """
    self.logger.info(u"Processing category: [[{0}]]".format(page.title))
    for member in page.get_members():
        if member.namespace != constants.NS_CATEGORY:
            self.process_page(member, job)
        elif recursive is True:
            # Checked separately because ``True - 1`` would be 0
            self.process_category(member, job, True)
        elif recursive:
            self.process_category(member, job, recursive - 1)
def process_page(self, page, job):
    """Try to tag a specific *page* using the *job* description.

    The banner goes on the talk page of *page*. Pages already carrying the
    banner (under any of ``job.names``) are skipped, missing talk pages
    are created unless ``job.nocreate`` is set, and invalid pages are
    skipped with an error. Raises ``_ShutoffEnabled`` if the periodic
    shutoff check fails.
    """
    # Do a shutoff check every ten pages
    if job.counter % 10 == 0 and self.shutoff_enabled(page.site):
        raise _ShutoffEnabled()
    job.counter += 1

    talk = page if page.is_talkpage else page.toggle_talk()
    try:
        code = talk.parse()
    except exceptions.PageNotFoundError:
        if job.nocreate:
            note = u"Skipping nonexistent page: [[{0}]]".format(talk.title)
            self.logger.info(note)
            return
        note = u"Tagging new page: [[{0}]]".format(talk.title)
        self.logger.info(note)
        banner = "{{" + job.banner + job.append + "}}"
        summary = job.summary.replace("$3", banner)
        talk.edit(banner, self.make_summary(summary))
        return
    except exceptions.InvalidPageError:
        note = u"Skipping invalid page: [[{0}]]".format(talk.title)
        self.logger.error(note)
        return

    # Don't tag pages that already have the banner under any alias:
    for template in code.ifilter_templates(recursive=True):
        found = self._upperfirst(template.name.strip())
        if found in job.names:
            note = u"Skipping page: [[{0}]]; already tagged with '{1}'"
            self.logger.info(note.format(talk.title, found))
            return

    banner = self.make_banner(job, code)
    shell = self.get_banner_shell(code)
    if not shell:
        self.add_banner(code, banner)
    elif shell.has_param(1):
        shell.get(1).value.insert(0, banner + "\n")
    else:
        shell.add(1, banner)
    self.apply_genfixes(code)

    self.logger.info(u"Tagging page: [[{0}]]".format(talk.title))
    summary = job.summary.replace("$3", banner)
    talk.edit(unicode(code), self.make_summary(summary))
def make_banner(self, job, code):
    """Return banner text to add based on a *job* and a page's *code*.

    The result is ``{{BANNER...}}`` with ``job.append`` placed just before
    the closing braces. When ``job.autoassess`` is set, the ``class``
    parameters of the templates already in *code* are tallied, and if more
    than 75% of the recognised values agree, ``|class=...`` is added with
    that value. Pages with no recognised ``class`` values get no
    assessment.
    """
    banner = "{{" + job.banner
    if job.autoassess:
        classes = {"fa": 0, "fl": 0, "ga": 0, "a": 0, "b": 0, "start": 0,
                   "stub": 0, "list": 0, "dab": 0, "c": 0, "redirect": 0,
                   "book": 0, "template": 0, "category": 0}
        for template in code.ifilter_templates(recursive=True):
            if template.has_param("class"):
                # Ignore surrounding whitespace such as "class = B\n"
                value = unicode(template.get("class").value).strip().lower()
                if value in classes:
                    classes[value] += 1

        total = sum(classes.values())
        # With nothing tallied there is no majority to compute (and the
        # ratio below would divide by zero):
        if total:
            rank = max(classes, key=classes.get)
            confidence = float(classes[rank]) / total
            if confidence > 0.75:
                if rank in ("fa", "fl", "ga"):
                    banner += "|class=" + rank.upper()
                else:
                    banner += "|class=" + self._upperfirst(rank)
    return banner + job.append + "}}"
def get_banner_shell(self, code):
    """Return the banner shell template within *code*, else ``None``.

    Top-level templates are searched first; nested templates are only
    searched if that finds nothing. The first match is returned.
    """
    # The trailing group requires the name to end right there (followed by
    # "|" or "}}"), so that templates merely *starting* with a shell name,
    # e.g. {{Shellfish}}, are not mistaken for a banner shell:
    regex = r"^\{\{\s*((WikiProject|WP)[ _]?Banner[ _]?S(hell)?|W(BPS|PBS|PB)|Shell)\s*(\||\}\})"
    shells = code.filter_templates(matches=regex)
    if not shells:
        shells = code.filter_templates(matches=regex, recursive=True)
    if not shells:
        return None
    log = u"Inserting banner into shell: {0}"
    self.logger.debug(log.format(shells[0].name))
    return shells[0]
def add_banner(self, code, banner):
    """Add *banner* to *code*, following template order conventions.

    The banner is inserted after the last template whose name matches one
    of the ``TOP_TEMPS`` patterns, or at the very start if none does.
    """
    position = 0
    for offset, template in enumerate(code.ifilter_templates()):
        normalized = template.name.lower().replace("_", " ")
        for pattern in self.TOP_TEMPS:
            if not re.match(pattern, normalized):
                continue
            self.logger.info("Skipping top template: {0}".format(normalized))
            position = offset + 1

    self.logger.debug(u"Inserting banner at index {0}".format(position))
    code.insert(position, banner)
def apply_genfixes(self, code):
    """Apply general fixes to *code*, such as template substitution.

    Currently the only fix is renaming the known "unsigned comment"
    templates to ``subst:unsigned``.
    """
    # The trailing group makes the name end right after the alternatives
    # (next comes "|" or "}}"). Without it, the optional parts let the
    # pattern match any template starting with "s", "usu", "tilde", etc.,
    # and unrelated templates would be renamed:
    regex = r"^\{\{\s*((un|no)?s(i((gn|ng)(ed3?)?|g))?|usu|tilde|forgot to sign|without signature)\s*(\||\}\})"
    for template in code.ifilter_templates(matches=regex):
        self.logger.debug("Applying genfix: substitute {{unsigned}}")
        template.name = "subst:unsigned"
class _Job(object):
    """Settings and progress for one wikiproject-tagging job.

    Holds the banner to add, the edit summary to use, the flags controlling
    autoassessment and whether new pages may be created from scratch, and a
    running count of the pages edited so far.
    """

    def __init__(self, banner, names, summary, append, autoassess, nocreate):
        # Nothing has been edited when the job is created.
        self.counter = 0
        self.banner, self.names = banner, names
        self.summary, self.append = summary, append
        self.autoassess, self.nocreate = autoassess, nocreate
class _ShutoffEnabled(Exception):
    """Signal that shutoff is enabled.

    Raised by process_page(); run() catches it and then stops the task.
    """
@@ -0,0 +1,157 @@ | |||
#! /usr/bin/env python | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
""" | |||
usage: :command:`earwigbot [-h] [-v] [-d | -q] [-t NAME] [PATH] ...` | |||
This is EarwigBot's command-line utility, enabling you to easily start the bot | |||
or run specific tasks. | |||
.. glossary:: | |||
``PATH`` | |||
path to the bot's working directory, which will be created if it doesn't | |||
exist; current directory assumed if not specified | |||
``-h``, ``--help`` | |||
show this help message and exit | |||
``-v``, ``--version`` | |||
show program's version number and exit | |||
``-d``, ``--debug`` | |||
print all logs, including ``DEBUG``-level messages | |||
``-q``, ``--quiet`` | |||
don't print any logs except warnings and errors | |||
``-t NAME``, ``--task NAME`` | |||
given the name of a task, the bot will run it instead of the main bot and | |||
then exit | |||
``TASK_ARGS`` | |||
with --task, will pass any remaining arguments to the task's | |||
:py:meth:`.Task.run` method | |||
""" | |||
from argparse import Action, ArgumentParser, REMAINDER | |||
import logging | |||
from os import path | |||
from time import sleep | |||
from earwigbot import __version__ | |||
from earwigbot.bot import Bot | |||
__all__ = ["main"] | |||
class _StoreTaskArg(Action):
    """Custom argparse action that turns leftover arguments into a dict.

    The parsed dictionary is stored on the namespace as ``task_args``.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Parse *values* into keyword arguments.

        ``--key=value`` is stored directly; ``--key value`` pairs a flag with
        the token that follows it; a flag with no value becomes ``True``. A
        bare token that does not follow a flag is ignored.
        """
        parsed = {}
        pending = None  # Flag name still waiting for its value, if any
        for token in values:
            is_flag = token.startswith("-")
            if is_flag and "=" in token:
                flag, _, arg = token.partition("=")
                self.insert(parsed, flag.lstrip("-"), arg)
                continue
            if is_flag:
                # The previous flag never received a value: it is a switch.
                if pending and pending not in parsed:
                    parsed[pending] = True
                pending = token.lstrip("-")
                continue
            if pending:
                self.insert(parsed, pending, token)
                pending = None
        if pending and pending not in parsed:
            parsed[pending] = True
        namespace.task_args = parsed

    def insert(self, kwargs, key, value):
        """Store *value* under *key*, collecting repeated keys into a list."""
        if key not in kwargs:
            kwargs[key] = value
            return
        existing = kwargs[key]
        try:
            existing.append(value)
        except AttributeError:
            # First repeat of this key: promote the scalar to a list.
            kwargs[key] = [existing, value]
def main():
    """Main entry point for the command-line utility.

    Parses the command line, prints the version banner, and builds a
    :py:class:`~earwigbot.bot.Bot` rooted at the chosen working directory.
    With ``--task``, the named task is started and we wait for its thread to
    finish; otherwise the whole bot is run until it returns or the user
    interrupts it. Leftover arguments without ``--task`` are reported as a
    parser error.
    """
    version = "EarwigBot v{0}".format(__version__)
    desc = """This is EarwigBot's command-line utility, enabling you to easily
              start the bot or run specific tasks."""
    parser = ArgumentParser(description=desc)
    parser.add_argument("path", nargs="?", metavar="PATH", default=path.curdir,
                        help="""path to the bot's working directory, which will
                                be created if it doesn't exist; current
                                directory assumed if not specified""")
    parser.add_argument("-v", "--version", action="version", version=version)
    logger = parser.add_mutually_exclusive_group()
    logger.add_argument("-d", "--debug", action="store_true",
                        help="print all logs, including DEBUG-level messages")
    logger.add_argument("-q", "--quiet", action="store_true",
                        help="don't print any logs except warnings and errors")
    parser.add_argument("-t", "--task", metavar="NAME",
                        help="""given the name of a task, the bot will run it
                                instead of the main bot and then exit""")
    parser.add_argument("task_args", nargs=REMAINDER, action=_StoreTaskArg,
                        metavar="TASK_ARGS",
                        help="""with --task, will pass these arguments to the
                                task's run() method""")
    args = parser.parse_args()

    # Task arguments only make sense together with --task.
    if not args.task and args.task_args:
        unrecognized = " ".join(args.task_args)
        parser.error("unrecognized arguments: {0}".format(unrecognized))

    level = logging.INFO
    if args.debug:
        level = logging.DEBUG
    elif args.quiet:
        level = logging.WARNING

    # Parenthesized form prints the same text on Python 2 and also parses on
    # Python 3.
    print(version)

    bot = Bot(path.abspath(args.path), level=level)
    if args.task:
        thread = bot.tasks.start(args.task, **args.task_args)
        if not thread:
            return
        try:
            while thread.is_alive():  # Keep it alive; it's a daemon
                sleep(1)
        except KeyboardInterrupt:
            pass
        finally:
            if thread.is_alive():
                bot.tasks.logger.warn("The task will be killed")
    else:
        try:
            bot.run()
        except KeyboardInterrupt:
            pass
        finally:
            if bot.is_running:
                bot.stop()


if __name__ == "__main__":
    main()
@@ -0,0 +1,51 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
""" | |||
**EarwigBot: Wiki Toolset** | |||
This is a collection of classes and functions to read from and write to | |||
Wikipedia and other wiki sites. No connection whatsoever to `python-wikitools | |||
<http://code.google.com/p/python-wikitools/>`_ written by `Mr.Z-man | |||
<http://en.wikipedia.org/wiki/User:Mr.Z-man>`_, other than a similar purpose. | |||
We share no code. | |||
Import the toolset directly with ``from earwigbot import wiki``. If using the | |||
built-in integration with the rest of the bot, :py:class:`~earwigbot.bot.Bot` | |||
objects contain a :py:attr:`~earwigbot.bot.Bot.wiki` attribute, which is a | |||
:py:class:`~earwigbot.wiki.sitesdb.SitesDB` object tied to the :file:`sites.db` | |||
file located in the same directory as :file:`config.yml`. That object has the | |||
principal methods :py:meth:`~earwigbot.wiki.sitesdb.SitesDB.get_site`, | |||
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.add_site`, and | |||
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.remove_site` that should handle all | |||
of your :py:class:`~earwigbot.wiki.site.Site` (and thus, | |||
:py:class:`~earwigbot.wiki.page.Page`, | |||
:py:class:`~earwigbot.wiki.category.Category`, and | |||
:py:class:`~earwigbot.wiki.user.User`) needs. | |||
""" | |||
from earwigbot.wiki.category import * | |||
from earwigbot.wiki.constants import * | |||
from earwigbot.wiki.page import * | |||
from earwigbot.wiki.site import * | |||
from earwigbot.wiki.sitesdb import * | |||
from earwigbot.wiki.user import * |
@@ -0,0 +1,205 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from earwigbot.wiki.page import Page | |||
__all__ = ["Category"] | |||
class Category(Page):
    """
    **EarwigBot: Wiki Toolset: Category**

    Represents a category on a given :py:class:`~earwigbot.wiki.site.Site`, a
    subclass of :py:class:`~earwigbot.wiki.page.Page`. Provides additional
    methods, but :py:class:`~earwigbot.wiki.page.Page`'s own methods should
    work fine on :py:class:`Category` objects. :py:meth:`site.get_page()
    <earwigbot.wiki.site.Site.get_page>` will return a :py:class:`Category`
    instead of a :py:class:`~earwigbot.wiki.page.Page` if the given title is in
    the category namespace; :py:meth:`~earwigbot.wiki.site.Site.get_category`
    is shorthand, accepting category names without the namespace prefix.

    *Attributes:*

    - :py:attr:`size`:    the total number of members in the category
    - :py:attr:`pages`:   the number of pages in the category
    - :py:attr:`files`:   the number of files in the category
    - :py:attr:`subcats`: the number of subcategories in the category

    *Public methods:*

    - :py:meth:`get_members`: iterates over Pages in the category
    """

    def __repr__(self):
        """Return the canonical string representation of the Category."""
        res = "Category(title={0!r}, follow_redirects={1!r}, site={2!r})"
        return res.format(self._title, self._follow_redirects, self._site)

    def __str__(self):
        """Return a nice string representation of the Category."""
        return '<Category "{0}" of {1}>'.format(self.title, str(self.site))

    def _get_members_via_api(self, limit, follow):
        """Iterate over Pages in the category using the API.

        *limit* is the maximum number of members to yield; a false value
        (``None`` or ``0``) means no limit. *follow* is passed to
        ``site.get_page()`` as *follow_redirects*. Queries are repeated with
        the returned ``cmcontinue`` value until the API stops offering a
        continuation or the limit has been used up.
        """
        params = {"action": "query", "list": "categorymembers",
                  "cmtitle": self.title}
        remaining = limit
        while True:
            params["cmlimit"] = remaining if remaining else "max"
            result = self.site.api_query(**params)
            members = result["query"]["categorymembers"]
            for member in members:
                title = member["title"]
                yield self.site.get_page(title, follow_redirects=follow)

            if "query-continue" not in result:
                break
            qcontinue = result["query-continue"]["categorymembers"]
            params["cmcontinue"] = qcontinue["cmcontinue"]
            if limit:
                remaining -= len(members)
                # Stop once the quota is spent; otherwise a remaining count
                # of zero would be read as "no limit" on the next query.
                if remaining <= 0:
                    break

    def _get_members_via_sql(self, limit, follow):
        """Iterate over Pages in the category using SQL.

        All rows are read into a list before the first page is yielded.
        *limit* and *follow* have the same meaning as in the API variant.
        """
        query = """SELECT page_title, page_namespace, page_id FROM page
                   JOIN categorylinks ON page_id = cl_from
                   WHERE cl_to = ?"""
        # cl_to is matched against the underscored title without its
        # namespace prefix.
        title = self.title.replace(" ", "_").split(":", 1)[1]

        if limit:
            query += " LIMIT ?"
            result = self.site.sql_query(query, (title, limit))
        else:
            result = self.site.sql_query(query, (title,))

        members = list(result)
        for row in members:
            base = row[0].replace("_", " ").decode("utf8")
            namespace = self.site.namespace_id_to_name(row[1])
            if namespace:
                title = u":".join((namespace, base))
            else:  # Avoid doing a silly (albeit valid) ":Pagename" thing
                title = base
            yield self.site.get_page(title, follow_redirects=follow,
                                     pageid=row[2])

    def _get_size_via_api(self, member_type):
        """Return the size of the category using the API.

        *member_type* is the ``categoryinfo`` key to read: ``"size"``,
        ``"pages"``, ``"files"`` or ``"subcats"``.
        """
        result = self.site.api_query(action="query", prop="categoryinfo",
                                     titles=self.title)
        # A single title was queried, so take the only page entry. list()
        # keeps this working whether values() returns a list or a view.
        info = list(result["query"]["pages"].values())[0]["categoryinfo"]
        return info[member_type]

    def _get_size_via_sql(self, member_type):
        """Return the size of the category using SQL.

        ``"size"`` counts every link to the category; any other
        *member_type* is narrowed by ``cl_type``, using the name with its
        final character removed (``"pages"`` -> ``"page"``).
        """
        query = "SELECT COUNT(*) FROM categorylinks WHERE cl_to = ?"
        title = self.title.replace(" ", "_").split(":", 1)[1]
        if member_type == "size":
            result = self.site.sql_query(query, (title,))
        else:
            query += " AND cl_type = ?"
            result = self.site.sql_query(query, (title, member_type[:-1]))
        return list(result)[0][0]

    def _get_size(self, member_type):
        """Return the size of the category."""
        services = {
            self.site.SERVICE_API: self._get_size_via_api,
            self.site.SERVICE_SQL: self._get_size_via_sql
        }
        return self.site.delegate(services, (member_type,))

    @property
    def size(self):
        """The total number of members in the category.

        Includes pages, files, and subcats. Equal to :py:attr:`pages` +
        :py:attr:`files` + :py:attr:`subcats`. This will use either the API or
        SQL depending on which are enabled and the amount of lag on each. This
        is handled by :py:meth:`site.delegate()
        <earwigbot.wiki.site.Site.delegate>`.
        """
        return self._get_size("size")

    @property
    def pages(self):
        """The number of pages in the category.

        This will use either the API or SQL depending on which are enabled and
        the amount of lag on each. This is handled by :py:meth:`site.delegate()
        <earwigbot.wiki.site.Site.delegate>`.
        """
        return self._get_size("pages")

    @property
    def files(self):
        """The number of files in the category.

        This will use either the API or SQL depending on which are enabled and
        the amount of lag on each. This is handled by :py:meth:`site.delegate()
        <earwigbot.wiki.site.Site.delegate>`.
        """
        return self._get_size("files")

    @property
    def subcats(self):
        """The number of subcategories in the category.

        This will use either the API or SQL depending on which are enabled and
        the amount of lag on each. This is handled by :py:meth:`site.delegate()
        <earwigbot.wiki.site.Site.delegate>`.
        """
        return self._get_size("subcats")

    def get_members(self, limit=None, follow_redirects=None):
        """Iterate over Pages in the category.

        If *limit* is given, we will provide this many pages, or less if the
        category is smaller. By default, *limit* is ``None``, meaning we will
        keep iterating over members until the category is exhausted.
        *follow_redirects* is passed directly to :py:meth:`site.get_page()
        <earwigbot.wiki.site.Site.get_page>`; it defaults to ``None``, which
        will use the value passed to our :py:meth:`__init__`.

        This will use either the API or SQL depending on which are enabled and
        the amount of lag on each. This is handled by :py:meth:`site.delegate()
        <earwigbot.wiki.site.Site.delegate>`.

        .. note::
           Be careful when iterating over very large categories with no limit.
           If using the API, at best, you will make one query per 5000 pages,
           which can add up significantly for categories with hundreds of
           thousands of members. As for SQL, note that *all page titles are
           stored internally* as soon as the query is made, so the site-wide
           SQL lock can be freed and unrelated queries can be made without
           requiring a separate connection to be opened. This is generally not
           an issue unless your category's size approaches several hundred
           thousand, in which case the sheer number of titles in memory becomes
           problematic.
        """
        services = {
            self.site.SERVICE_API: self._get_members_via_api,
            self.site.SERVICE_SQL: self._get_members_via_sql
        }
        if follow_redirects is None:
            follow_redirects = self._follow_redirects
        return self.site.delegate(services, (limit, follow_redirects))
@@ -0,0 +1,61 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
""" | |||
**EarwigBot: Wiki Toolset: Constants** | |||
This module defines some useful constants: | |||
- :py:const:`USER_AGENT`: our default User Agent when making API queries | |||
- :py:const:`NS_*`: default namespace IDs for easy lookup | |||
Import directly with ``from earwigbot.wiki import constants`` or | |||
``from earwigbot.wiki.constants import *``. These are also available from | |||
:py:mod:`earwigbot.wiki` directly (e.g. ``earwigbot.wiki.USER_AGENT``). | |||
""" | |||
# Default User Agent when making API queries, built from the bot's version
# and the running Python version. The helper aliases are deleted afterwards
# so they are not left in the module namespace.
from earwigbot import __version__ as _v
from platform import python_version as _p

USER_AGENT = (
    "EarwigBot/{0} (Python/{1}; https://github.com/earwig/earwigbot)"
).format(_v(), _p()) if False else (
    "EarwigBot/{0} (Python/{1}; https://github.com/earwig/earwigbot)"
).format(_v, _p())
del _v, _p

# Default namespace IDs, one subject/talk pair per line:
NS_MAIN, NS_TALK = 0, 1
NS_USER, NS_USER_TALK = 2, 3
NS_PROJECT, NS_PROJECT_TALK = 4, 5
NS_FILE, NS_FILE_TALK = 6, 7
NS_MEDIAWIKI, NS_MEDIAWIKI_TALK = 8, 9
NS_TEMPLATE, NS_TEMPLATE_TALK = 10, 11
NS_HELP, NS_HELP_TALK = 12, 13
NS_CATEGORY, NS_CATEGORY_TALK = 14, 15
# Negative IDs have no talk counterpart:
NS_SPECIAL, NS_MEDIA = -1, -2
@@ -0,0 +1,229 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from gzip import GzipFile | |||
from socket import timeout | |||
from StringIO import StringIO | |||
from time import sleep, time | |||
from urllib2 import build_opener, URLError | |||
import oauth2 as oauth | |||
from earwigbot import exceptions | |||
from earwigbot.wiki.copyvios.markov import MarkovChain, MarkovChainIntersection | |||
from earwigbot.wiki.copyvios.parsers import ArticleTextParser, HTMLTextParser | |||
from earwigbot.wiki.copyvios.result import CopyvioCheckResult | |||
from earwigbot.wiki.copyvios.search import YahooBOSSSearchEngine | |||
__all__ = ["CopyvioMixIn"] | |||
class CopyvioMixIn(object):
    """
    **EarwigBot: Wiki Toolset: Copyright Violation MixIn**

    This is a mixin that provides two public methods, :py:meth:`copyvio_check`
    and :py:meth:`copyvio_compare`. The former checks the page for copyright
    violations using a search engine API, and the latter compares the page
    against a given URL. Credentials for the search engine API are stored in
    the :py:class:`~earwigbot.wiki.site.Site`'s config.
    """

    def __init__(self, site):
        self._search_config = site._search_config
        self._exclusions_db = self._search_config.get("exclusions_db")
        # Separate opener that reuses the site's request headers.
        self._opener = build_opener()
        self._opener.addheaders = site._opener.addheaders

    def _open_url_ignoring_errors(self, url):
        """Open a URL using self._opener and return its content, or None.

        Will decompress the content if the headers contain "gzip" as its
        content encoding, and will return None if URLError is raised while
        opening the URL. IOErrors while gunzipping a compressed response are
        ignored, and the original content is returned.
        """
        try:
            response = self._opener.open(url.encode("utf8"), timeout=5)
        except (URLError, timeout):
            return None
        result = response.read()

        if response.headers.get("Content-Encoding") == "gzip":
            stream = StringIO(result)
            gzipper = GzipFile(fileobj=stream)
            try:
                result = gzipper.read()
            except IOError:
                pass

        return result

    def _select_search_engine(self):
        """Return a function that can be called to do web searches.

        The function takes one argument, a search query, and returns a list of
        URLs, ranked by importance. The underlying logic depends on the
        *engine* argument within our config; for example, if *engine* is
        "Yahoo! BOSS", we'll use YahooBOSSSearchEngine for querying.

        Raises UnknownSearchEngineError if the 'engine' listed in our config is
        unknown to us, and UnsupportedSearchEngineError if we are missing a
        required package or module, like oauth2 for "Yahoo! BOSS".
        """
        engine = self._search_config["engine"]
        credentials = self._search_config["credentials"]

        if engine == "Yahoo! BOSS":
            # NOTE(review): this guard only fires if the module-level import
            # leaves ``oauth`` falsy when oauth2 is missing - confirm that
            # the import is optional.
            if not oauth:
                e = "The package 'oauth2' could not be imported"
                raise exceptions.UnsupportedSearchEngineError(e)
            return YahooBOSSSearchEngine(credentials)

        raise exceptions.UnknownSearchEngineError(engine)

    def _copyvio_compare_content(self, article, url):
        """Compare an article against the content found at a URL.

        The *article* is a Markov chain, whereas the *url* is just a string
        that we'll try to open and read ourselves.

        Returns a 2-tuple ``(confidence, (source_chain, delta_chain))``. If
        the URL cannot be read or is empty, the confidence is ``0`` and both
        chains are empty, so callers can always unpack the result.
        """
        html = self._open_url_ignoring_errors(url)
        if not html:
            # Same shape as the success path: returning a bare 0 here would
            # break the callers' ``conf, chains = ...`` unpacking.
            empty = MarkovChain("")
            return 0, (empty, MarkovChainIntersection(empty, empty))

        source = MarkovChain(HTMLTextParser(html).strip())
        delta = MarkovChainIntersection(article, source)
        return float(delta.size()) / article.size(), (source, delta)

    def copyvio_check(self, min_confidence=0.5, max_queries=-1,
                      interquery_sleep=1):
        """Check the page for copyright violations.

        Returns a
        :py:class:`~earwigbot.wiki.copyvios.result.CopyvioCheckResult` object
        with information on the results of the check.

        *max_queries* is self-explanatory; we will never make more than this
        number of queries in a given check. If it's lower than 0, we will not
        limit the number of queries.

        *interquery_sleep* is the minimum amount of time we will sleep between
        search engine queries, in seconds.

        Raises :py:exc:`~earwigbot.exceptions.CopyvioCheckError` or subclasses
        (:py:exc:`~earwigbot.exceptions.UnknownSearchEngineError`,
        :py:exc:`~earwigbot.exceptions.SearchQueryError`, ...) on errors.
        """
        searcher = self._select_search_engine()
        if self._exclusions_db:
            self._exclusions_db.sync(self.site.name)
        handled_urls = []
        best_confidence = 0
        best_match = None
        num_queries = 0
        empty = MarkovChain("")
        best_chains = (empty, MarkovChainIntersection(empty, empty))
        parser = ArticleTextParser(self.get())
        clean = parser.strip()
        chunks = parser.chunk(self._search_config["nltk_dir"], max_queries)
        article_chain = MarkovChain(clean)
        last_query = time()

        if article_chain.size() < 20:  # Auto-fail very small articles
            return CopyvioCheckResult(False, best_confidence, best_match,
                                      num_queries, article_chain, best_chains)

        while (chunks and best_confidence < min_confidence and
               (max_queries < 0 or num_queries < max_queries)):
            chunk = chunks.pop(0)
            log = u"[[{0}]] -> querying {1} for {2!r}"
            self._logger.debug(log.format(self.title, searcher.name, chunk))
            urls = searcher.search(chunk)
            # Skip URLs already seen in an earlier query.
            urls = [url for url in urls if url not in handled_urls]
            for url in urls:
                handled_urls.append(url)
                if self._exclusions_db:
                    if self._exclusions_db.check(self.site.name, url):
                        continue
                conf, chains = self._copyvio_compare_content(article_chain, url)
                if conf > best_confidence:
                    best_confidence = conf
                    best_match = url
                    best_chains = chains
            num_queries += 1
            # Space out search engine queries by at least interquery_sleep.
            diff = time() - last_query
            if diff < interquery_sleep:
                sleep(interquery_sleep - diff)
            last_query = time()

        if best_confidence >= min_confidence:
            is_violation = True
            log = u"Violation detected for [[{0}]] (confidence: {1}; URL: {2}; using {3} queries)"
            self._logger.debug(log.format(self.title, best_confidence,
                                          best_match, num_queries))
        else:
            is_violation = False
            log = u"No violation for [[{0}]] (confidence: {1}; using {2} queries)"
            self._logger.debug(log.format(self.title, best_confidence,
                                          num_queries))

        return CopyvioCheckResult(is_violation, best_confidence, best_match,
                                  num_queries, article_chain, best_chains)

    def copyvio_compare(self, url, min_confidence=0.5):
        """Check the page like :py:meth:`copyvio_check` against a specific URL.

        This is essentially a reduced version of the above - a copyvio
        comparison is made using Markov chains and the result is returned in a
        :py:class:`~earwigbot.wiki.copyvios.result.CopyvioCheckResult` object -
        but without using a search engine, since the suspected "violated" URL
        is supplied from the start.

        Its primary use is to generate a result when the URL is retrieved from
        a cache, like the one used in EarwigBot's Toolserver site. After a
        search is done, the resulting URL is stored in a cache for 24 hours so
        future checks against that page will not require another set of
        time-and-money-consuming search engine queries. However, the comparison
        itself (which includes the article's and the source's content) cannot
        be stored for data retention reasons, so a fresh comparison is made
        using this function.

        Since no searching is done, neither
        :py:exc:`~earwigbot.exceptions.UnknownSearchEngineError` nor
        :py:exc:`~earwigbot.exceptions.SearchQueryError` will be raised.
        """
        content = self.get()
        clean = ArticleTextParser(content).strip()
        article_chain = MarkovChain(clean)
        confidence, chains = self._copyvio_compare_content(article_chain, url)

        if confidence >= min_confidence:
            is_violation = True
            log = u"Violation detected for [[{0}]] (confidence: {1}; URL: {2})"
            self._logger.debug(log.format(self.title, confidence, url))
        else:
            is_violation = False
            log = u"No violation for [[{0}]] (confidence: {1}; URL: {2})"
            self._logger.debug(log.format(self.title, confidence, url))

        return CopyvioCheckResult(is_violation, confidence, url, 0,
                                  article_chain, chains)
@@ -0,0 +1,171 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import re | |||
import sqlite3 as sqlite | |||
from threading import Lock | |||
from time import time | |||
from urlparse import urlparse | |||
from earwigbot import exceptions | |||
__all__ = ["ExclusionsDB"] | |||
# Pages listing sites to exclude, keyed by site name. ExclusionsDB._create()
# seeds the ``sources`` table with one row per (site name, page) pair.
default_sources = {
    "enwiki": [
        "Wikipedia:Mirrors and forks/Abc",
        "Wikipedia:Mirrors and forks/Def",
        "Wikipedia:Mirrors and forks/Ghi",
        "Wikipedia:Mirrors and forks/Jkl",
        "Wikipedia:Mirrors and forks/Mno",
        "Wikipedia:Mirrors and forks/Pqr",
        "Wikipedia:Mirrors and forks/Stu",
        "Wikipedia:Mirrors and forks/Vwxyz",
        "User:EarwigBot/Copyvios/Exclusions",
    ],
}
class ExclusionsDB(object):
    """
    **EarwigBot: Wiki Toolset: Exclusions Database Manager**

    Controls the :file:`exclusions.db` file, which stores URLs excluded from
    copyright violation checks on account of being known mirrors, for example.

    The database has three tables: ``sources`` (wiki pages that list excluded
    URLs, per site), ``exclusions`` (the URLs parsed from those pages, per
    site) and ``updates`` (the time each site's exclusions were last
    refreshed).
    """

    def __init__(self, sitesdb, dbfile, logger):
        """Store the collaborators; no database access happens here.

        *sitesdb* must provide ``get_site(sitename)``, *dbfile* is the path
        handed to :py:func:`sqlite3.connect`, and *logger* receives
        ``info``/``debug`` messages.
        """
        self._sitesdb = sitesdb
        self._dbfile = dbfile
        self._logger = logger
        self._db_access_lock = Lock()

    def __repr__(self):
        """Return the canonical string representation of the ExclusionsDB."""
        res = "ExclusionsDB(sitesdb={0!r}, dbfile={1!r}, logger={2!r})"
        return res.format(self._sitesdb, self._dbfile, self._logger)

    def __str__(self):
        """Return a nice string representation of the ExclusionsDB."""
        return "<ExclusionsDB at {0}>".format(self._dbfile)

    def _create(self):
        """Initialize the exclusions database with its necessary tables.

        The ``sources`` table is seeded from the module-level
        ``default_sources`` mapping.
        """
        script = """
            CREATE TABLE sources (source_sitename, source_page);
            CREATE TABLE updates (update_sitename, update_time);
            CREATE TABLE exclusions (exclusion_sitename, exclusion_url);
        """
        query = "INSERT INTO sources VALUES (?, ?);"
        sources = [(sitename, page)
                   for sitename, pages in default_sources.items()
                   for page in pages]

        with sqlite.connect(self._dbfile) as conn:
            conn.executescript(script)
            conn.executemany(query, sources)

    def _load_source(self, site, source):
        """Load from a specific source and return a set of URLs.

        *source* is the title of a page on *site*. Two notations are
        recognized in its text: ``url = <nowiki>...</nowiki>`` and
        ``* Site: [...]``. A leading ``http://``, ``https://`` or ``//`` is
        dropped and the remainder is lowercased. A missing page yields an
        empty set.
        """
        urls = set()
        try:
            data = site.get_page(source).get()
        except exceptions.PageNotFoundError:
            return urls

        regexes = [
            # Lazy match so that only the first <nowiki> element is taken:
            r"url\s*=\s*<nowiki>(?:https?:)?(?://)?(.*?)</nowiki>",
            # Stop at the closing bracket or at whitespace; neither can be
            # part of the URL itself:
            r"\*\s*Site:[ \t]*\[?(?:https?:)?(?://)?([^\]\s]*)"
        ]
        for regex in regexes:
            # With a single capturing group, findall() yields plain strings.
            for url in re.findall(regex, data, re.I):
                url = url.strip().lower()
                if url:  # An empty prefix would match every URL in check()
                    urls.add(url)
        return urls

    def _update(self, sitename):
        """Update the database from listed sources in the index.

        Exclusions for *sitename* that no source lists any more are deleted,
        newly listed ones are inserted, and the site's update time is set to
        the current time.
        """
        query1 = "SELECT source_page FROM sources WHERE source_sitename = ?;"
        query2 = "SELECT exclusion_url FROM exclusions WHERE exclusion_sitename = ?"
        query3 = "DELETE FROM exclusions WHERE exclusion_sitename = ? AND exclusion_url = ?"
        query4 = "INSERT INTO exclusions VALUES (?, ?);"
        query5 = "SELECT 1 FROM updates WHERE update_sitename = ?;"
        query6 = "UPDATE updates SET update_time = ? WHERE update_sitename = ?;"
        query7 = "INSERT INTO updates VALUES (?, ?);"

        site = self._sitesdb.get_site(sitename)
        with sqlite.connect(self._dbfile) as conn, self._db_access_lock:
            urls = set()
            for (source,) in conn.execute(query1, (sitename,)):
                urls |= self._load_source(site, source)

            # Read every stored URL before deleting anything, so the table is
            # not modified while a SELECT over it is still being stepped:
            stored = conn.execute(query2, (sitename,)).fetchall()
            for (url,) in stored:
                if url in urls:
                    urls.remove(url)  # Already stored; nothing to insert
                else:
                    conn.execute(query3, (sitename, url))
            conn.executemany(query4, [(sitename, url) for url in urls])

            now = int(time())
            if conn.execute(query5, (sitename,)).fetchone():
                conn.execute(query6, (now, sitename))
            else:
                conn.execute(query7, (sitename, now))

    def _get_last_update(self, sitename):
        """Return the UNIX timestamp of the last time the db was updated.

        Returns 0 if *sitename* has never been updated. If the query fails
        with :py:exc:`sqlite3.OperationalError`, the tables are created via
        :py:meth:`_create` and 0 is returned.
        """
        query = "SELECT update_time FROM updates WHERE update_sitename = ?;"
        with sqlite.connect(self._dbfile) as conn, self._db_access_lock:
            try:
                result = conn.execute(query, (sitename,)).fetchone()
            except sqlite.OperationalError:
                self._create()
                return 0
            return result[0] if result else 0

    def sync(self, sitename):
        """Update the database if it hasn't been updated in the past week.

        This only updates the exclusions database for the *sitename* site.
        """
        max_staleness = 60 * 60 * 24 * 7
        time_since_update = int(time() - self._get_last_update(sitename))
        if time_since_update > max_staleness:
            log = u"Updating stale database: {0} (last updated {1} seconds ago)"
            self._logger.info(log.format(sitename, time_since_update))
            self._update(sitename)
        else:
            log = u"Database for {0} is still fresh (last updated {1} seconds ago)"
            self._logger.debug(log.format(sitename, time_since_update))

    def check(self, sitename, url):
        """Check whether a given URL is in the exclusions database.

        Return ``True`` if the URL is in the database, or ``False`` otherwise.

        An exclusion beginning with ``*.`` is compared against the URL's
        network location; any other exclusion must be a prefix of the
        lowercased URL with its leading ``http://``/``https://`` removed.
        """
        # Only strip a scheme at the very start of the URL; stored exclusions
        # are normalized the same way.
        normalized = re.sub(r"^https?://", "", url.lower())
        netloc = None  # Parsed lazily, only if a wildcard exclusion exists
        query = "SELECT exclusion_url FROM exclusions WHERE exclusion_sitename = ?"
        with sqlite.connect(self._dbfile) as conn, self._db_access_lock:
            for (excl,) in conn.execute(query, (sitename,)):
                if excl.startswith("*."):
                    if netloc is None:
                        netloc = urlparse(url.lower()).netloc
                    # NOTE(review): this is a substring test, so
                    # "*.example.com" also matches "notexample.com" --
                    # confirm that this looseness is intended.
                    matches = excl[2:] in netloc
                else:
                    matches = normalized.startswith(excl)
                if matches:
                    log = u"Exclusion detected in {0} for {1}"
                    self._logger.debug(log.format(sitename, url))
                    return True

        log = u"No exclusions in {0} for {1}".format(sitename, url)
        self._logger.debug(log)
        return False
@@ -0,0 +1,87 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from collections import defaultdict | |||
from re import sub, UNICODE | |||
__all__ = ["MarkovChain", "MarkovChainIntersection"] | |||
class MarkovChain(object):
    """Implements a basic ngram Markov chain of words.

    The text is lowercased, stripped of every character that is not a word
    character, whitespace or a hyphen, and split into words. The word list is
    padded with ``degree - 1`` :py:attr:`START` markers in front and as many
    :py:attr:`END` markers behind. :py:attr:`chain` then maps each tuple of
    ``degree - 1`` consecutive words to a mapping from the following word to
    the number of times that transition occurs.
    """
    START = -1
    END = -2
    degree = 3  # 2 for bigrams, 3 for trigrams, etc.

    def __init__(self, text):
        """Build the chain for *text*."""
        self.text = text
        self.chain = defaultdict(lambda: defaultdict(int))
        # Raw string: "\w" and "\s" are regex escapes, not string escapes.
        words = sub(r"[^\w\s-]", "", text.lower(), flags=UNICODE).split()

        padding = self.degree - 1
        words = ([self.START] * padding) + words + ([self.END] * padding)
        for i in range(len(words) - self.degree + 1):
            last = i + self.degree - 1
            self.chain[tuple(words[i:last])][words[last]] += 1

    def __repr__(self):
        """Return the canonical string representation of the MarkovChain."""
        return "MarkovChain(text={0!r})".format(self.text)

    def __str__(self):
        """Return a nice string representation of the MarkovChain."""
        return "<MarkovChain of size {0}>".format(self.size())

    def size(self):
        """Return the size of the Markov chain: the total number of nodes.

        This is the sum of all transition counts stored in the chain.
        """
        return sum(sum(node.values()) for node in self.chain.values())
class MarkovChainIntersection(MarkovChain):
    """Holds the nodes that two Markov chains have in common.

    A transition is kept only when it occurs in both chains, and it is stored
    with the smaller of the two counts.
    """

    def __init__(self, mc1, mc2):
        """Intersect the chains of *mc1* and *mc2*."""
        self.mc1, self.mc2 = mc1, mc2
        self.chain = defaultdict(lambda: defaultdict(lambda: 0))

        ours, theirs = mc1.chain, mc2.chain
        for state in ours:
            if state not in theirs:
                continue
            our_nodes, their_nodes = ours[state], theirs[state]
            for follower in our_nodes:
                if follower not in their_nodes:
                    continue
                shared = min(our_nodes[follower], their_nodes[follower])
                self.chain[state][follower] = shared

    def __repr__(self):
        """Return the canonical string representation of the intersection."""
        template = "MarkovChainIntersection(mc1={0!r}, mc2={1!r})"
        return template.format(self.mc1, self.mc2)

    def __str__(self):
        """Return a nice string representation of the intersection."""
        template = "<MarkovChainIntersection of size {0} ({1} ^ {2})>"
        return template.format(self.size(), self.mc1, self.mc2)
@@ -0,0 +1,138 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from os import path | |||
import bs4 | |||
import mwparserfromhell | |||
import nltk | |||
__all__ = ["BaseTextParser", "ArticleTextParser", "HTMLTextParser"] | |||
class BaseTextParser(object):
    """Common foundation for parsers that operate on a piece of text."""

    def __init__(self, text):
        """Remember *text* as :py:attr:`text` for later processing."""
        self.text = text

    def __repr__(self):
        """Return the canonical string representation of the text parser."""
        cls_name = type(self).__name__
        return "{0}(text={1!r})".format(cls_name, self.text)

    def __str__(self):
        """Return a nice string representation of the text parser."""
        size = len(self.text)
        return "<{0} of text with size {1}>".format(type(self).__name__, size)
class ArticleTextParser(BaseTextParser):
    """A parser that can strip and chunk wikicode article text."""

    def strip(self):
        """Clean the page's raw text by removing templates and formatting.

        Return the page's text with all HTML and wikicode formatting removed,
        including templates, tables, and references. It retains punctuation
        (spacing, paragraphs, periods, commas, (semi)-colons, parentheses,
        quotes), original capitalization, and so forth. HTML entities are
        replaced by their unicode equivalents.

        The actual stripping is handled by :py:mod:`mwparserfromhell`. The
        result is also stored as :py:attr:`clean` for use by :py:meth:`chunk`.
        """
        wikicode = mwparserfromhell.parse(self.text)
        clean = wikicode.strip_code(normalize=True, collapse=True)
        self.clean = clean.replace("\n\n", "\n")  # Collapse extra newlines
        return self.clean

    @staticmethod
    def _truncate(sentence, max_query):
        """Return the longest whole-word prefix of *sentence* that fits.

        Words are re-joined with single spaces; the result is empty when even
        the first word is longer than *max_query*.
        """
        kept = []
        length = -1  # Cancels out the separator counted for the first word
        for word in sentence.split():
            length += len(word) + 1
            if length > max_query:
                break
            kept.append(word)
        return " ".join(kept)

    def chunk(self, nltk_dir, max_chunks, max_query=256):
        """Convert the clean article text into a list of web-searchable chunks.

        No greater than *max_chunks* will be returned. Each chunk will only be
        a sentence or two long at most (no more than *max_query*). The idea is
        to return a sample of the article text rather than the whole, so we'll
        pick and choose from parts of it, especially if the article is large
        and *max_chunks* is low, so we don't end up just searching for just the
        first paragraph.

        This is implemented using :py:mod:`nltk` (http://nltk.org/). A base
        directory (*nltk_dir*) is required to store nltk's punctuation
        database. This is typically located in the bot's working directory.

        If :py:meth:`strip` has not been called yet, it is called first.
        """
        clean = getattr(self, "clean", None)
        if clean is None:
            clean = self.strip()

        datafile = path.join(nltk_dir, "tokenizers", "punkt", "english.pickle")
        try:
            tokenizer = nltk.data.load("file:" + datafile)
        except LookupError:
            # The tokenizer data is missing: download it, then retry.
            nltk.download("punkt", nltk_dir)
            tokenizer = nltk.data.load("file:" + datafile)

        sentences = []
        for sentence in tokenizer.tokenize(clean):
            if len(sentence) > max_query:
                sentence = self._truncate(sentence, max_query)
            if sentence:  # Never hand an empty query to the search engine
                sentences.append(sentence)

        if max_chunks >= len(sentences):
            return sentences

        # Sample from the start, the end and the three quartiles in turn.
        # Floor division keeps the pop() indices integral.
        chunks = []
        while len(chunks) < max_chunks:
            slot = len(chunks) % 5
            if slot == 0:
                chunk = sentences.pop(0)  # Pop from beginning
            elif slot == 1:
                chunk = sentences.pop()  # Pop from end
            elif slot == 2:
                chunk = sentences.pop(len(sentences) // 2)  # Pop from Q2
            elif slot == 3:
                chunk = sentences.pop(len(sentences) // 4)  # Pop from Q1
            else:
                chunk = sentences.pop(3 * len(sentences) // 4)  # Pop from Q3
            chunks.append(chunk)

        return chunks
class HTMLTextParser(BaseTextParser):
    """A parser that can extract the text from an HTML document."""
    # Tags whose contents are removed entirely before text is extracted:
    hidden_tags = [
        "script", "style"
    ]

    def strip(self):
        """Return the actual text contained within an HTML document.

        Comments and every tag listed in :py:attr:`hidden_tags` are removed;
        the remaining strings inside ``<body>`` are stripped and joined with
        newlines. An empty string is returned if the document has no
        ``<body>``.

        Implemented using :py:mod:`BeautifulSoup <bs4>`
        (http://www.crummy.com/software/BeautifulSoup/).
        """
        try:
            soup = bs4.BeautifulSoup(self.text, "lxml").body
        except ValueError:
            # Presumably raised when the lxml parser is unavailable; fall
            # back to whatever parser bs4 picks by default.
            soup = bs4.BeautifulSoup(self.text).body

        if soup is None:
            # No <body> element was found, so there is nothing to extract.
            return ""

        is_comment = lambda text: isinstance(text, bs4.element.Comment)
        for comment in soup.find_all(text=is_comment):
            comment.extract()
        for tag in self.hidden_tags:
            for element in soup.find_all(tag):
                element.extract()

        return "\n".join(soup.stripped_strings)
@@ -0,0 +1,60 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
__all__ = ["CopyvioCheckResult"] | |||
class CopyvioCheckResult(object):
    """
    **EarwigBot: Wiki Toolset: Copyvio Check Result**

    A class holding information about the results of a copyvio check.

    *Attributes:*

    - :py:attr:`violation`:     ``True`` if this is a violation, else ``False``
    - :py:attr:`confidence`:    a float between 0 and 1 indicating accuracy
    - :py:attr:`url`:           the URL of the violated page
    - :py:attr:`queries`:       the number of queries used to reach a result
    - :py:attr:`article_chain`: the MarkovChain of the article text
    - :py:attr:`source_chain`:  the MarkovChain of the violated page text
    - :py:attr:`delta_chain`:   the MarkovChainIntersection comparing the two
    """

    def __init__(self, violation, confidence, url, queries, article, chains):
        """Store the result fields.

        *chains* is a two-item sequence: the source chain followed by the
        delta chain.
        """
        self.violation = violation
        self.confidence = confidence
        self.url = url
        self.queries = queries
        self.article_chain = article
        self.source_chain = chains[0]
        self.delta_chain = chains[1]

    def __repr__(self):
        """Return the canonical string representation of the result."""
        # "!r" is the repr conversion; the original "{3|r}" was parsed as a
        # keyword field named "3|r" and made repr() raise KeyError.
        res = "CopyvioCheckResult(violation={0!r}, confidence={1!r}, url={2!r}, queries={3!r})"
        return res.format(self.violation, self.confidence, self.url,
                          self.queries)

    def __str__(self):
        """Return a nice string representation of the result."""
        res = "<CopyvioCheckResult ({0} with {1} conf)>"
        return res.format(self.violation, self.confidence)
@@ -0,0 +1,91 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from json import loads | |||
from urllib import quote_plus, urlencode | |||
import oauth2 as oauth | |||
from earwigbot.exceptions import SearchQueryError | |||
__all__ = ["BaseSearchEngine", "YahooBOSSSearchEngine"] | |||
class BaseSearchEngine(object):
    """Skeleton that concrete search engine interfaces build upon."""
    name = "Base"

    def __init__(self, cred):
        """Keep the credentials *cred* around for later searches."""
        self.cred = cred

    def __repr__(self):
        """Return the canonical string representation of the search engine."""
        return type(self).__name__ + "()"

    def __str__(self):
        """Return a nice string representation of the search engine."""
        return "<" + type(self).__name__ + ">"

    def search(self, query):
        """Search for *query* with this engine.

        The base class has no backend, so this always raises
        :py:exc:`NotImplementedError`; subclasses override it.
        """
        raise NotImplementedError()
class YahooBOSSSearchEngine(BaseSearchEngine):
    """A search engine interface with Yahoo! BOSS."""
    name = "Yahoo! BOSS"

    def search(self, query):
        """Do a Yahoo! BOSS web search for *query*.

        The query is wrapped in double quotes before it is sent. The request
        is made through an OAuth client built from the ``"key"`` and
        ``"secret"`` entries of :py:attr:`cred`.

        Returns a list of URLs, no more than fifty, ranked by relevance (as
        determined by Yahoo). Raises
        :py:exc:`~earwigbot.exceptions.SearchQueryError` on errors. An empty
        list is returned if the response contains no web results.
        """
        base_url = "http://yboss.yahooapis.com/ysearch/web"
        # str.join() takes a single iterable, so the original
        # query.join('"', '"') raised TypeError on every call.
        # NOTE(review): the query is percent-encoded here and then again by
        # urlencode() below -- confirm that the API expects double encoding.
        query = quote_plus('"' + query + '"')
        params = {"q": query, "type": "html,text", "format": "json"}
        url = "{0}?{1}".format(base_url, urlencode(params))

        consumer = oauth.Consumer(key=self.cred["key"],
                                  secret=self.cred["secret"])
        client = oauth.Client(consumer)
        headers, body = client.request(url, "GET")

        if headers["status"] != "200":
            e = "Yahoo! BOSS Error: got response code '{0}':\n{1}'"
            raise SearchQueryError(e.format(headers["status"], body))

        try:
            res = loads(body)
        except ValueError:
            e = "Yahoo! BOSS Error: JSON could not be decoded"
            raise SearchQueryError(e)

        try:
            results = res["bossresponse"]["web"]["results"]
        except KeyError:
            return []
        return [result["url"] for result in results]
@@ -0,0 +1,787 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from hashlib import md5 | |||
from logging import getLogger, NullHandler | |||
import re | |||
from time import gmtime, strftime | |||
from urllib import quote | |||
import mwparserfromhell | |||
from earwigbot import exceptions | |||
from earwigbot.wiki.copyvios import CopyvioMixIn | |||
__all__ = ["Page"] | |||
class Page(CopyvioMixIn): | |||
""" | |||
**EarwigBot: Wiki Toolset: Page** | |||
Represents a page on a given :py:class:`~earwigbot.wiki.site.Site`. Has | |||
methods for getting information about the page, getting page content, and | |||
so on. :py:class:`~earwigbot.wiki.category.Category` is a subclass of | |||
:py:class:`Page` with additional methods. | |||
*Attributes:* | |||
- :py:attr:`site`: the page's corresponding Site object | |||
- :py:attr:`title`: the page's title, or pagename | |||
- :py:attr:`exists`: whether or not the page exists | |||
- :py:attr:`pageid`: an integer ID representing the page | |||
- :py:attr:`url`: the page's URL | |||
- :py:attr:`namespace`: the page's namespace as an integer | |||
- :py:attr:`protection`: the page's current protection status | |||
- :py:attr:`is_talkpage`: ``True`` if this is a talkpage, else ``False`` | |||
- :py:attr:`is_redirect`: ``True`` if this is a redirect, else ``False`` | |||
*Public methods:* | |||
- :py:meth:`reload`: forcibly reloads the page's attributes | |||
- :py:meth:`toggle_talk`: returns a content page's talk page, or vice versa | |||
- :py:meth:`get`: returns the page's content | |||
- :py:meth:`get_redirect_target`: returns the page's destination if it is a | |||
redirect | |||
- :py:meth:`get_creator`: returns a User object representing the first | |||
person to edit the page | |||
- :py:meth:`parse`: parses the page content for templates, links, etc | |||
- :py:meth:`edit`: replaces the page's content or creates a new page | |||
- :py:meth:`add_section`: adds a new section at the bottom of the page | |||
- :py:meth:`check_exclusion`: checks whether or not we are allowed to edit | |||
the page, per ``{{bots}}``/``{{nobots}}`` | |||
- :py:meth:`~earwigbot.wiki.copyvios.CopyrightMixIn.copyvio_check`: | |||
checks the page for copyright violations | |||
- :py:meth:`~earwigbot.wiki.copyvios.CopyrightMixIn.copyvio_compare`: | |||
checks the page like :py:meth:`copyvio_check`, but against a specific URL | |||
""" | |||
PAGE_UNKNOWN = 0 | |||
PAGE_INVALID = 1 | |||
PAGE_MISSING = 2 | |||
PAGE_EXISTS = 3 | |||
def __init__(self, site, title, follow_redirects=False, pageid=None,
             logger=None):
    """Constructor for new Page instances.

    Takes up to five arguments: a Site object, the Page's title (or
    pagename), whether or not to follow redirects (optional, defaults to
    False), a page ID to supplement the title (optional, defaults to None -
    i.e., we will have to query the API to get it), and a logger (optional;
    without one, messages go to the ``earwigbot.wiki`` logger, which gets a
    NullHandler).

    As with User, site.get_page() is preferred.

    __init__() will not do any API queries, but it will use basic namespace
    logic to determine our namespace ID and if we are a talkpage.
    """
    super(Page, self).__init__(site)
    self._site = site
    self._title = title.strip()
    self._follow_redirects = self._keep_following = follow_redirects
    self._pageid = pageid

    # Set up our internal logger:
    if logger:
        self._logger = logger
    else:  # Just set up a null logger to eat up our messages:
        self._logger = getLogger("earwigbot.wiki")
        # The logger is shared by name, so only attach a NullHandler once
        # instead of adding another one for every Page that is created:
        handlers = self._logger.handlers
        if not any(isinstance(hdlr, NullHandler) for hdlr in handlers):
            self._logger.addHandler(NullHandler())

    # Attributes to be loaded through the API:
    self._exists = self.PAGE_UNKNOWN
    self._is_redirect = None
    self._lastrevid = None
    self._protection = None
    self._fullurl = None
    self._content = None
    self._creator = None

    # Attributes used for editing/deleting/protecting/etc:
    self._token = None
    self._basetimestamp = None
    self._starttimestamp = None

    # Try to determine the page's namespace using our site's namespace
    # converter. Compare against the stripped title, so that surrounding
    # whitespace cannot make a bare "Category" look like a namespace prefix:
    prefix = self._title.split(":", 1)[0]
    if prefix != self._title:  # ignore a page titled "Category" or "User"
        try:
            self._namespace = self.site.namespace_name_to_id(prefix)
        except exceptions.NamespaceNotFoundError:
            self._namespace = 0
    else:
        self._namespace = 0

    # Is this a talkpage? Talkpages have odd IDs, while content pages have
    # even IDs, excluding the "special" namespaces:
    if self._namespace < 0:
        self._is_talkpage = False
    else:
        self._is_talkpage = self._namespace % 2 == 1
def __repr__(self): | |||
"""Return the canonical string representation of the Page.""" | |||
res = "Page(title={0!r}, follow_redirects={1!r}, site={2!r})" | |||
return res.format(self._title, self._follow_redirects, self._site) | |||
def __str__(self): | |||
"""Return a nice string representation of the Page.""" | |||
return '<Page "{0}" of {1}>'.format(self.title, str(self.site)) | |||
def _assert_validity(self): | |||
"""Used to ensure that our page's title is valid. | |||
If this method is called when our page is not valid (and after | |||
_load_attributes() has been called), InvalidPageError will be raised. | |||
Note that validity != existence. If a page's title is invalid (e.g, it | |||
contains "[") it will always be invalid, and cannot be edited. | |||
""" | |||
if self._exists == self.PAGE_INVALID: | |||
e = u"Page '{0}' is invalid.".format(self._title) | |||
raise exceptions.InvalidPageError(e) | |||
def _assert_existence(self): | |||
"""Used to ensure that our page exists. | |||
If this method is called when our page doesn't exist (and after | |||
_load_attributes() has been called), PageNotFoundError will be raised. | |||
It will also call _assert_validity() beforehand. | |||
""" | |||
self._assert_validity() | |||
if self._exists == self.PAGE_MISSING: | |||
e = u"Page '{0}' does not exist.".format(self._title) | |||
raise exceptions.PageNotFoundError(e) | |||
def _load(self): | |||
"""Call _load_attributes() and follows redirects if we're supposed to. | |||
This method will only follow redirects if follow_redirects=True was | |||
passed to __init__() (perhaps indirectly passed by site.get_page()). | |||
It avoids the API's &redirects param in favor of manual following, | |||
so we can act more realistically (we don't follow double redirects, and | |||
circular redirects don't break us). | |||
This will raise RedirectError if we have a problem following, but that | |||
is a bug and should NOT happen. | |||
If we're following a redirect, this will make a grand total of three | |||
API queries. It's a lot, but each one is quite small. | |||
""" | |||
self._load_attributes() | |||
if self._keep_following and self._is_redirect: | |||
self._title = self.get_redirect_target() | |||
self._keep_following = False # don't follow double redirects | |||
self._content = None # reset the content we just loaded | |||
self._load_attributes() | |||
def _load_attributes(self, result=None): | |||
"""Load various data from the API in a single query. | |||
Loads self._title, ._exists, ._is_redirect, ._pageid, ._fullurl, | |||
._protection, ._namespace, ._is_talkpage, ._creator, ._lastrevid, | |||
._token, and ._starttimestamp using the API. It will do a query of | |||
its own unless *result* is provided, in which case we'll pretend | |||
*result* is what the query returned. | |||
Assuming the API is sound, this should not raise any exceptions. | |||
""" | |||
if not result: | |||
query = self.site.api_query | |||
result = query(action="query", rvprop="user", intoken="edit", | |||
prop="info|revisions", rvlimit=1, rvdir="newer", | |||
titles=self._title, inprop="protection|url") | |||
res = result["query"]["pages"].values()[0] | |||
self._title = res["title"] # Normalize our pagename/title | |||
self._is_redirect = "redirect" in res | |||
self._pageid = int(result["query"]["pages"].keys()[0]) | |||
if self._pageid < 0: | |||
if "missing" in res: | |||
# If it has a negative ID and it's missing; we can still get | |||
# data like the namespace, protection, and URL: | |||
self._exists = self.PAGE_MISSING | |||
else: | |||
# If it has a negative ID and it's invalid, then break here, | |||
# because there's no other data for us to get: | |||
self._exists = self.PAGE_INVALID | |||
return | |||
else: | |||
self._exists = self.PAGE_EXISTS | |||
self._fullurl = res["fullurl"] | |||
self._protection = res["protection"] | |||
try: | |||
self._token = res["edittoken"] | |||
except KeyError: | |||
pass | |||
else: | |||
self._starttimestamp = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime()) | |||
# We've determined the namespace and talkpage status in __init__() | |||
# based on the title, but now we can be sure: | |||
self._namespace = res["ns"] | |||
self._is_talkpage = self._namespace % 2 == 1 # talkpages have odd IDs | |||
# These last two fields will only be specified if the page exists: | |||
self._lastrevid = res.get("lastrevid") | |||
try: | |||
self._creator = res['revisions'][0]['user'] | |||
except KeyError: | |||
pass | |||
def _load_content(self, result=None): | |||
"""Load current page content from the API. | |||
If *result* is provided, we'll pretend that is the result of an API | |||
query and try to get content from that. Otherwise, we'll do an API | |||
query on our own. | |||
Don't call this directly, ever; use reload() followed by get() if you | |||
want to force content reloading. | |||
""" | |||
if not result: | |||
query = self.site.api_query | |||
result = query(action="query", prop="revisions", rvlimit=1, | |||
rvprop="content|timestamp", titles=self._title) | |||
res = result["query"]["pages"].values()[0] | |||
try: | |||
self._content = res["revisions"][0]["*"] | |||
self._basetimestamp = res["revisions"][0]["timestamp"] | |||
except KeyError: | |||
# This can only happen if the page was deleted since we last called | |||
# self._load_attributes(). In that case, some of our attributes are | |||
# outdated, so force another self._load_attributes(): | |||
self._load_attributes() | |||
self._assert_existence() | |||
def _edit(self, params=None, text=None, summary=None, minor=None, bot=None,
          force=None, section=None, captcha_id=None, captcha_word=None,
          tries=0):
    """Edit the page!

    If *params* is given, we'll use it as our API query parameters (after
    refreshing its ``token``). Otherwise, we'll build params using the
    given kwargs via _build_edit_params().

    We'll then try to do the API query, and hand any coded APIError to
    _handle_edit_errors(), which either raises a more specific exception
    or retries the edit itself after logging in. *tries* counts those
    retries so that we only attempt one re-login.

    Returns ``None``. Raises PermissionsError if no edit token can be
    obtained, EditError if the API reports an unrecognised failure, and
    whatever _assert_validity(), _handle_edit_errors() or
    _handle_assert_edit() raise.
    """
    # Try to get our edit token, and die if we can't:
    if not self._token:
        self._load_attributes()
        if not self._token:
            e = "You don't have permission to edit this page."
            raise exceptions.PermissionsError(e)

    # Weed out invalid pages before we get too far:
    self._assert_validity()

    # Build our API query string:
    if not params:
        params = self._build_edit_params(text, summary, minor, bot, force,
                                         section, captcha_id, captcha_word)
    else:  # Make sure we have the right token:
        params["token"] = self._token

    # Try the API query, catching most errors with our handler:
    try:
        result = self.site.api_query(**params)
    except exceptions.APIError as error:
        if not hasattr(error, "code"):
            raise  # We can only handle errors with a code attribute
        result = self._handle_edit_errors(error, params, tries)
        if result is None:
            # The handler retried the edit through a nested _edit() call,
            # which returns None and has already reset our cached state.
            # There is no API result left to inspect here.
            return

    # If everything was successful, reset invalidated attributes:
    if result["edit"]["result"] == "Success":
        self._content = None
        self._basetimestamp = None
        self._exists = self.PAGE_UNKNOWN
        return

    # If we're here, then the edit failed. If it's because of AssertEdit,
    # handle that. Otherwise, die - something odd is going on:
    try:
        assertion = result["edit"]["assert"]
    except KeyError:
        raise exceptions.EditError(result["edit"])
    self._handle_assert_edit(assertion, params, tries)
def _build_edit_params(self, text, summary, minor, bot, force, section,
                       captcha_id, captcha_word):
    """Given some keyword arguments, build an API edit query string.

    Returns a dict of ``action=edit`` parameters containing the title,
    text, token, summary and an MD5 checksum of the (UTF-8 encoded) text.

    *section* is included whenever it is not ``None`` or empty, so that
    ``0`` (the lead section) is passed through rather than being mistaken
    for "no section". The captcha fields are only added when both are
    given. Unless *force* is true, the start/base timestamps are attached
    and ``createonly`` is set for pages we believe are missing; with
    *force*, ``recreate`` is set instead.
    """
    unitxt = text.encode("utf8") if isinstance(text, unicode) else text
    hashed = md5(unitxt).hexdigest()  # Checksum to ensure text is correct

    params = {"action": "edit", "title": self._title, "text": text,
              "token": self._token, "summary": summary, "md5": hashed}

    # Compare against None/"" explicitly: a plain truth test would drop
    # section 0 and turn a lead-section edit into a whole-page replacement.
    if section is not None and section != "":
        params["section"] = section
    if captcha_id and captcha_word:
        params["captchaid"] = captcha_id
        params["captchaword"] = captcha_word
    if minor:
        params["minor"] = "true"
    else:
        params["notminor"] = "true"
    if bot:
        params["bot"] = "true"

    if not force:
        params["starttimestamp"] = self._starttimestamp
        if self._basetimestamp:
            params["basetimestamp"] = self._basetimestamp
        if self._exists == self.PAGE_MISSING:
            # Page does not exist; don't edit if it already exists:
            params["createonly"] = "true"
    else:
        params["recreate"] = "true"

    return params
def _handle_edit_errors(self, error, params, tries):
    """Translate a coded API edit error into an exception, or retry.

    *error* must carry ``code`` and ``info`` attributes. Permission,
    conflict, content and filter errors are re-raised as the matching
    exception class. "-anon" errors mean we are logged out: if login
    credentials are available and this is the first attempt
    (``tries == 0``), we log in, drop the stale token and re-run the edit
    with *params*, returning whatever that call returns. Anything
    unrecognised becomes a generic EditError.
    """
    code = error.code

    if code in ("noedit", "cantcreate", "protectedtitle",
                "noimageredirect"):
        raise exceptions.PermissionsError(error.info)

    if code in ("noedit-anon", "cantcreate-anon", "noimageredirect-anon"):
        if not all(self.site._login_info):
            # Insufficient login info:
            raise exceptions.PermissionsError(error.info)
        if tries != 0:
            # We already tried to log in and failed!
            e = "Although we should be logged in, we are not. This may be a cookie problem or an odd bug."
            raise exceptions.LoginError(e)
        # We have login info; try to login:
        self.site._login(self.site._login_info)
        self._token = None  # Need a new token; old one is invalid now
        return self._edit(params=params, tries=1)

    if code in ("editconflict", "pagedeleted", "articleexists"):
        # Our cached view of the page is now invalid:
        self._content = None
        self._basetimestamp = None
        self._exists = self.PAGE_UNKNOWN
        raise exceptions.EditConflictError(error.info)

    if code in ("emptypage", "emptynewsection"):
        raise exceptions.NoContentError(error.info)
    if code == "contenttoobig":
        raise exceptions.ContentTooBigError(error.info)
    if code == "spamdetected":
        raise exceptions.SpamDetectedError(error.info)
    if code == "filtered":
        raise exceptions.FilteredError(error.info)

    raise exceptions.EditError(": ".join((error.code, error.info)))
def _handle_assert_edit(self, assertion, params, tries):
    """React to an edit rejected by a failed AssertEdit assertion.

    For the 'user' and 'bot' assertions we assume we were logged out: if
    login credentials exist and this is the first attempt
    (``tries == 0``), log in again, discard the token and re-run the edit
    with *params*. Without credentials, PermissionsError is raised. After
    a failed retry, 'user' raises LoginError and 'bot' raises
    PermissionsError (we have no bot flag). Any other assertion raises
    PermissionsError immediately.
    """
    if assertion == "user" or assertion == "bot":
        is_user = assertion == "user"

        if not all(self.site._login_info):
            # Insufficient login info:
            if is_user:
                e = "AssertEdit: user assertion failed, and no login info was provided."
            else:
                e = "AssertEdit: bot assertion failed, and no login info was provided."
            raise exceptions.PermissionsError(e)

        if tries == 0:
            # Try to log in, in case we simply got logged out:
            self.site._login(self.site._login_info)
            self._token = None  # Need a new token; old one is invalid now
            return self._edit(params=params, tries=1)

        if is_user:
            # We already tried to log in and failed!
            e = "Although we should be logged in, we are not. This may be a cookie problem or an odd bug."
            raise exceptions.LoginError(e)

        # We already tried to log in, so we don't have a bot flag:
        e = "AssertEdit: bot assertion failed: we don't have a bot flag!"
        raise exceptions.PermissionsError(e)

    # Unknown assertion, maybe "true", "false", or "exists":
    e = "AssertEdit: assertion '{0}' failed.".format(assertion)
    raise exceptions.PermissionsError(e)
@property
def site(self):
    """The Site object this page belongs to (read-only)."""
    return self._site
@property
def title(self):
    """The page's title, or "pagename".

    Reading this never triggers an API query by itself. Other attributes
    and methods that do query the API (such as :py:attr:`exists` and
    :py:meth:`get`) may update the stored title, for instance by
    "normalizing" it or by following redirects when
    :py:attr:`self._follow_redirects` is ``True``.
    """
    return self._title
@property
def exists(self):
    """Whether or not the page exists.

    The value is a number whose magnitude is irrelevant; compare it
    against :py:attr:`self.PAGE_INVALID <PAGE_INVALID>`,
    :py:attr:`self.PAGE_MISSING <PAGE_MISSING>`, or
    :py:attr:`self.PAGE_EXISTS <PAGE_EXISTS>`.

    The API is only queried if the state is still unknown.
    """
    state_unknown = self._exists == self.PAGE_UNKNOWN
    if state_unknown:
        self._load()
    return self._exists
@property
def pageid(self):
    """An integer ID representing the page.

    A cached ID is returned straight away. Otherwise the page is loaded
    from the API (only if that has not happened yet) and must exist,
    since missing pages have no ID. The ID is typically already known
    when the *pageid* argument was passed to :py:meth:`__init__`, e.g.
    for pages produced by an SQL generator such as
    :py:meth:`category.get_members()
    <earwigbot.wiki.category.Category.get_members>`.

    Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
    :py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
    invalid or the page does not exist, respectively.
    """
    if not self._pageid:
        if self._exists == self.PAGE_UNKNOWN:
            self._load()
        self._assert_existence()  # Missing pages do not have IDs
    return self._pageid
@property
def url(self):
    """The page's URL.

    Like :py:meth:`title`, reading this never queries the API. If a full
    URL has already been stored it is returned as-is; otherwise one is
    derived from the title, the site's article path and the site's URL.
    """
    if self._fullurl:
        return self._fullurl

    # Spaces become underscores before percent-encoding the UTF-8 title:
    underscored = self._title.encode("utf8").replace(" ", "_")
    slug = quote(underscored, safe="/:").decode("utf8")
    article_path = self.site._article_path.replace("$1", slug)
    return u"".join((self.site.url, article_path))
@property
def namespace(self):
    """The page's namespace ID (an integer).

    Like :py:meth:`title`, reading this never queries the API. Until the
    API has been queried for this page, the value is our own guess based
    on the title.
    """
    return self._namespace
@property
def protection(self):
    """The page's current protection status.

    The API is only queried if the page has not been loaded yet.

    Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` if the page
    name is invalid. A missing page is not an error here, because missing
    pages can still be create-protected.
    """
    not_loaded = self._exists == self.PAGE_UNKNOWN
    if not_loaded:
        self._load()
    self._assert_validity()  # Invalid pages cannot be protected
    return self._protection
@property
def is_talkpage(self):
    """``True`` if the page is a talkpage, otherwise ``False``.

    Like :py:meth:`title`, reading this never queries the API. Until the
    API has been queried for this page, the value is our own guess based
    on the page's namespace.
    """
    return self._is_talkpage
@property
def is_redirect(self):
    """``True`` if the page is a redirect, otherwise ``False``.

    The API is only queried if the page has not been loaded yet. No
    exception is raised for missing or invalid pages; those are reported
    as ``False``.
    """
    not_loaded = self._exists == self.PAGE_UNKNOWN
    if not_loaded:
        self._load()
    return self._is_redirect
def reload(self):
    """Forcibly reload the page's attributes.

    Emphasis on *reload*: only call this when there is reason to believe
    the attributes have changed. Page content is refreshed too, but only
    if it had already been loaded before.
    """
    self._load()
    if self._content is None:
        # Content was never fetched, so there is nothing to refresh:
        return
    self._load_content()
def toggle_talk(self, follow_redirects=None):
    """Return a content page's talk page, or vice versa.

    The title of the new page is determined by namespace logic, not API
    queries. We won't make any API queries on our own.

    Titles in namespace 0 have no namespace prefix, so any colon in them
    is part of the page name and is kept (only a single leading ``:`` is
    dropped). For every other namespace, the text before the first colon
    is replaced by the new namespace's name.

    If *follow_redirects* is anything other than ``None`` (the default), it
    will be passed to the new :py:class:`~earwigbot.wiki.page.Page`
    object's :py:meth:`__init__`. Otherwise, we'll use the value passed to
    our own :py:meth:`__init__`.

    Will raise :py:exc:`~earwigbot.exceptions.InvalidPageError` if we try
    to get the talk page of a special page (in the ``Special:`` or
    ``Media:`` namespaces), but we won't raise an exception if our page is
    otherwise missing or invalid.
    """
    if self._namespace < 0:
        ns = self.site.namespace_id_to_name(self._namespace)
        e = u"Pages in the {0} namespace can't have talk pages.".format(ns)
        raise exceptions.InvalidPageError(e)

    if self._is_talkpage:
        new_ns = self._namespace - 1
    else:
        new_ns = self._namespace + 1

    if self._namespace == 0:
        # No prefix to strip: splitting on ":" here would truncate titles
        # such as "Star Wars: Episode I". Only drop an explicit leading
        # colon (":Title" is just another way of writing "Title").
        if self._title.startswith(":"):
            body = self._title[1:]
        else:
            body = self._title
    else:
        try:
            body = self._title.split(":", 1)[1]
        except IndexError:
            body = self._title

    new_prefix = self.site.namespace_id_to_name(new_ns)

    # If the new page is in namespace 0, don't do ":Title" (it's correct,
    # but unnecessary), just do "Title":
    if new_prefix:
        new_title = u":".join((new_prefix, body))
    else:
        new_title = body

    if follow_redirects is None:
        follow_redirects = self._follow_redirects
    return Page(self.site, new_title, follow_redirects)
def get(self):
    """Return page content, which is cached if you try to call get again.

    If the page has not been loaded at all, one combined API query
    fetches both its attributes and its content. If the page turns out to
    be a redirect and we were asked to follow redirects, the title is
    switched to the target and the page is fetched once more (double
    redirects are not followed).

    Raises InvalidPageError or PageNotFoundError if the page name is
    invalid or the page does not exist, respectively.
    """
    if self._exists != self.PAGE_UNKNOWN:
        # Attributes are already known. They may be outdated if the page
        # was deleted since, but self._load_content() copes with that:
        self._assert_existence()
        if self._content is None:
            self._load_content()
        return self._content

    # Kill two birds with one stone: a single query for both our
    # attributes and our page content.
    result = self.site.api_query(
        action="query", rvlimit=1, titles=self._title,
        prop="info|revisions", inprop="protection|url",
        intoken="edit", rvprop="content|timestamp")
    self._load_attributes(result=result)
    self._assert_existence()
    self._load_content(result=result)

    # Follow redirects if we're told to:
    if self._keep_following and self._is_redirect:
        self._title = self.get_redirect_target()
        self._keep_following = False  # Don't follow double redirects
        self._exists = self.PAGE_UNKNOWN  # Force another API query
        self.get()

    return self._content
def get_redirect_target(self):
    """If the page is a redirect, return its destination.

    The target is the text of the first ``[[...]]`` link following a
    ``#redirect`` marker (case-insensitive) at the very start of the page
    content, as returned by :py:meth:`get`.

    Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
    :py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
    invalid or the page does not exist, respectively. Raises
    :py:exc:`~earwigbot.exceptions.RedirectError` if the page is not a
    redirect.
    """
    # Raw string: "\s", "\#" and "\[" are not valid string escapes, so a
    # plain literal only works by accident and warns on newer Pythons.
    re_redirect = r"^\s*\#\s*redirect\s*\[\[(.*?)\]\]"
    content = self.get()
    match = re.search(re_redirect, content, flags=re.I)
    if match is None:
        e = "The page does not appear to have a redirect target."
        raise exceptions.RedirectError(e)
    return match.group(1)
def get_creator(self):
    """Return the User object for the first person to edit the page.

    The API is only queried when needed. The creator normally arrives
    with the other attributes (everything except content) in
    :py:meth:`_load_attributes`. Because of an API limitation (the editor
    of one revision and the content of another, at opposite ends of the
    history, cannot be fetched together), attributes loaded only through
    :py:meth:`get` lack the creator, and a second query is made here.

    Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
    :py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
    invalid or the page does not exist, respectively.
    """
    never_loaded = self._exists == self.PAGE_UNKNOWN
    if never_loaded:
        self._load()
    self._assert_existence()

    creator = self._creator
    if not creator:
        # Attributes came from get(); fetch the creator separately:
        self._load()
        self._assert_existence()
        creator = self._creator
    return self.site.get_user(creator)
def parse(self):
    """Parse the page content for templates, links, etc.

    The content comes from :py:meth:`get` and the actual parsing is done
    by :py:mod:`mwparserfromhell`, whose result is returned unchanged.
    Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
    :py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
    invalid or the page does not exist, respectively.
    """
    wikitext = self.get()
    return mwparserfromhell.parse(wikitext)
def edit(self, text, summary, minor=False, bot=True, force=False):
    """Replace the page's content or create a new page.

    *text* is the new page content and *summary* the edit summary. A true
    *minor* marks the edit as minor. A true *bot* marks it as a bot edit,
    but only if we actually have a bot flag.

    Use *force* to push the new content even if there's an edit conflict
    or the page was deleted/recreated between getting our edit token and
    editing our page. Be careful with this!
    """
    options = {"text": text, "summary": summary, "minor": minor,
               "bot": bot, "force": force}
    self._edit(**options)
def add_section(self, text, title, minor=False, bot=True, force=False):
    """Add a new section to the bottom of the page.

    Takes the same arguments as :py:meth:`edit`, except that a section
    *title* is given in place of an edit summary. It raises the same
    exceptions as :py:meth:`edit`.

    This should create the page if it does not already exist, with just
    the new section as content.
    """
    options = {"text": text, "summary": title, "minor": minor,
               "bot": bot, "force": force, "section": "new"}
    self._edit(**options)
def check_exclusion(self, username=None, optouts=None):
    """Check whether or not we are allowed to edit the page.

    Return ``True`` if we *are* allowed to edit this page, and ``False`` if
    we aren't. The decision is based on the ``{{bots}}`` and
    ``{{nobots}}`` templates found in the parsed page content.

    *username* is the name looked up in allow/deny lists (e.g.
    ``{{bots|allow=EarwigBot}}`` or ``{{bots|deny=FooBot,EarwigBot}}``).
    It's ``None`` by default, in which case we take our username from
    :py:meth:`site.get_user() <earwigbot.wiki.site.Site.get_user>`.\
    :py:attr:`~earwigbot.wiki.user.User.name`.

    *optouts* is a list of message names this check counts as for the
    purpose of opt-out; it defaults to ``None``, which ignores opt-outs
    completely. For example, if *optouts* is ``["nolicense"]``, we'll
    return ``False`` on ``{{bots|optout=nolicense}}`` or
    ``{{bots|optout=all}}``, but ``True`` on
    ``{{bots|optout=orfud,norationale,replaceable}}``.

    All comparisons are case-insensitive.
    """
    def listed(template, param):
        # Split a comma-separated parameter into lowercase, trimmed items.
        raw = template.get(param).value
        return [entry.strip().lower() for entry in raw.split(",")]

    if not username:
        username = self.site.get_user().name

    # Lowercase everything:
    username = username.lower()
    if optouts:
        optouts = [optout.lower() for optout in optouts]
    else:
        optouts = []

    r_bots = r"\{\{\s*(no)?bots\s*(\||\}\})"
    templates = self.parse().ifilter_templates(recursive=True,
                                               matches=r_bots)
    for template in templates:
        if template.has_param("deny"):
            denied = listed(template, "deny")
            if "all" in denied or username in denied:
                return False
        if template.has_param("allow"):
            allowed = listed(template, "allow")
            if "all" in allowed or username in allowed:
                # Explicitly allowed here; move on to the next template.
                continue
        if optouts and template.has_param("optout"):
            tasks = listed(template, "optout")
            if "all" in tasks or any(opt in tasks for opt in optouts):
                return False
        if template.name.strip().lower() == "nobots":
            return False

    return True
@@ -0,0 +1,849 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from cookielib import CookieJar | |||
from gzip import GzipFile | |||
from json import loads | |||
from logging import getLogger, NullHandler | |||
from os.path import expanduser | |||
from StringIO import StringIO | |||
from threading import Lock | |||
from time import sleep, time | |||
from urllib import quote_plus, unquote_plus | |||
from urllib2 import build_opener, HTTPCookieProcessor, URLError | |||
from urlparse import urlparse | |||
import oursql | |||
from earwigbot import exceptions | |||
from earwigbot.wiki import constants | |||
from earwigbot.wiki.category import Category | |||
from earwigbot.wiki.page import Page | |||
from earwigbot.wiki.user import User | |||
__all__ = ["Site"] | |||
class Site(object):
    """
    **EarwigBot: Wiki Toolset: Site**

    Represents a site, with support for API queries and returning
    :py:class:`~earwigbot.wiki.page.Page`,
    :py:class:`~earwigbot.wiki.user.User`,
    and :py:class:`~earwigbot.wiki.category.Category` objects. The constructor
    takes a bunch of arguments and you probably won't need to call it directly,
    rather :py:meth:`wiki.get_site() <earwigbot.wiki.sitesdb.SitesDB.get_site>`
    for returning :py:class:`Site`
    instances, :py:meth:`wiki.add_site()
    <earwigbot.wiki.sitesdb.SitesDB.add_site>` for adding new ones to our
    database, and :py:meth:`wiki.remove_site()
    <earwigbot.wiki.sitesdb.SitesDB.remove_site>` for removing old ones from
    our database, should suffice.

    *Attributes:*

    - :py:attr:`name`:    the site's name (or "wikiid"), like ``"enwiki"``
    - :py:attr:`project`: the site's project name, like ``"wikipedia"``
    - :py:attr:`lang`:    the site's language code, like ``"en"``
    - :py:attr:`domain`:  the site's web domain, like ``"en.wikipedia.org"``
    - :py:attr:`url`:     the site's URL, like ``"https://en.wikipedia.org"``

    *Public methods:*

    - :py:meth:`api_query`:            does an API query with kwargs as params
    - :py:meth:`sql_query`:            does an SQL query and yields its results
    - :py:meth:`get_maxlag`:           returns the internal database lag
    - :py:meth:`get_replag`:           estimates the external database lag
    - :py:meth:`namespace_id_to_name`: returns names associated with an NS id
    - :py:meth:`namespace_name_to_id`: returns the ID associated with a NS name
    - :py:meth:`get_page`:             returns a Page for the given title
    - :py:meth:`get_category`:         returns a Category for the given title
    - :py:meth:`get_user`:             returns a User object for the given name
    - :py:meth:`delegate`:             controls when the API or SQL is used
    """
    # Identifiers for the two data services a Site can talk to.
    # NOTE(review): presumably these are the values delegate() chooses
    # between -- confirm against delegate(), which is not visible here.
    SERVICE_API = 1
    SERVICE_SQL = 2
def __init__(self, name=None, project=None, lang=None, base_url=None,
             article_path=None, script_path=None, sql=None,
             namespaces=None, login=(None, None), cookiejar=None,
             user_agent=None, use_https=False, assert_edit=None,
             maxlag=None, wait_between_queries=2, logger=None,
             search_config=None):
    """Constructor for new Site instances.

    This probably isn't necessary to call yourself unless you're building a
    Site that's not in your config and you don't want to add it - normally
    all you need is wiki.get_site(name), which creates the Site for you
    based on your config file and the sites database. We accept a bunch of
    kwargs, but the only ones you really "need" are *base_url* and
    *script_path*; this is enough to figure out an API url. *login*, a
    tuple of (username, password), is highly recommended. *cookiejar* will
    be used to store cookies, and we'll use a normal CookieJar if none is
    given.

    First, we'll store the given arguments as attributes, then set up our
    URL opener. We'll load any of the attributes that weren't given from
    the API, and then log in if a username/pass was given and we aren't
    already logged in.

    If no *logger* is given, the shared "earwigbot.wiki" logger is used
    with a NullHandler so that our messages are silently discarded; that
    handler is attached at most once, however many Sites are created.
    """
    # Attributes referring to site information, filled in by an API query
    # if they are missing (and an API url can be determined):
    self._name = name
    self._project = project
    self._lang = lang
    self._base_url = base_url
    self._article_path = article_path
    self._script_path = script_path
    self._namespaces = namespaces

    # Attributes used for API queries:
    self._use_https = use_https
    self._assert_edit = assert_edit
    self._maxlag = maxlag
    self._wait_between_queries = wait_between_queries
    self._max_retries = 6
    self._last_query_time = 0
    self._api_lock = Lock()
    self._api_info_cache = {"maxlag": 0, "lastcheck": 0}

    # Attributes used for SQL queries:
    if sql:
        self._sql_data = sql
    else:
        self._sql_data = {}
    self._sql_conn = None
    self._sql_lock = Lock()
    self._sql_info_cache = {"replag": 0, "lastcheck": 0, "usable": None}

    # Attribute used in copyright violation checks (see CopyrightMixIn):
    if search_config:
        self._search_config = search_config
    else:
        self._search_config = {}

    # Set up cookiejar and URL opener for making API queries:
    if cookiejar is not None:
        self._cookiejar = cookiejar
    else:
        self._cookiejar = CookieJar()
    if not user_agent:
        user_agent = constants.USER_AGENT  # Set default UA
    self._opener = build_opener(HTTPCookieProcessor(self._cookiejar))
    self._opener.addheaders = [("User-Agent", user_agent),
                               ("Accept-Encoding", "gzip")]

    # Set up our internal logger:
    if logger:
        self._logger = logger
    else:  # Just set up a null logger to eat up our messages:
        self._logger = getLogger("earwigbot.wiki")
        # getLogger() returns the same object every time, so only attach
        # a NullHandler if an earlier Site has not already done so:
        handlers = self._logger.handlers
        if not any(isinstance(hdlr, NullHandler) for hdlr in handlers):
            self._logger.addHandler(NullHandler())

    # Get all of the above attributes that were not specified as arguments:
    self._load_attributes()

    # If we have a name/pass and the API says we're not logged in, log in:
    self._login_info = name, password = login
    if name and password:
        logged_in_as = self._get_username_from_cookies()
        if not logged_in_as or name.replace("_", " ") != logged_in_as:
            self._login(login)
def __repr__(self):
    """Return the canonical string representation of the Site.

    The password is never shown: it appears as ``hidden`` when set. The
    cookie jar is rendered as its class name, with its ``filename`` if it
    has one.
    """
    fields = [
        "name={_name!r}", "project={_project!r}", "lang={_lang!r}",
        "base_url={_base_url!r}", "article_path={_article_path!r}",
        "script_path={_script_path!r}", "use_https={_use_https!r}",
        "assert_edit={_assert_edit!r}", "maxlag={_maxlag!r}",
        "sql={_sql_data!r}", "login={0}", "user_agent={2!r}",
        "cookiejar={1}"]
    template = "Site(" + ", ".join(fields) + ")"

    name, password = self._login_info
    login = "({0}, {1})".format(repr(name), "hidden" if password else None)

    jar = self._cookiejar
    if hasattr(jar, "filename"):
        jar_args = "({0!r})".format(getattr(jar, "filename"))
    else:
        jar_args = "()"
    cookies = jar.__class__.__name__ + jar_args

    # The User-Agent is the first header installed on the opener:
    agent = self._opener.addheaders[0][1]
    return template.format(login, cookies, agent, **self.__dict__)
def __str__(self):
    """Return a short, human-readable description of the Site."""
    template = "<Site {0} ({1}:{2}) at {3}>"
    details = (self.name, self.project, self.lang, self.domain)
    return template.format(*details)
def _unicodeify(self, value, encoding="utf8"):
    """Return *value* as unicode, decoding it with *encoding* if needed.

    Values that are already unicode are returned untouched.
    """
    already_unicode = isinstance(value, unicode)
    return value if already_unicode else unicode(value, encoding)
def _urlencode_utf8(self, params):
    """Implement urllib.urlencode() with support for unicode input.

    Each key and value of the *params* mapping is converted to a byte
    string (unicode is encoded as UTF-8, anything else goes through
    ``str()``), quoted with quote_plus(), and the ``key=value`` pairs are
    joined with ``&``.
    """
    def as_bytes(item):
        if isinstance(item, unicode):
            return item.encode("utf8")
        return str(item)

    pairs = []
    for name, value in params.iteritems():
        quoted_name = quote_plus(as_bytes(name))
        quoted_value = quote_plus(as_bytes(value))
        pairs.append(quoted_name + "=" + quoted_value)
    return "&".join(pairs)
def _api_query(self, params, tries=0, wait=5, ignore_maxlag=False):
    """Do an API query with *params* as a dict of parameters.

    See the documentation for :py:meth:`api_query` for full implementation
    details.

    The call is throttled so that consecutive queries are at least
    ``self._wait_between_queries`` seconds apart. The request is built by
    _build_api_query(), sent through our URL opener, gunzipped if the
    server compressed it, and finally handed to
    _handle_api_query_result() together with *params*, *tries* and
    *wait*; that method's return value is returned. The HTTP response is
    always closed once it has been read.

    Raises :py:exc:`~earwigbot.exceptions.APIError` if the request itself
    fails.
    """
    since_last_query = time() - self._last_query_time  # Throttling support
    if since_last_query < self._wait_between_queries:
        wait_time = self._wait_between_queries - since_last_query
        log = "Throttled: waiting {0} seconds".format(round(wait_time, 2))
        self._logger.debug(log)
        sleep(wait_time)
    self._last_query_time = time()

    url, data = self._build_api_query(params, ignore_maxlag)
    if "lgpassword" in params:
        # Never write login credentials to the log:
        self._logger.debug("{0} -> <hidden>".format(url))
    else:
        self._logger.debug("{0} -> {1}".format(url, data))

    try:
        response = self._opener.open(url, data)
    except URLError as error:
        if hasattr(error, "reason"):
            e = "API query failed: {0}.".format(error.reason)
        elif hasattr(error, "code"):
            e = "API query failed: got an error code of {0}."
            e = e.format(error.code)
        else:
            e = "API query failed."
        raise exceptions.APIError(e)

    # Release the connection even if reading the body fails:
    try:
        result = response.read()
        gzipped = response.headers.get("Content-Encoding") == "gzip"
    finally:
        response.close()

    if gzipped:
        stream = StringIO(result)
        gzipper = GzipFile(fileobj=stream)
        try:
            result = gzipper.read()
        finally:
            gzipper.close()

    return self._handle_api_query_result(result, params, tries, wait)
def _build_api_query(self, params, ignore_maxlag):
    """Given API query params, return the URL to query and POST data.

    *params* is updated in place: ``format=json`` is always set,
    ``assert`` is added when an AssertEdit mode is configured, and
    ``maxlag`` is added when configured unless *ignore_maxlag* is true.
    Returns a ``(url, data)`` tuple, where *data* is the URL-encoded
    params.

    Raises :py:exc:`~earwigbot.exceptions.APIError` if we lack the base
    URL or script path needed to build the API URL.
    """
    api_known = self._base_url and self._script_path is not None
    if not api_known:
        e = "Tried to do an API query, but no API URL is known."
        raise exceptions.APIError(e)

    url = self.url + self._script_path + "/api.php"

    params["format"] = "json"  # This is the only format we understand
    if self._assert_edit:  # If requested, ensure that we're logged in
        params["assert"] = self._assert_edit
    use_maxlag = self._maxlag and not ignore_maxlag
    if use_maxlag:
        # If requested, don't overload the servers:
        params["maxlag"] = self._maxlag

    return url, self._urlencode_utf8(params)
def _handle_api_query_result(self, result, params, tries, wait):
    """Given the result of an API query, attempt to return useful data.

    *result* is the raw response body. If it decodes to JSON without an
    ``error`` entry, the decoded object is returned. A ``maxlag`` error
    makes us sleep for *wait* seconds and repeat the query with a doubled
    wait, up to ``self._max_retries`` attempts. Any other error raises
    :py:exc:`~earwigbot.exceptions.APIError` carrying ``code`` and
    ``info`` attributes, as does undecodable JSON (without those
    attributes).
    """
    try:
        decoded = loads(result)  # Try to parse as a JSON object
    except ValueError:
        e = "API query failed: JSON could not be decoded."
        raise exceptions.APIError(e)

    try:
        code = decoded["error"]["code"]
        info = decoded["error"]["info"]
    except (TypeError, KeyError):
        # No error entry, so all is well; return the decoded JSON:
        return decoded

    if code != "maxlag":  # Some unknown error occurred
        e = 'API query failed: got error "{0}"; server says: "{1}".'
        error = exceptions.APIError(e.format(code, info))
        error.code, error.info = code, info
        raise error

    # We've been throttled by the server:
    if tries >= self._max_retries:
        e = "Maximum number of retries reached ({0})."
        raise exceptions.APIError(e.format(self._max_retries))
    tries += 1
    msg = 'Server says "{0}"; retrying in {1} seconds ({2}/{3})'
    self._logger.info(msg.format(info, wait, tries, self._max_retries))
    sleep(wait)
    return self._api_query(params, tries=tries, wait=wait*2)
def _load_attributes(self, force=False): | |||
"""Load data about our Site from the API. | |||
This function is called by __init__() when one of the site attributes | |||
was not given as a keyword argument. We'll do an API query to get the | |||
missing data, but only if there actually *is* missing data. | |||
Additionally, you can call this with *force* set to True to forcibly | |||
reload all attributes. | |||
""" | |||
# All attributes to be loaded, except _namespaces, which is a special | |||
# case because it requires additional params in the API query: | |||
attrs = [self._name, self._project, self._lang, self._base_url, | |||
self._article_path, self._script_path] | |||
params = {"action": "query", "meta": "siteinfo", "siprop": "general"} | |||
if not self._namespaces or force: | |||
params["siprop"] += "|namespaces|namespacealiases" | |||
result = self.api_query(**params) | |||
self._load_namespaces(result) | |||
elif all(attrs): # Everything is already specified and we're not told | |||
return # to force a reload, so do nothing | |||
else: # We're only loading attributes other than _namespaces | |||
result = self.api_query(**params) | |||
res = result["query"]["general"] | |||
self._name = res["wikiid"] | |||
self._project = res["sitename"].lower() | |||
self._lang = res["lang"] | |||
self._base_url = res["server"] | |||
self._article_path = res["articlepath"] | |||
self._script_path = res["scriptpath"] | |||
def _load_namespaces(self, result): | |||
"""Fill self._namespaces with a dict of namespace IDs and names. | |||
Called by _load_attributes() with API data as *result* when | |||
self._namespaces was not given as an kwarg to __init__(). | |||
""" | |||
self._namespaces = {} | |||
for namespace in result["query"]["namespaces"].values(): | |||
ns_id = namespace["id"] | |||
name = namespace["*"] | |||
try: | |||
canonical = namespace["canonical"] | |||
except KeyError: | |||
self._namespaces[ns_id] = [name] | |||
else: | |||
if name != canonical: | |||
self._namespaces[ns_id] = [name, canonical] | |||
else: | |||
self._namespaces[ns_id] = [name] | |||
for namespace in result["query"]["namespacealiases"]: | |||
ns_id = namespace["id"] | |||
alias = namespace["*"] | |||
self._namespaces[ns_id].append(alias) | |||
def _get_cookie(self, name, domain): | |||
"""Return the named cookie unless it is expired or doesn't exist.""" | |||
for cookie in self._cookiejar: | |||
if cookie.name == name and cookie.domain == domain: | |||
if cookie.is_expired(): | |||
break | |||
return cookie | |||
def _get_username_from_cookies(self): | |||
"""Try to return our username based solely on cookies. | |||
First, we'll look for a cookie named self._name + "Token", like | |||
"enwikiToken". If it exists and isn't expired, we'll assume it's valid | |||
and try to return the value of the cookie self._name + "UserName" (like | |||
"enwikiUserName"). This should work fine on wikis without single-user | |||
login. | |||
If `enwikiToken` doesn't exist, we'll try to find a cookie named | |||
`centralauth_Token`. If this exists and is not expired, we'll try to | |||
return the value of `centralauth_User`. | |||
If we didn't get any matches, we'll return None. Our goal here isn't to | |||
return the most likely username, or what we *want* our username to be | |||
(for that, we'd do self._login_info[0]), but rather to get our current | |||
username without an unnecessary ?action=query&meta=userinfo API query. | |||
""" | |||
name = ''.join((self._name, "Token")) | |||
cookie = self._get_cookie(name, self.domain) | |||
if cookie: | |||
name = ''.join((self._name, "UserName")) | |||
user_name = self._get_cookie(name, self.domain) | |||
if user_name: | |||
return unquote_plus(user_name.value) | |||
for cookie in self._cookiejar: | |||
if cookie.name != "centralauth_Token" or cookie.is_expired(): | |||
continue | |||
base = cookie.domain | |||
if base.startswith(".") and not cookie.domain_initial_dot: | |||
base = base[1:] | |||
if self.domain.endswith(base): | |||
user_name = self._get_cookie("centralauth_User", cookie.domain) | |||
if user_name: | |||
return unquote_plus(user_name.value) | |||
def _get_username_from_api(self): | |||
"""Do a simple API query to get our username and return it. | |||
This is a reliable way to make sure we are actually logged in, because | |||
it doesn't deal with annoying cookie logic, but it results in an API | |||
query that is unnecessary in some cases. | |||
Called by _get_username() (in turn called by get_user() with no | |||
username argument) when cookie lookup fails, probably indicating that | |||
we are logged out. | |||
""" | |||
result = self.api_query(action="query", meta="userinfo") | |||
return result["query"]["userinfo"]["name"] | |||
def _get_username(self): | |||
"""Return the name of the current user, whether logged in or not. | |||
First, we'll try to deduce it solely from cookies, to avoid an | |||
unnecessary API query. For the cookie-detection method, see | |||
_get_username_from_cookies()'s docs. | |||
If our username isn't in cookies, then we're probably not logged in, or | |||
something fishy is going on (like forced logout). In this case, do a | |||
single API query for our username (or IP address) and return that. | |||
""" | |||
name = self._get_username_from_cookies() | |||
if name: | |||
return name | |||
return self._get_username_from_api() | |||
def _save_cookiejar(self): | |||
"""Try to save our cookiejar after doing a (normal) login or logout. | |||
Calls the standard .save() method with no filename. Don't fret if our | |||
cookiejar doesn't support saving (CookieJar raises AttributeError, | |||
FileCookieJar raises NotImplementedError) or no default filename was | |||
given (LWPCookieJar and MozillaCookieJar raise ValueError). | |||
""" | |||
if hasattr(self._cookiejar, "save"): | |||
try: | |||
getattr(self._cookiejar, "save")() | |||
except (NotImplementedError, ValueError): | |||
pass | |||
def _login(self, login, token=None, attempt=0): | |||
"""Safely login through the API. | |||
Normally, this is called by __init__() if a username and password have | |||
been provided and no valid login cookies were found. The only other | |||
time it needs to be called is when those cookies expire, which is done | |||
automatically by api_query() if a query fails. | |||
Recent versions of MediaWiki's API have fixed a CSRF vulnerability, | |||
requiring login to be done in two separate requests. If the response | |||
from from our initial request is "NeedToken", we'll do another one with | |||
the token. If login is successful, we'll try to save our cookiejar. | |||
Raises LoginError on login errors (duh), like bad passwords and | |||
nonexistent usernames. | |||
*login* is a (username, password) tuple. *token* is the token returned | |||
from our first request, and *attempt* is to prevent getting stuck in a | |||
loop if MediaWiki isn't acting right. | |||
""" | |||
name, password = login | |||
if token: | |||
result = self.api_query(action="login", lgname=name, | |||
lgpassword=password, lgtoken=token) | |||
else: | |||
result = self.api_query(action="login", lgname=name, | |||
lgpassword=password) | |||
res = result["login"]["result"] | |||
if res == "Success": | |||
self._save_cookiejar() | |||
elif res == "NeedToken" and attempt == 0: | |||
token = result["login"]["token"] | |||
return self._login(login, token, attempt=1) | |||
else: | |||
if res == "Illegal": | |||
e = "The provided username is illegal." | |||
elif res == "NotExists": | |||
e = "The provided username does not exist." | |||
elif res == "EmptyPass": | |||
e = "No password was given." | |||
elif res == "WrongPass" or res == "WrongPluginPass": | |||
e = "The given password is incorrect." | |||
else: | |||
e = "Couldn't login; server says '{0}'.".format(res) | |||
raise exceptions.LoginError(e) | |||
def _logout(self): | |||
"""Safely logout through the API. | |||
We'll do a simple API request (api.php?action=logout), clear our | |||
cookiejar (which probably contains now-invalidated cookies) and try to | |||
save it, if it supports that sort of thing. | |||
""" | |||
self.api_query(action="logout") | |||
self._cookiejar.clear() | |||
self._save_cookiejar() | |||
def _sql_connect(self, **kwargs):
    """Attempt to establish a connection with this site's SQL database.

    oursql.connect() is called with a *copy* of self._sql_data as its
    kwargs, updated with any kwargs given to this function (which thus take
    precedence over the stored data). On top of that:

    - if none of "read_default_file", "user" and "passwd" is given,
      "read_default_file" is set to ``~/.my.cnf`` in our home directory;
    - "autoping" and "autoreconnect" default to True.

    Working on a copy keeps per-call overrides and these defaults out of
    self._sql_data, so they neither leak into later connections nor get
    picked up by code that reads the site's stored SQL data.

    The new connection is stored as self._sql_conn.

    Will raise SQLError() if the module "oursql" is not available. oursql
    may raise its own exceptions (e.g. oursql.InterfaceError) if it cannot
    establish a connection.
    """
    if not oursql:
        e = "Module 'oursql' is required for SQL queries."
        raise exceptions.SQLError(e)

    args = dict(self._sql_data)
    args.update(kwargs)

    credential_keys = ("read_default_file", "user", "passwd")
    if not any(key in args for key in credential_keys):
        args["read_default_file"] = expanduser("~/.my.cnf")
    args.setdefault("autoping", True)
    args.setdefault("autoreconnect", True)

    self._sql_conn = oursql.connect(**args)
def _get_service_order(self):
    """Return a preferred order for using services (e.g. the API and SQL).

    A list is returned, starting with the most preferred service first and
    ending with the least preferred one. Currently, there are only two
    services. SERVICE_API will always be included since the API is expected
    to be always usable. In normal circumstances, self.SERVICE_SQL will be
    first (with the API second), since using SQL directly is easier on the
    servers than making web queries with the API. self.SERVICE_SQL will be
    second if replag is greater than five minutes (a cached value updated
    every two minutes at most), *unless* API lag (a cached value updated
    every five minutes at most) is also higher than our maxlag setting.
    If no maxlag is configured, the API lag is not consulted at all.

    self.SERVICE_SQL will not be included in the list if we cannot form a
    proper SQL connection; this includes the case where the "oursql" module
    is not available. The result of a failed replag check is cached as
    well, so SQL stays excluded until the next check.
    """
    if not oursql:
        # No driver means no SQL, ever. Bail out before the except clause
        # below tries to look up oursql.Error on a missing module:
        return [self.SERVICE_API]

    now = time()
    if now - self._sql_info_cache["lastcheck"] > 120:
        self._sql_info_cache["lastcheck"] = now
        try:
            self._sql_info_cache["replag"] = sqllag = self.get_replag()
        except (exceptions.SQLError, oursql.Error):
            self._sql_info_cache["usable"] = False
            return [self.SERVICE_API]
        self._sql_info_cache["usable"] = True
    else:
        if not self._sql_info_cache["usable"]:
            return [self.SERVICE_API]
        sqllag = self._sql_info_cache["replag"]

    if sqllag > 300:
        if not self._maxlag:
            return [self.SERVICE_API, self.SERVICE_SQL]
        if now - self._api_info_cache["lastcheck"] > 300:
            self._api_info_cache["lastcheck"] = now
            try:
                self._api_info_cache["maxlag"] = apilag = self.get_maxlag()
            except exceptions.APIError:
                # Treat an unreachable lag figure as "no lag":
                self._api_info_cache["maxlag"] = apilag = 0
        else:
            apilag = self._api_info_cache["maxlag"]
        if apilag > self._maxlag:
            # Both services are lagging, so SQL keeps its usual priority:
            return [self.SERVICE_SQL, self.SERVICE_API]
        return [self.SERVICE_API, self.SERVICE_SQL]

    return [self.SERVICE_SQL, self.SERVICE_API]
@property
def name(self):
    """The name of this Site (the API's "wikiid"), e.g. ``"enwiki"``."""
    return self._name
@property
def project(self):
    """The lowercase name of this Site's project, e.g. ``"wikipedia"``."""
    return self._project
@property
def lang(self):
    """The language code of this Site, e.g. ``"en"`` or ``"es"``."""
    return self._lang
@property
def domain(self):
    """The web domain of this Site, e.g. ``"en.wikipedia.org"``.

    This is the network-location part of the Site's base URL.
    """
    parsed = urlparse(self._base_url)
    return parsed.netloc
@property
def url(self):
    """The full base URL of this Site, e.g. ``"https://en.wikipedia.org"``.

    A protocol-relative base URL (one starting with ``//``) is completed
    with ``https:`` if we were told to use HTTPS, otherwise with ``http:``.
    Any other base URL is returned untouched.
    """
    base = self._base_url
    if not base.startswith("//"):
        return base
    scheme = "https:" if self._use_https else "http:"
    return scheme + base
def api_query(self, **kwargs):
    """Do an API query with `kwargs` as the parameters.

    The query itself is carried out by :py:meth:`_api_query` while
    :py:attr:`self._api_lock` is held, so only one API query runs at a
    time per lock.

    The API URL is built from :py:attr:`self._base_url` and
    :py:attr:`self._script_path`; if either is unknown,
    :py:exc:`~earwigbot.exceptions.APIError` is raised. A
    protocol-relative base URL (introduced in MediaWiki 1.18) becomes HTTPS
    only if :py:attr:`self._use_https` is ``True``, otherwise HTTP.

    ``format=json`` is added to the given params, as are ``assert`` and
    ``maxlag`` when :py:attr:`self._assert_edit` and
    :py:attr:`self._maxlag` ask for them. The response is decoded as JSON
    and returned.

    If the API reports an error, :py:exc:`~earwigbot.exceptions.APIError`
    is raised with details. The one exception is a ``maxlag`` error: in
    that case we sleep for a bit and repeat the query, giving up once
    :py:attr:`self._max_retries` has been reached.

    There is helpful MediaWiki API documentation at MediaWiki.org:
    http://www.mediawiki.org/wiki/API
    """
    with self._api_lock:
        response = self._api_query(kwargs)
    return response
def sql_query(self, query, params=(), plain_query=False, dict_cursor=False,
              cursor_class=None, show_table=False):
    """Do an SQL query and yield its results.

    If *plain_query* is ``True``, we will force an unparameterized query.
    Specifying both *params* and *plain_query* will cause an error. If
    *dict_cursor* is ``True``, we will use :py:class:`oursql.DictCursor` as
    our cursor, otherwise the default :py:class:`oursql.Cursor`. If
    *cursor_class* is given, it will override this option. If *show_table*
    is True, the name of the table will be prepended to the name of the
    column. This will mainly affect an :py:class:`~oursql.DictCursor`.

    This is a generator: nothing happens (no connection, no query, no
    error) until iteration starts, and the SQL lock is held for as long as
    the results are being iterated over.

    Example usage::

        >>> query = "SELECT user_id, user_registration FROM user WHERE user_name = ?"
        >>> params = ("The Earwig",)
        >>> result1 = site.sql_query(query, params)
        >>> result2 = site.sql_query(query, params, dict_cursor=True)
        >>> for row in result1: print row
        (7418060L, '20080703215134')
        >>> for row in result2: print row
        {'user_id': 7418060L, 'user_registration': '20080703215134'}

    This may raise :py:exc:`~earwigbot.exceptions.SQLError` (in particular
    when the "oursql" module is not available) or one of oursql's
    exceptions (:py:exc:`oursql.ProgrammingError`,
    :py:exc:`oursql.InterfaceError`, ...) if there were problems with the
    query.

    See :py:meth:`_sql_connect` for information on how a connection is
    acquired. oursql's own documentation has the details on that package.
    """
    if not cursor_class:
        if not oursql:
            # Fail with the documented exception instead of tripping over
            # an attribute lookup on the missing module:
            e = "Module 'oursql' is required for SQL queries."
            raise exceptions.SQLError(e)
        cursor_class = oursql.DictCursor if dict_cursor else oursql.Cursor

    with self._sql_lock:
        if not self._sql_conn:
            self._sql_connect()
        cursor = self._sql_conn.cursor(cursor_class, show_table=show_table)
        with cursor as cur:
            cur.execute(query, params, plain_query)
            for result in cur:
                yield result
def get_maxlag(self, showall=False):
    """Return the internal database replication lag in seconds.

    In a typical setup, this is the replication lag *within* the WMF's
    cluster, *not* the external replication lag affecting the Toolserver
    (see :py:meth:`get_replag` for that). It is useful together with the
    ``maxlag`` API query param (added by config), which makes queries get
    halted and retried when the lag is too high, usually above five
    seconds.

    The figure comes from a ``siprop=dbrepllag`` siteinfo query that is
    itself sent without the ``maxlag`` param. By default the lag of the
    first server in the response is returned; with *showall*, a list with
    the lag of every server in the response is returned instead.
    """
    query = {"action": "query", "meta": "siteinfo", "siprop": "dbrepllag"}
    if showall:
        query["sishowalldb"] = 1
    with self._api_lock:
        response = self._api_query(query, ignore_maxlag=True)

    servers = response["query"]["dbrepllag"]
    if not showall:
        return servers[0]["lag"]
    return [server["lag"] for server in servers]
def get_replag(self):
    """Return the estimated external database replication lag in seconds.

    Requires SQL access. The estimate is the difference between the
    database's current time and the timestamp of the most recent entry in
    the recent changes table. It therefore only makes sense on a
    replicated database (e.g. the Wikimedia Toolserver) and on a wiki that
    is edited very frequently (ideally, at least once per second);
    otherwise the result may be larger than expected.

    This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of
    oursql's exceptions (:py:exc:`oursql.ProgrammingError`,
    :py:exc:`oursql.InterfaceError`, ...) if there were problems.
    """
    query = """SELECT UNIX_TIMESTAMP() - UNIX_TIMESTAMP(rc_timestamp) FROM
               recentchanges ORDER BY rc_timestamp DESC LIMIT 1"""
    rows = list(self.sql_query(query))
    newest = rows[0]
    return newest[0]
def namespace_id_to_name(self, ns_id, all=False):
    """Given a namespace ID, returns associated namespace names.

    With *all* left as ``False``, only the first name in our list for that
    namespace is returned, which is usually the localized version. With
    *all* set to ``True``, the entire list is returned, which includes the
    canonical name. For example, on ``enwiki`` with *ns_id* = ``4``, this
    returns ``u"Wikipedia"`` by default and
    ``[u"Wikipedia", u"Project", u"WP"]`` when *all* is ``True``.

    Raises :py:exc:`~earwigbot.exceptions.NamespaceNotFoundError` if the ID
    is not found.
    """
    try:
        names = self._namespaces[ns_id]
        return names if all else names[0]
    except KeyError:
        message = "There is no namespace with id {0}.".format(ns_id)
        raise exceptions.NamespaceNotFoundError(message)
def namespace_name_to_id(self, name):
    """Given a namespace name, returns the associated ID.

    This is :py:meth:`namespace_id_to_name` in reverse: every known name
    of every namespace is compared with *name*. Case is ignored, because
    namespaces are assumed to be case-insensitive.

    Raises :py:exc:`~earwigbot.exceptions.NamespaceNotFoundError` if the
    name is not found.
    """
    wanted = name.lower()
    for ns_id, names in self._namespaces.items():
        if any(known.lower() == wanted for known in names):
            return ns_id

    message = "There is no namespace with name '{0}'.".format(name)
    raise exceptions.NamespaceNotFoundError(message)
def get_page(self, title, follow_redirects=False, pageid=None):
    """Return a :py:class:`Page` object for the given title.

    If the part of *title* before its first colon is one of the names of
    the category namespace, a
    :py:class:`~earwigbot.wiki.category.Category` object is returned
    instead. A title without any colon (even one that is simply
    "Category") always gives a :py:class:`~earwigbot.wiki.page.Page`. As
    :py:class:`~earwigbot.wiki.category.Category` is a subclass of
    :py:class:`~earwigbot.wiki.page.Page`, this should not cause problems.

    *follow_redirects* and *pageid* are passed directly to the
    constructor. Note that this doesn't do any direct checks for existence
    or redirect-following: :py:class:`~earwigbot.wiki.page.Page`'s methods
    provide that.
    """
    title = self._unicodeify(title)
    category_names = self.namespace_id_to_name(constants.NS_CATEGORY,
                                               all=True)
    prefix, colon, _ = title.partition(":")
    if colon and prefix in category_names:
        page_class = Category
    else:
        page_class = Page
    return page_class(self, title, follow_redirects, pageid, self._logger)
def get_category(self, catname, follow_redirects=False, pageid=None):
    """Return a :py:class:`Category` object for the given category name.

    *catname* should be given *without* a namespace prefix; the first name
    of the category namespace and a colon are put in front of it to form
    the page title. In effect this is shorthand for calling
    :py:meth:`get_page` with ``"Category:" + catname``.
    """
    catname = self._unicodeify(catname)
    prefix = self.namespace_id_to_name(constants.NS_CATEGORY)
    title = prefix + u":" + catname
    return Category(self, title, follow_redirects, pageid, self._logger)
def get_user(self, username=None):
    """Return a :py:class:`User` object for the given username.

    Without a *username* (``None`` or anything else that is false), the
    :py:class:`~earwigbot.wiki.user.User` returned stands for whoever we
    currently are: the logged-in user, or an anonymous one!
    """
    if username:
        wanted = self._unicodeify(username)
    else:
        wanted = self._get_username()
    return User(self, wanted, self._logger)
def delegate(self, services, args=None, kwargs=None):
    """Delegate a task to either the API or SQL depending on conditions.

    *services* should be a dictionary in which the key is the service name
    (:py:attr:`SERVICE_API` or :py:attr:`SERVICE_SQL`), and the value is
    the function to call for this service. Every function is called with
    the same arguments: the tuple *args* and the dict *kwargs*, both empty
    by default.

    Services are tried in the order given by
    :py:meth:`_get_service_order`, skipping those without an entry in
    *services*; not every service needs one. The return value of the
    first function that does not raise
    :py:exc:`~earwigbot.exceptions.ServiceError` is returned. If no
    service is left to try,
    :py:exc:`~earwigbot.exceptions.NoServiceError` is raised.
    """
    args = args or ()
    kwargs = kwargs or {}

    for service in self._get_service_order():
        if service not in services:
            continue
        try:
            return services[service](*args, **kwargs)
        except exceptions.ServiceError:
            continue  # This one is unavailable; move on to the next
    raise exceptions.NoServiceError(services)
@@ -0,0 +1,438 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from collections import OrderedDict | |||
from cookielib import LWPCookieJar, LoadError | |||
import errno | |||
from os import chmod, path | |||
from platform import python_version | |||
import stat | |||
import sqlite3 as sqlite | |||
from earwigbot import __version__ | |||
from earwigbot.exceptions import SiteNotFoundError | |||
from earwigbot.wiki.copyvios.exclusions import ExclusionsDB | |||
from earwigbot.wiki.site import Site | |||
__all__ = ["SitesDB"] | |||
class SitesDB(object): | |||
""" | |||
**EarwigBot: Wiki Toolset: Sites Database Manager** | |||
This class controls the :file:`sites.db` file, which stores information | |||
about all wiki sites known to the bot. Three public methods act as bridges | |||
between the bot's config files and :py:class:`~earwigbot.wiki.site.Site` | |||
objects: | |||
- :py:meth:`get_site`: returns a Site object corresponding to a site | |||
- :py:meth:`add_site`: stores a site in the database | |||
- :py:meth:`remove_site`: removes a site from the database | |||
There's usually no need to use this class directly. All public methods | |||
here are available as :py:meth:`bot.wiki.get_site`, | |||
:py:meth:`bot.wiki.add_site`, and :py:meth:`bot.wiki.remove_site`, which | |||
use a :file:`sites.db` file located in the same directory as our | |||
:file:`config.yml` file. Lower-level access can be achieved by importing | |||
the manager class (``from earwigbot.wiki import SitesDB``). | |||
""" | |||
def __init__(self, bot):
    """Set up the manager using the config and logger of the Bot object.

    Nothing is read from disk here; we only work out the locations of
    :file:`sites.db` and :file:`.cookies` inside the config's root
    directory, start with an empty site cache and no cookiejar, and create
    the exclusions database manager for :file:`exclusions.db` in that same
    directory.
    """
    config = bot.config
    root_dir = config.root_dir
    logger = bot.logger.getChild("wiki")

    self.config = config
    self._logger = logger
    self._sites = {}  # Site objects we have already built, by name
    self._sitesdb = path.join(root_dir, "sites.db")
    self._cookie_file = path.join(root_dir, ".cookies")
    self._cookiejar = None  # Created lazily by _get_cookiejar()
    self._exclusions_db = ExclusionsDB(
        self, path.join(root_dir, "exclusions.db"),
        logger.getChild("exclusionsdb"))
def __repr__(self): | |||
"""Return the canonical string representation of the SitesDB.""" | |||
res = "SitesDB(config={0!r}, sitesdb={1!r}, cookie_file={2!r})" | |||
return res.format(self.config, self._sitesdb, self._cookie_file) | |||
def __str__(self): | |||
"""Return a nice string representation of the SitesDB.""" | |||
return "<SitesDB at {0}>".format(self._sitesdb) | |||
def _get_cookiejar(self): | |||
"""Return a LWPCookieJar object loaded from our .cookies file. | |||
The same .cookies file is returned every time, located in the project | |||
root, same directory as config.yml and bot.py. If it doesn't exist, we | |||
will create the file and set it to be readable and writeable only by | |||
us. If it exists but the information inside is bogus, we'll ignore it. | |||
This is normally called by _make_site_object() (in turn called by | |||
get_site()), and the cookiejar is passed to our Site's constructor, | |||
used when it makes API queries. This way, we can easily preserve | |||
cookies between sites (e.g., for CentralAuth), making logins easier. | |||
""" | |||
if self._cookiejar: | |||
return self._cookiejar | |||
self._cookiejar = LWPCookieJar(self._cookie_file) | |||
try: | |||
self._cookiejar.load() | |||
except LoadError: | |||
pass # File contains bad data, so ignore it completely | |||
except IOError as e: | |||
if e.errno == errno.ENOENT: # "No such file or directory" | |||
# Create the file and restrict reading/writing only to the | |||
# owner, so others can't peak at our cookies: | |||
open(self._cookie_file, "w").close() | |||
chmod(self._cookie_file, stat.S_IRUSR|stat.S_IWUSR) | |||
else: | |||
raise | |||
return self._cookiejar | |||
def _create_sitesdb(self): | |||
"""Initialize the sitesdb file with its three necessary tables.""" | |||
script = """ | |||
CREATE TABLE sites (site_name, site_project, site_lang, site_base_url, | |||
site_article_path, site_script_path); | |||
CREATE TABLE sql_data (sql_site, sql_data_key, sql_data_value); | |||
CREATE TABLE namespaces (ns_site, ns_id, ns_name, ns_is_primary_name); | |||
""" | |||
with sqlite.connect(self._sitesdb) as conn: | |||
conn.executescript(script) | |||
def _get_site_object(self, name): | |||
"""Return the site from our cache, or create it if it doesn't exist. | |||
This is essentially just a wrapper around _make_site_object that | |||
returns the same object each time a specific site is asked for. | |||
""" | |||
try: | |||
return self._sites[name] | |||
except KeyError: | |||
site = self._make_site_object(name) | |||
self._sites[name] = site | |||
return site | |||
def _load_site_from_sitesdb(self, name): | |||
"""Return all information stored in the sitesdb relating to given site. | |||
The information will be returned as a tuple, containing the site's | |||
name, project, language, base URL, article path, script path, SQL | |||
connection data, and namespaces, in that order. If the site is not | |||
found in the database, SiteNotFoundError will be raised. An empty | |||
database will be created before the exception is raised if none exists. | |||
""" | |||
query1 = "SELECT * FROM sites WHERE site_name = ?" | |||
query2 = "SELECT sql_data_key, sql_data_value FROM sql_data WHERE sql_site = ?" | |||
query3 = "SELECT ns_id, ns_name, ns_is_primary_name FROM namespaces WHERE ns_site = ?" | |||
error = "Site '{0}' not found in the sitesdb.".format(name) | |||
with sqlite.connect(self._sitesdb) as conn: | |||
try: | |||
site_data = conn.execute(query1, (name,)).fetchone() | |||
except sqlite.OperationalError: | |||
self._create_sitesdb() | |||
raise SiteNotFoundError(error) | |||
if not site_data: | |||
raise SiteNotFoundError(error) | |||
sql_data = conn.execute(query2, (name,)).fetchall() | |||
ns_data = conn.execute(query3, (name,)).fetchall() | |||
name, project, lang, base_url, article_path, script_path = site_data | |||
sql = dict(sql_data) | |||
namespaces = {} | |||
for ns_id, ns_name, ns_is_primary_name in ns_data: | |||
try: | |||
if ns_is_primary_name: # "Primary" name goes first in list | |||
namespaces[ns_id].insert(0, ns_name) | |||
else: # Ordering of the aliases doesn't matter | |||
namespaces[ns_id].append(ns_name) | |||
except KeyError: | |||
namespaces[ns_id] = [ns_name] | |||
return (name, project, lang, base_url, article_path, script_path, sql, | |||
namespaces) | |||
def _make_site_object(self, name):
    """Return a Site object associated with the site *name* in our sitesdb.

    The site's stored data comes from _load_site_from_sitesdb(), so
    SiteNotFoundError will be raised if the site is not in our sitesdb.
    Everything else is taken from the "wiki" section of our config:

    - in the user agent, "$1" and "$2" are replaced by the earwigbot
      version and the Python version respectively;
    - a non-empty search config is extended with the path of the ".nltk"
      directory in our root directory and with our exclusions database;
    - if the sitesdb holds no SQL data for the site, a copy of the
      config's "sql" section is used instead, with "$1" replaced by the
      site's name in all of its string values.
    """
    cookiejar = self._get_cookiejar()
    (name, project, lang, base_url, article_path, script_path, sql,
     namespaces) = self._load_site_from_sitesdb(name)

    wiki = self.config.wiki

    user_agent = wiki.get("userAgent")
    if user_agent:
        user_agent = user_agent.replace("$1", __version__)
        user_agent = user_agent.replace("$2", python_version())

    search_config = wiki.get("search", OrderedDict()).copy()
    if search_config:
        search_config["nltk_dir"] = path.join(self.config.root_dir, ".nltk")
        search_config["exclusions_db"] = self._exclusions_db

    if not sql:
        sql = wiki.get("sql", OrderedDict()).copy()
        for key, value in list(sql.items()):
            if isinstance(value, basestring) and "$1" in value:
                sql[key] = value.replace("$1", name)

    return Site(
        name=name,
        project=project,
        lang=lang,
        base_url=base_url,
        article_path=article_path,
        script_path=script_path,
        sql=sql,
        namespaces=namespaces,
        login=(wiki.get("username"), wiki.get("password")),
        cookiejar=cookiejar,
        user_agent=user_agent,
        use_https=wiki.get("useHTTPS", False),
        assert_edit=wiki.get("assert"),
        maxlag=wiki.get("maxlag"),
        wait_between_queries=wiki.get("waitTime", 2),
        logger=self._logger.getChild(name),
        search_config=search_config)
def _get_site_name_from_sitesdb(self, project, lang): | |||
"""Return the name of the first site with the given project and lang. | |||
If we can't find the site with the given information, we'll also try | |||
searching for a site whose base_url contains "{lang}.{project}". There | |||
are a few sites, like the French Wikipedia, that set their project to | |||
something other than the expected "wikipedia" ("wikipédia" in this | |||
case), but we should correctly find them when doing get_site(lang="fr", | |||
project="wikipedia"). | |||
If the site is not found, return None. An empty sitesdb will be created | |||
if none exists. | |||
""" | |||
query1 = "SELECT site_name FROM sites WHERE site_project = ? and site_lang = ?" | |||
query2 = "SELECT site_name FROM sites WHERE site_base_url LIKE ?" | |||
with sqlite.connect(self._sitesdb) as conn: | |||
try: | |||
site = conn.execute(query1, (project, lang)).fetchone() | |||
if site: | |||
return site[0] | |||
else: | |||
url = "%{0}.{1}%".format(lang, project) | |||
site = conn.execute(query2, (url,)).fetchone() | |||
return site[0] if site else None | |||
except sqlite.OperationalError: | |||
self._create_sitesdb() | |||
def _add_site_to_sitesdb(self, site): | |||
"""Extract relevant info from a Site object and add it to the sitesdb. | |||
Works like a reverse _load_site_from_sitesdb(); the site's project, | |||
language, base URL, article path, script path, SQL connection data, and | |||
namespaces are extracted from the site and inserted into the sites | |||
database. If the sitesdb doesn't exist, we'll create it first. | |||
""" | |||
name = site.name | |||
sites_data = (name, site.project, site.lang, site._base_url, | |||
site._article_path, site._script_path) | |||
sql_data = [(name, key, val) for key, val in site._sql_data.iteritems()] | |||
ns_data = [] | |||
for ns_id, ns_names in site._namespaces.iteritems(): | |||
ns_data.append((name, ns_id, ns_names.pop(0), True)) | |||
for ns_name in ns_names: | |||
ns_data.append((name, ns_id, ns_name, False)) | |||
with sqlite.connect(self._sitesdb) as conn: | |||
check_exists = "SELECT 1 FROM sites WHERE site_name = ?" | |||
try: | |||
exists = conn.execute(check_exists, (name,)).fetchone() | |||
except sqlite.OperationalError: | |||
self._create_sitesdb() | |||
else: | |||
if exists: | |||
conn.execute("DELETE FROM sites WHERE site_name = ?", (name,)) | |||
conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,)) | |||
conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,)) | |||
conn.execute("INSERT INTO sites VALUES (?, ?, ?, ?, ?, ?)", sites_data) | |||
conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)", sql_data) | |||
conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)", ns_data) | |||
def _remove_site_from_sitesdb(self, name): | |||
"""Remove a site by name from the sitesdb and the internal cache.""" | |||
try: | |||
del self._sites[name] | |||
except KeyError: | |||
pass | |||
with sqlite.connect(self._sitesdb) as conn: | |||
cursor = conn.execute("DELETE FROM sites WHERE site_name = ?", (name,)) | |||
if cursor.rowcount == 0: | |||
return False | |||
else: | |||
conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,)) | |||
conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,)) | |||
self._logger.info("Removed site '{0}'".format(name)) | |||
return True | |||
def get_site(self, name=None, project=None, lang=None):
    """Return a Site instance based on information from the sitesdb.

    Called without arguments, this returns the default site named by
    ``config.wiki["defaultSite"]``.

    Given a *name*, the site with that name is returned. The name is the
    site's ``wikiid`` in the API, like *enwiki*.

    Given a *project* and a *lang*, the site whose project and language
    match is returned. Should several sites share those values (unlikely),
    the result is not reliable; pass an explicit *name* instead.

    We will attempt to login to the site automatically using
    ``config.wiki["username"]`` and ``config.wiki["password"]`` if both are
    defined.

    A *project* without a *lang* (or the reverse) raises
    :py:exc:`TypeError`. When all three arguments are given, *name* is
    tried first and *project*/*lang* only if *name* is not found. If no
    site can be found,
    :py:exc:`~earwigbot.exceptions.SiteNotFoundError` is raised. An empty
    sitesdb will be created if none is found.
    """
    # project and lang only make sense as a pair:
    if bool(project) != bool(lang):
        raise TypeError(
            "Keyword arguments 'lang' and 'project' must be specified together.")

    # Nothing specified at all -> the configured default site:
    if not (name or project or lang):
        try:
            default_name = self.config.wiki["defaultSite"]
        except KeyError:
            raise SiteNotFoundError("Default site is not specified in config.")
        return self._get_site_object(default_name)

    # A name takes priority; project/lang are only a fallback for it:
    if name:
        try:
            return self._get_site_object(name)
        except SiteNotFoundError:
            fallback = None
            if project and lang:
                fallback = self._get_site_name_from_sitesdb(project, lang)
            if fallback:
                return self._get_site_object(fallback)
            raise

    # Only project and lang were given:
    resolved = self._get_site_name_from_sitesdb(project, lang)
    if not resolved:
        raise SiteNotFoundError(
            "Site '{0}:{1}' not found in the sitesdb.".format(project, lang))
    return self._get_site_object(resolved)
def add_site(self, project=None, lang=None, base_url=None,
             script_path="/w", sql=None):
    """Add a site to the sitesdb so it can be retrieved with get_site().

    If only a project and a lang are given, we'll guess the *base_url* as
    ``"//{lang}.{project}.org"`` (which is protocol-relative, becoming
    ``"https"`` if *useHTTPS* is ``True`` in config otherwise ``"http"``;
    when *useHTTPS* is absent from config this method assumes ``True``).
    If this is wrong, provide the correct *base_url* as an argument (in
    which case project and lang are ignored). Most wikis use ``"/w"`` as
    the script path (meaning the API is located at
    ``"{base_url}{script_path}/api.php"`` ->
    ``"//{lang}.{project}.org/w/api.php"``), so this is the default. If
    your wiki is different, provide the script_path as an argument. A dict
    given as *sql* is handed to the Site unchanged as its SQL connection
    settings.

    Returns the result of looking the newly stored site up again with
    ``_get_site_object()``. If a site of the same name was already in the
    sitesdb, its stored data is replaced (this can be done purposefully to
    update old site info). Raises
    :py:exc:`~earwigbot.exceptions.SiteNotFoundError` if not enough
    information has been provided to identify the site (e.g. a *project*
    but not a *lang*).
    """
    if not base_url:
        if not project or not lang:
            e = "Without a base_url, both a project and a lang must be given."
            raise SiteNotFoundError(e)
        base_url = "//{0}.{1}.org".format(lang, project)
    cookiejar = self._get_cookiejar()

    config = self.config
    login = (config.wiki.get("username"), config.wiki.get("password"))
    user_agent = config.wiki.get("userAgent")
    use_https = config.wiki.get("useHTTPS", True)
    assert_edit = config.wiki.get("assert")
    maxlag = config.wiki.get("maxlag")
    wait_between_queries = config.wiki.get("waitTime", 2)

    # "$1" and "$2" in the user agent stand for the bot and Python versions.
    if user_agent:
        user_agent = user_agent.replace("$1", __version__)
        user_agent = user_agent.replace("$2", python_version())

    # Create a Site object to log in and load the other attributes:
    site = Site(base_url=base_url, script_path=script_path, sql=sql,
                login=login, cookiejar=cookiejar, user_agent=user_agent,
                use_https=use_https, assert_edit=assert_edit,
                maxlag=maxlag, wait_between_queries=wait_between_queries)

    # Store first, log second: the message should only appear once the
    # site really is in the sitesdb.
    self._add_site_to_sitesdb(site)
    self._logger.info("Added site '{0}'".format(site.name))
    return self._get_site_object(site.name)
def remove_site(self, name=None, project=None, lang=None):
    """Remove a site from the sitesdb.

    The return value is true if a site was removed and false if it was not
    in our sitesdb to begin with. With all three of *name*, *project* and
    *lang* given, *name* is tried first and the project/lang pair only if
    *name* was not found. A project without a language (or the reverse)
    raises :py:exc:`TypeError`. Will create an empty sitesdb if none was
    found.
    """
    # project and lang only make sense as a pair:
    if bool(project) != bool(lang):
        raise TypeError(
            "Keyword arguments 'lang' and 'project' must be specified together.")

    have_pair = project and lang

    if name:
        removed = self._remove_site_from_sitesdb(name)
        if removed or not have_pair:
            return removed
        # The name was unknown; fall back to looking it up by project/lang.
        alternative = self._get_site_name_from_sitesdb(project, lang)
        if alternative:
            return self._remove_site_from_sitesdb(alternative)
        return removed

    if have_pair:
        resolved = self._get_site_name_from_sitesdb(project, lang)
        if resolved:
            return self._remove_site_from_sitesdb(resolved)

    return False
@@ -0,0 +1,316 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from logging import getLogger, NullHandler | |||
from time import gmtime, strptime | |||
from socket import AF_INET, AF_INET6, error as socket_error, inet_pton | |||
from earwigbot.exceptions import UserNotFoundError | |||
from earwigbot.wiki import constants | |||
from earwigbot.wiki.page import Page | |||
__all__ = ["User"] | |||
class User(object):
    """
    **EarwigBot: Wiki Toolset: User**

    Represents a user on a given :py:class:`~earwigbot.wiki.site.Site`. Has
    methods for getting a bunch of information about the user, such as
    editcount and user rights, methods for returning the user's userpage and
    talkpage, etc.

    *Attributes:*

    - :py:attr:`site`:         the user's corresponding Site object
    - :py:attr:`name`:         the user's username
    - :py:attr:`exists`:       ``True`` if the user exists, else ``False``
    - :py:attr:`userid`:       an integer ID representing the user
    - :py:attr:`blockinfo`:    information about any current blocks on the user
    - :py:attr:`groups`:       a list of the user's groups
    - :py:attr:`rights`:       a list of the user's rights
    - :py:attr:`editcount`:    the number of edits made by the user
    - :py:attr:`registration`: the time the user registered
    - :py:attr:`emailable`:    ``True`` if you can email the user, or ``False``
    - :py:attr:`gender`:       the user's gender ("male"/"female"/"unknown")
    - :py:attr:`is_ip`:        ``True`` if this is an IP address, or ``False``

    *Public methods:*

    - :py:meth:`reload`:       forcibly reloads the user's attributes
    - :py:meth:`get_userpage`: returns a Page object representing the user's
      userpage
    - :py:meth:`get_talkpage`: returns a Page object representing the user's
      talkpage
    """

    def __init__(self, site, name, logger=None):
        """Constructor for new User instances.

        Takes two arguments, a Site object (necessary for doing API queries),
        and the name of the user, preferably without "User:" in front, although
        this prefix will be automatically removed by the API if given.

        You can also use site.get_user() instead, which returns a User object,
        and is preferred.

        We won't do any API queries yet for basic information about the user -
        save that for when the information is requested.
        """
        self._site = site
        self._name = name

        # Set up our internal logger:
        if logger:
            self._logger = logger
        else:  # Just set up a null logger to eat up our messages:
            self._logger = getLogger("earwigbot.wiki")
            # This logger is shared by every User created without an
            # explicit logger, so attach the NullHandler only once instead
            # of piling up one more handler per instance.
            handlers = self._logger.handlers
            if not any(isinstance(h, NullHandler) for h in handlers):
                self._logger.addHandler(NullHandler())

    def __repr__(self):
        """Return the canonical string representation of the User."""
        return "User(name={0!r}, site={1!r})".format(self._name, self._site)

    def __str__(self):
        """Return a nice string representation of the User."""
        return '<User "{0}" of {1}>'.format(self.name, str(self.site))

    def _get_attribute(self, attr):
        """Internally used to get an attribute by name.

        We'll call _load_attributes() to get this (and all other attributes)
        from the API if nothing has been loaded yet.

        Raises UserNotFoundError if a nonexistent user prevents us from
        returning a certain attribute.
        """
        # Load when no query has been made yet, or when the user exists but
        # the attribute is still unset (an earlier load was cut short). A
        # user already known not to exist never gets these attributes, so
        # testing for the attribute alone would hit the API on every access.
        never_loaded = not hasattr(self, "_exists")
        if never_loaded or (self._exists and not hasattr(self, attr)):
            self._load_attributes()
        if not self._exists:
            e = u"User '{0}' does not exist.".format(self._name)
            raise UserNotFoundError(e)
        return getattr(self, attr)

    def _load_attributes(self):
        """Internally used to load all attributes from the API.

        Normally, this is called by _get_attribute() when a requested attribute
        is not defined. This defines it.
        """
        props = "blockinfo|groups|rights|editcount|registration|emailable|gender"
        result = self.site.api_query(action="query", list="users",
                                     ususers=self._name, usprop=props)
        res = result["query"]["users"][0]

        # normalize our username in case it was entered oddly
        self._name = res["name"]

        try:
            self._userid = res["userid"]
        except KeyError:  # userid is missing, so user does not exist
            self._exists = False
            return

        self._exists = True

        # The three block keys are only present for a blocked user.
        try:
            self._blockinfo = {
                "by": res["blockedby"],
                "reason": res["blockreason"],
                "expiry": res["blockexpiry"]
            }
        except KeyError:
            self._blockinfo = False

        self._groups = res["groups"]

        # "rights" may arrive as a mapping or as a plain list; either way
        # we keep only the right names.
        try:
            self._rights = res["rights"].values()
        except AttributeError:
            self._rights = res["rights"]
        self._editcount = res["editcount"]

        reg = res["registration"]
        try:
            self._registration = strptime(reg, "%Y-%m-%dT%H:%M:%SZ")
        except TypeError:
            # Sometimes the API doesn't give a date; the user's probably really
            # old. There's nothing else we can do!
            self._registration = gmtime(0)

        # The mere presence of the key marks the user as emailable.
        self._emailable = "emailable" in res

        self._gender = res["gender"]

    @property
    def site(self):
        """The user's corresponding Site object."""
        return self._site

    @property
    def name(self):
        """The user's username.

        This will never make an API query on its own, but if one has already
        been made by the time this is retrieved, the username may have been
        "normalized" from the original input to the constructor, converted into
        a Unicode object, with underscores removed, etc.
        """
        return self._name

    @property
    def exists(self):
        """``True`` if the user exists, or ``False`` if they do not.

        Makes an API query only if we haven't made one already.
        """
        if not hasattr(self, "_exists"):
            self._load_attributes()
        return self._exists

    @property
    def userid(self):
        """An integer ID used by MediaWiki to represent the user.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_userid")

    @property
    def blockinfo(self):
        """Information about any current blocks on the user.

        If the user is not blocked, returns ``False``. If they are, returns a
        dict with three keys: ``"by"`` is the blocker's username, ``"reason"``
        is the reason why they were blocked, and ``"expiry"`` is when the block
        expires.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_blockinfo")

    @property
    def groups(self):
        """A list of groups this user is in, including ``"*"``.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_groups")

    @property
    def rights(self):
        """A list of this user's rights.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_rights")

    @property
    def editcount(self):
        """Returns the number of edits made by the user.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_editcount")

    @property
    def registration(self):
        """The time the user registered as a :py:class:`time.struct_time`.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_registration")

    @property
    def emailable(self):
        """``True`` if the user can be emailed, or ``False`` if they cannot.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_emailable")

    @property
    def gender(self):
        """The user's gender.

        Can return either ``"male"``, ``"female"``, or ``"unknown"``, if they
        did not specify it.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_gender")

    @property
    def is_ip(self):
        """``True`` if the user is an IP address, or ``False`` otherwise.

        This tests for IPv4 and IPv6 using :py:func:`socket.inet_pton` on the
        username. No API queries are made.
        """
        for family in (AF_INET, AF_INET6):
            try:
                inet_pton(family, self.name)
            except (socket_error, ValueError):
                # socket_error: not an address of this family. ValueError
                # (which includes the Unicode errors): the name cannot even
                # be handed to inet_pton, so it is not an address either.
                continue
            return True
        return False

    def reload(self):
        """Forcibly reload the user's attributes.

        Emphasis on *reload*: this is only necessary if there is reason to
        believe they have changed.
        """
        self._load_attributes()

    def get_userpage(self):
        """Return a Page object representing the user's userpage.

        No checks are made to see if it exists or not. Proper site namespace
        conventions are followed.
        """
        prefix = self.site.namespace_id_to_name(constants.NS_USER)
        pagename = ':'.join((prefix, self._name))
        return Page(self.site, pagename)

    def get_talkpage(self):
        """Return a Page object representing the user's talkpage.

        No checks are made to see if it exists or not. Proper site namespace
        conventions are followed.
        """
        prefix = self.site.namespace_id_to_name(constants.NS_USER_TALK)
        pagename = ':'.join((prefix, self._name))
        return Page(self.site, pagename)
@@ -1,33 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# A base class for commands on IRC. | |||
class BaseCommand(object):
    """A base class for commands on IRC.

    Every method here is a neutral default; subclasses override the ones
    they need.
    """

    def __init__(self, connection):
        """Store the *connection* this command was created with."""
        self.connection = connection

    def get_hooks(self):
        """Return the hooks this command wants to be called on.

        Hooks are: 'msg', 'msg_private', 'msg_public', and 'join'. The base
        class asks for none.
        """
        return list()

    def get_help(self, command):
        """Return help information for *command*, used by !help.

        Return None for no help (the default). A class that handles several
        commands can use *command* to return different help for each one.
        """
        return

    def check(self, data):
        """Decide whether this command should respond to *data*.

        Given a Data() object, return True to respond to this activity, or
        False to ignore it because it doesn't apply to us. Most commands
        return True if data.command == 'command_name' and False otherwise;
        the base class always returns False.
        """
        return False

    def process(self, data):
        """Handle an activity (usually a message) on IRC.

        By this point self.check(), which command_handler calls
        automatically, has already said this is something we should respond
        to, so (usually) an 'if data.command != "command_name": return' is
        unnecessary. The base class does nothing.
        """
@@ -1,66 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# A module to manage IRC commands. | |||
import os | |||
import traceback | |||
# Module-level list of command objects: process_module() appends to it and
# get_commands()/check() read from it.
commands = [] | |||
def load_commands(connection):
    """Load all valid command classes from irc/commands/ into ``commands``.

    Every ``*.py`` file in ``irc/commands`` (relative to the current
    directory) whose name does not start with "_" is imported as
    ``irc.commands.<name>``, in alphabetical order, and handed to
    process_module() together with *connection*. A file that fails to
    import is reported with its traceback and skipped.
    """
    # Imported here because the module header does not import it.
    import importlib

    filenames = sorted(os.listdir(os.path.join("irc", "commands")))
    for f in filenames:
        # ignore non-python files and files beginning with "_"
        if f.startswith("_") or not f.endswith(".py"):
            continue
        module_name = f[:-3]  # strip .py from end
        try:
            # Import by name rather than exec-ing a statement built from
            # the filename, so odd filenames cannot run arbitrary code.
            module = importlib.import_module("irc.commands." + module_name)
        except Exception:  # importing the file failed for some reason...
            print("Couldn't load file %s:" % f)
            traceback.print_exc()
            continue
        process_module(connection, module)

    pretty_cmnds = [c.__class__.__name__ for c in commands]
    print("Found %s command classes: %s." % (len(commands), ', '.join(pretty_cmnds)))
def process_module(connection, module):
    """Add the command classes found in *module* to the ``commands`` list.

    Every attribute of *module* that is a class with a direct base named
    "BaseCommand" is instantiated with *connection* and appended to
    ``commands``. Everything else is ignored.
    """
    for this_obj in dir(module):  # go through everything in the module
        obj = getattr(module, this_obj)
        try:
            bases = obj.__bases__
        except AttributeError:  # object isn't a class, so ignore it
            continue
        for base in bases:
            # a direct BaseCommand base means this is a command class
            if base.__name__ == "BaseCommand":
                commands.append(obj(connection))
                print("Added command class %s from %s..." % (this_obj, module.__name__))
                break  # one registration per class is enough
def get_commands():
    """Return the module-level list of loaded command objects."""
    loaded = commands
    return loaded
def check(hook, data):
    """Dispatch an IRC event to the first command that wants it.

    *data* first has its arguments parsed (into data.command and
    data.args). Then each loaded command is asked, in order, whether it
    listens on *hook* and whether it accepts *data*; the first one that
    does gets to process it and the search stops. An ordinary exception
    raised by the command is printed with its traceback and swallowed;
    SystemExit and KeyboardInterrupt are allowed to propagate.
    """
    data.parse_args()  # parse command arguments into data.command and data.args

    for command in commands:
        if hook not in command.get_hooks():
            continue
        if not command.check(data):
            continue
        try:
            command.process(data)
        except Exception:
            # Report the failure but keep the bot alive. A bare except
            # here would also trap interpreter-exit requests.
            print("Error executing command '{}':".format(data.command))
            traceback.print_exc()
        break
@@ -1,979 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
###### | |||
###### NOTE: | |||
###### This is an old commands file from the previous version of EarwigBot. | |||
###### It is not used by the new EarwigBot and is simply here for reference | |||
###### when developing new commands. | |||
###### | |||
### EarwigBot | |||
## Import basics. | |||
import sys, socket, string, time, codecs, os, traceback, thread, re, urllib, web, math, unicodedata | |||
## Import our functions. | |||
import config | |||
## Set up constants: one module-level name per setting read from config.
HOST = config.host
PORT = config.port
NICK = config.nick
IDENT = config.ident
REALNAME = config.realname
CHANS = config.chans
REPORT_CHAN = config.report_chan
WELCOME_CHAN = config.welcome_chan
HOST2 = config.host2
CHAN2 = config.chan2
OWNER = config.owner
ADMINS = config.admins
ADMINS_R = config.admin_readable
PASS = config.password
def get_commandList():
    """Return a dict mapping each accepted command word to its handler name.

    Most words map to themselves; the rest are aliases that map to the
    name of the command they stand for. A new dict is built on every call.
    """
    # Command words that are their own handler name.
    canonical = (
        'quiet', 'welcome', 'linker', 'auth', 'access', 'join', 'part',
        'restart', 'quit', 'msg', 'me', 'calc', 'dice', 'tock', 'beats',
        'copyvio', 'dictionary', 'etymology', 'langcode', 'number', 'nick',
        'op', 'deop', 'voice', 'devoice', 'pending', 'submissions', 'praise',
        'leonard', 'groovedog', 'earwig', 'macmed', 'cubs197', 'sparksboy',
        'tim_song', 'blurpeace', 'sausage', 'mindstormskid', 'mcjohn',
        'fetchcomms', 'trout', 'kill', 'fish', 'report', 'commands', 'help',
        'mysql', 'reminder', 'notes', 'hash', 'lookup',
    )
    # (handler name, alternative words accepted for it)
    aliases = (
        ('welcome', ('greet',)),
        ('quit', ('die',)),
        ('copyvio', ('copy', 'copyright')),
        ('dictionary', ('dict',)),
        ('etymology', ('ety',)),
        ('langcode', ('lang',)),
        ('number', ('num', 'count', 'c')),
        ('pending', ('pend',)),
        ('submissions', ('sub',)),
        ('tim_song', ('tim',)),
        ('kill', ('destroy', 'murder')),
        ('help', ('doc', 'documentation')),
        ('reminder', ('remind',)),
        ('notes', ('note', 'about', 'data', 'database')),
        ('lookup', ('ip',)),
    )

    table = dict((word, word) for word in canonical)
    for handler, words in aliases:
        for word in words:
            table[word] = handler
    return table
def main(command, line, line2, nick, chan, host, auth, notice, say, reply, s):
    """Run parse() with the given arguments and report any failure to *chan*.

    If parse() raises, the traceback is printed and a one-line summary is
    sent with say(): the traceback's last line, followed in parentheses by
    the innermost 'File "/..."' line (first letter lowercased), or by
    'source unknown' when the traceback has no such line.
    """
    try:
        parse(command, line, line2, nick, chan, host, auth, notice, say, reply, s)
    except Exception:
        trace = traceback.format_exc()
        print(trace)

        # Walk the traceback bottom-up: the final line is the error itself
        # and the first 'File "/' line met is the innermost frame.
        bottom_up = [row.strip() for row in reversed(trace.splitlines())]
        summary = bottom_up[0]
        origin = 'source unknown'
        for row in bottom_up:
            if row.startswith('File "/'):
                origin = row[0].lower() + row[1:]
                break
        say(summary + ' (' + origin + ')', chan)
def parse(command, line, line2, nick, chan, host, auth, notice, say, reply, s): | |||
authy = auth(host) | |||
if command == "access": | |||
a = 'The bot\'s owner is "%s".' % OWNER | |||
b = 'The bot\'s admins are "%s".' % ', '.join(ADMINS_R) | |||
reply(a, chan, nick) | |||
reply(b, chan, nick) | |||
return | |||
if command == "join": | |||
if authy == "owner" or authy == "admin": | |||
try: | |||
channel = line2[4] | |||
except Exception: | |||
channel = chan | |||
s.send("JOIN %s\r\n" % channel) | |||
else: | |||
reply("You aren't authorized to use that command.", chan, nick) | |||
return | |||
if command == "part": | |||
if authy == "owner" or authy == "admin": | |||
try: | |||
channel = line2[4] | |||
except Exception: | |||
channel = chan | |||
s.send("PART %s\r\n" % channel) | |||
else: | |||
reply("You aren't authorized to use that command.", chan, nick) | |||
return | |||
if command == "restart": | |||
import thread | |||
if authy == "owner": | |||
s.send("QUIT\r\n") | |||
time.sleep(5) | |||
os.system("nice -15 python main.py") | |||
exit() | |||
else: | |||
reply("Only the owner, %s, can stop the bot. This incident will be reported." % OWNER, chan, nick) | |||
return | |||
if command == "quit" or command == "die": | |||
if authy != "owner": | |||
if command != "suicide": | |||
reply("Only the owner, %s, can stop the bot. This incident will be reported." % OWNER, chan, nick) | |||
else: | |||
say("\x01ACTION hands %s a gun... have fun :D\x01" % nick, nick) | |||
else: | |||
if command == "suicide": | |||
say("\x01ACTION stabs himself with a knife.\x01", chan) | |||
time.sleep(0.2) | |||
try: | |||
s.send("QUIT :%s\r\n" % ' '.join(line2[4:])) | |||
except Exception: | |||
s.send("QUIT\r\n") | |||
__import__('os')._exit(0) | |||
return | |||
if command == "msg": | |||
if authy == "owner" or authy == "admin": | |||
say(' '.join(line2[5:]), line2[4]) | |||
else: | |||
reply("You aren't authorized to use that command.", chan, nick) | |||
return | |||
if command == "me": | |||
if authy == "owner" or authy == "admin": | |||
say("\x01ACTION %s\x01" % ' '.join(line2[5:]), line2[4]) | |||
else: | |||
reply("You aren't authorized to use that command.", chan, nick) | |||
return | |||
if command == "calc": | |||
r_result = re.compile(r'(?i)<A NAME=results>(.*?)</A>') | |||
r_tag = re.compile(r'<\S+.*?>') | |||
subs = [ | |||
(' in ', ' -> '), | |||
(' over ', ' / '), | |||
(u'£', 'GBP '), | |||
(u'€', 'EUR '), | |||
('\$', 'USD '), | |||
(r'\bKB\b', 'kilobytes'), | |||
(r'\bMB\b', 'megabytes'), | |||
(r'\bGB\b', 'kilobytes'), | |||
('kbps', '(kilobits / second)'), | |||
('mbps', '(megabits / second)') | |||
] | |||
try: | |||
q = ' '.join(line2[4:]) | |||
except Exception: | |||
say("0?", chan) | |||
return | |||
query = q[:] | |||
for a, b in subs: | |||
query = re.sub(a, b, query) | |||
query = query.rstrip(' \t') | |||
precision = 5 | |||
if query[-3:] in ('GBP', 'USD', 'EUR', 'NOK'): | |||
precision = 2 | |||
query = web.urllib.quote(query.encode('utf-8')) | |||
uri = 'http://futureboy.us/fsp/frink.fsp?fromVal=' | |||
bytes = web.get(uri + query) | |||
m = r_result.search(bytes) | |||
if m: | |||
result = m.group(1) | |||
result = r_tag.sub('', result) # strip span.warning tags | |||
result = result.replace('>', '>') | |||
result = result.replace('(undefined symbol)', '(?) ') | |||
if '.' in result: | |||
try: result = str(round(float(result), precision)) | |||
except ValueError: pass | |||
if not result.strip(): | |||
result = '?' | |||
elif ' in ' in q: | |||
result += ' ' + q.split(' in ', 1)[1] | |||
say(q + ' = ' + result[:350], chan) | |||
else: reply("Sorry, can't calculate that.", chan, nick) | |||
return | |||
if command == "dice": | |||
import random | |||
try: | |||
set = range(int(line2[4]), int(line2[5]) + 1) | |||
except Exception: | |||
set = range(1, 7) | |||
num = random.choice(set) | |||
reply("You rolled a %s." % num, chan, nick) | |||
if len(set) < 30: | |||
say("Set consisted of %s." % set, nick) | |||
else: | |||
say("Set consisted of %s... and %s others." % (set[:30], len(set) - 30), nick) | |||
return | |||
if command == "tock": | |||
u = urllib.urlopen('http://tycho.usno.navy.mil/cgi-bin/timer.pl') | |||
info = u.info() | |||
u.close() | |||
say('"' + info['Date'] + '" - tycho.usno.navy.mil', chan) | |||
return | |||
if command == "beats": | |||
beats = ((time.time() + 3600) % 86400) / 86.4 | |||
beats = int(math.floor(beats)) | |||
say('@%03i' % beats, chan) | |||
return | |||
if command == "copyvio" or command == "copy" or command == "copyright": | |||
url = "http://en.wikipedia.org/wiki/User:EarwigBot/AfC copyvios" | |||
query = urllib.urlopen(url) | |||
data = query.read() | |||
url = "http://toolserver.org/~earwig/earwigbot/pywikipedia/error.txt" | |||
query = urllib.urlopen(url) | |||
data2 = query.read() | |||
if "critical" in data2: | |||
text = "AfC copyvio situation is CRITICAL: Major disaster." | |||
elif "exceed" in data2: | |||
text = "AfC copyvio situation is CRITICAL: Queries exceeded error." | |||
elif "spam" in data2: | |||
text = "AfC copyvio situation is CRITICAL: Spamfilter error." | |||
elif "<h3>" in data: | |||
text = "AfC copyvio situation is BAD: Unsolved copyvios at [[User:EarwigBot/AfC copyvios]]" | |||
else: | |||
text = "AfC copyvio situation is OK: OK." | |||
reply(text, chan, nick) | |||
return | |||
if command == "dict" or command == "dictionary": | |||
def trim(thing):
    """Clean up a text fragment captured from the dictionary page.

    Removes one trailing ``&nbsp;`` entity, if present, and then strips
    leading/trailing spaces, colons and periods.
    """
    entity = '&nbsp;'
    if thing.endswith(entity):
        # Slice by the entity's real length so the check and the cut can
        # never disagree about how many characters are being removed.
        thing = thing[:-len(entity)]
    return thing.strip(' :.')
r_li = re.compile(r'(?ims)<li>.*?</li>') | |||
r_tag = re.compile(r'<[^>]+>') | |||
r_parens = re.compile(r'(?<=\()(?:[^()]+|\([^)]+\))*(?=\))') | |||
r_word = re.compile(r'^[A-Za-z0-9\' -]+$') | |||
uri = 'http://encarta.msn.com/dictionary_/%s.html' | |||
r_info = re.compile(r'(?:ResultBody"><br /><br />(.*?) )|(?:<b>(.*?)</b>)') | |||
try: | |||
word = line2[4] | |||
except Exception: | |||
reply("Please enter a word.", chan, nick) | |||
return | |||
word = urllib.quote(word.encode('utf-8')) | |||
bytes = web.get(uri % word) | |||
results = {} | |||
wordkind = None | |||
for kind, sense in r_info.findall(bytes): | |||
kind, sense = trim(kind), trim(sense) | |||
if kind: wordkind = kind | |||
elif sense: | |||
results.setdefault(wordkind, []).append(sense) | |||
result = word.encode('utf-8') + ' - ' | |||
for key in sorted(results.keys()): | |||
if results[key]: | |||
result += (key or '') + ' 1. ' + results[key][0] | |||
if len(results[key]) > 1: | |||
result += ', 2. ' + results[key][1] | |||
result += '; ' | |||
result = result.rstrip('; ') | |||
if result.endswith('-') and (len(result) < 30): | |||
reply('Sorry, no definition found.', chan, nick) | |||
else: say(result, chan) | |||
return | |||
if command == "ety" or command == "etymology": | |||
etyuri = 'http://etymonline.com/?term=%s' | |||
etysearch = 'http://etymonline.com/?search=%s' | |||
r_definition = re.compile(r'(?ims)<dd[^>]*>.*?</dd>') | |||
r_tag = re.compile(r'<(?!!)[^>]+>') | |||
r_whitespace = re.compile(r'[\t\r\n ]+') | |||
abbrs = [ | |||
'cf', 'lit', 'etc', 'Ger', 'Du', 'Skt', 'Rus', 'Eng', 'Amer.Eng', 'Sp', | |||
'Fr', 'N', 'E', 'S', 'W', 'L', 'Gen', 'J.C', 'dial', 'Gk', | |||
'19c', '18c', '17c', '16c', 'St', 'Capt', 'obs', 'Jan', 'Feb', 'Mar', | |||
'Apr', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'c', 'tr', 'e', 'g' | |||
] | |||
t_sentence = r'^.*?(?<!%s)(?:\.(?= [A-Z0-9]|\Z)|\Z)' | |||
r_sentence = re.compile(t_sentence % ')(?<!'.join(abbrs)) | |||
def unescape(s):
    """Decode the three basic HTML entities (``&gt;``, ``&lt;``, ``&amp;``) in *s*.

    ``&amp;`` is decoded last so that an escaped entity such as
    ``&amp;lt;`` becomes the literal text ``&lt;`` instead of ``<``.
    """
    for entity, char in (('&gt;', '>'), ('&lt;', '<'), ('&amp;', '&')):
        s = s.replace(entity, char)
    return s
def text(html):
    """Reduce an HTML fragment to plain text.

    Tags matched by ``r_tag`` are dropped, whitespace runs matched by
    ``r_whitespace`` collapse to a single space, entities are decoded via
    ``unescape`` and the result is stripped of surrounding whitespace.
    """
    without_tags = r_tag.sub('', html)
    collapsed = r_whitespace.sub(' ', without_tags)
    plain = unescape(collapsed)
    return plain.strip()
try: | |||
word = line2[4] | |||
except Exception: | |||
reply("Please enter a word.", chan, nick) | |||
return | |||
def ety(word):
    """Look up the first sentence of a word's etymology.

    Fetches ``etyuri % word`` through ``web.get``, takes the first
    definition matched by ``r_definition``, reduces it to plain text and
    extracts its first sentence with ``r_sentence``.

    Returns the quoted sentence followed by ' - ' and the lookup URL, or
    None when no definition or sentence is found.

    Raises ValueError if *word* is longer than 25 characters.
    """
    if len(word) > 25:
        raise ValueError("Word too long: %s[...]" % word[:10])
    # Map 'axe' to the combined term used for the lookup.
    word = {'axe': 'ax/axe'}.get(word, word)

    page = web.get(etyuri % word)
    definitions = r_definition.findall(page)
    if not definitions:
        return None

    defn = text(definitions[0])
    m = r_sentence.match(defn)
    if not m:
        return None
    sentence = m.group(0)

    # Best-effort re-encoding from ISO-8859-1 to UTF-8. Only conversion
    # failures are ignored (the sentence is then used unchanged), so that
    # KeyboardInterrupt/SystemExit are no longer swallowed.
    try:
        sentence = unicode(sentence, 'iso-8859-1')
        sentence = sentence.encode('utf-8')
    except (UnicodeError, TypeError):
        pass

    maxlength = 275
    if len(sentence) > maxlength:
        # Cut at the limit, then drop the last (possibly partial) word and
        # leave room for the ellipsis marker.
        sentence = sentence[:maxlength]
        words = sentence[:-5].split(' ')
        words.pop()
        sentence = ' '.join(words) + ' [...]'

    # Use single quotes inside so the outer double quotes stay unambiguous.
    sentence = '"' + sentence.replace('"', "'") + '"'
    return sentence + ' - ' + (etyuri % word)
try: | |||
result = ety(word.encode('utf-8')) | |||
except IOError: | |||
msg = "Can't connect to etymonline.com (%s)" % (etyuri % word) | |||
reply(msg, chan, nick) | |||
return | |||
except AttributeError: | |||
result = None | |||
if result is not None: | |||
reply(result, chan, nick) | |||
else: | |||
uri = etysearch % word | |||
msg = 'Can\'t find the etymology for "%s". Try %s' % (word, uri) | |||
reply(msg, chan, nick) | |||
return | |||
if command == "num" or command == "number" or command == "count" or command == "c": | |||
try: | |||
params = string.lower(line2[4]) | |||
except Exception: | |||
params = False | |||
if params == "old" or params == "afc" or params == "a": | |||
number = unicode(int(len(re.findall("title=", urllib.urlopen("http://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Pending_AfC_submissions&cmlimit=500").read()))) - 2) | |||
reply("There are currently %s pending AfC submissions." % number, chan, nick) | |||
elif params == "redirect" or params == "redir" or params == "redirs" or params == "redirects" or params == "r": | |||
redir_data = urllib.urlopen("http://en.wikipedia.org/w/index.php?title=Wikipedia:Articles_for_creation/Redirects").read() | |||
redirs = (string.count(redir_data, "<h2>") - 1) - (string.count(redir_data, '<table class="navbox collapsible collapsed" style="text-align: left; border: 0px; margin-top: 0.2em;">')) | |||
reply("There are currently %s open redirect requests." % redirs, chan, nick) | |||
elif params == "files" or params == "ffu" or params == "file" or params == "image" or params == "images" or params == "ifu" or params == "f": | |||
file_data = re.sub("<h2>Contents</h2>", "", urllib.urlopen("http://en.wikipedia.org/w/index.php?title=Wikipedia:Files_for_upload").read()) | |||
files = (string.count(file_data, "<h2>") - 1) - (string.count(file_data, '<table class="navbox collapsible collapsed" style="text-align: left; border: 0px; margin-top: 0.2em;">')) | |||
reply("There are currently %s open file upload requests." % files, chan, nick) | |||
elif params == "aggregate" or params == "agg": | |||
subs = unicode(int(len(re.findall("title=", urllib.urlopen("http://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Pending_AfC_submissions&cmlimit=500").read()))) - 2) | |||
redir_data = urllib.urlopen("http://en.wikipedia.org/w/index.php?title=Wikipedia:Articles_for_creation/Redirects").read() | |||
file_data = re.sub("<h2>Contents</h2>", "", urllib.urlopen("http://en.wikipedia.org/w/index.php?title=Wikipedia:Files_for_upload").read()) | |||
redirs = (string.count(redir_data, "<h2><span class=\"editsection\">")) - (string.count(redir_data, '<table class="navbox collapsible collapsed" style="text-align: left; border: 0px; margin-top: 0.2em;">')) | |||
files = (string.count(file_data, "<h2>") - 1) - (string.count(file_data, '<table class="navbox collapsible collapsed" style="text-align: left; border: 0px; margin-top: 0.2em;">')) | |||
aggregate = (int(subs) * 5) + (int(redirs) * 2) + (int(files) * 2) | |||
if aggregate == 0: | |||
stat = "clear" | |||
elif aggregate < 60: | |||
stat = "almost clear" | |||
elif aggregate < 125: | |||
stat = "small backlog" | |||
elif aggregate < 175: | |||
stat = "average backlog" | |||
elif aggregate < 250: | |||
stat = "backlogged" | |||
elif aggregate < 300: | |||
stat = "heavily backlogged" | |||
else: | |||
stat = "severely backlogged" | |||
reply("Aggregate is currently %s (%s)." % (aggregate, stat), chan, nick) | |||
else: | |||
subs = unicode(int(len(re.findall("title=", urllib.urlopen("http://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Pending_AfC_submissions&cmlimit=500").read()))) - 2) | |||
redir_data = urllib.urlopen("http://en.wikipedia.org/w/index.php?title=Wikipedia:Articles_for_creation/Redirects").read() | |||
file_data = re.sub("<h2>Contents</h2>", "", urllib.urlopen("http://en.wikipedia.org/w/index.php?title=Wikipedia:Files_for_upload").read()) | |||
redirs = (string.count(redir_data, "<h2><span class=\"editsection\">")) - (string.count(redir_data, '<table class="navbox collapsible collapsed" style="text-align: left; border: 0px; margin-top: 0.2em;">')) | |||
files = (string.count(file_data, "<h2>") - 1) - (string.count(file_data, '<table class="navbox collapsible collapsed" style="text-align: left; border: 0px; margin-top: 0.2em;">')) | |||
reply("There are currently %s pending submissions, %s open redirect requests, and %s open file upload requests." % (subs, redirs, files), chan, nick) | |||
return | |||
if command == "nick": | |||
if authy == "owner": | |||
try: | |||
new_nick = line2[4] | |||
except Exception: | |||
reply("Please specify a nick to change to.", chan, nick) | |||
return | |||
s.send("NICK %s\r\n" % new_nick) | |||
else: | |||
reply("You aren't authorized to use that command.", chan, nick) | |||
return | |||
if command == "op" or command == "deop" or command == "voice" or command == "devoice": | |||
if authy == "owner" or authy == "admin": | |||
try: | |||
user = line2[4] | |||
except Exception: | |||
user = nick | |||
say("%s %s %s" % (command, chan, user), "ChanServ") | |||
else: | |||
reply("You aren't authorized to use that command.", chan, nick) | |||
return | |||
if command == "pend" or command == "pending": | |||
say("Pending submissions status page: <http://en.wikipedia.org/wiki/WP:AFC/S>.", chan) | |||
say("Pending submissions category: <http://en.wikipedia.org/wiki/Category:Pending_AfC_submissions>.", chan) | |||
return | |||
if command == "sub" or command == "submissions": | |||
try: | |||
number = int(line2[4]) | |||
except Exception: | |||
reply("Please enter a number.", chan, nick) | |||
return | |||
do_url = False | |||
try: | |||
if "url" in line2[5:]: do_url = True | |||
except Exception: | |||
pass | |||
url = "http://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Pending_AfC_submissions&cmlimit=500&cmsort=timestamp" | |||
query = urllib.urlopen(url) | |||
data = query.read() | |||
pages = re.findall("title="(.*?)"", data) | |||
try: | |||
pages.remove("Wikipedia:Articles for creation/Redirects") | |||
except Exception: | |||
pass | |||
try: | |||
pages.remove("Wikipedia:Files for upload") | |||
except Exception: | |||
pass | |||
pages.reverse() | |||
pages = pages[:number] | |||
if not do_url: | |||
s = string.join(pages, "]], [[") | |||
s = "[[%s]]" % s | |||
else: | |||
s = string.join(pages, ">, <http://en.wikipedia.org/wiki/") | |||
s = "<http://en.wikipedia.org/wiki/%s>" % s | |||
s = re.sub(" ", "_", s) | |||
s = re.sub(">,_<", ">, <", s) | |||
report = "\x02First %s pending AfC submissions:\x0F %s" % (number, s) | |||
say(report, chan) | |||
return | |||
if command == "praise" or command == "leonard" or command == "groovedog" or command == "earwig" or command == "macmed" or command == "cubs197" or command == "sparksboy" or command == "tim_song" or command == "tim" or command == "sausage" or command == "mindstormskid" or command == "mcjohn" or command == "fetchcomms" or command == "blurpeace": | |||
bad = False | |||
if command == "leonard": | |||
special = "AfC redirect reviewer" | |||
user = "Leonard^Bloom" | |||
elif command == "groovedog": | |||
special = "heh" | |||
user = "GrooveDog" | |||
elif command == "earwig": | |||
special = "Python programmer" | |||
user = "Earwig" | |||
elif command == "macmed": | |||
special = "CSD tagger" | |||
user = "MacMed" | |||
elif command == "mindstormskid": | |||
special = "Lego fanatic" | |||
user = "MindstormsKid" | |||
elif command == "cubs197": | |||
special = "IRC dude" | |||
user = "Cubs197" | |||
elif command == "sparksboy": | |||
special = "pet owner" | |||
user = "SparksBoy" | |||
elif command == "tim_song" or command == "tim": | |||
special = "JavaScript programmer" | |||
user = "Tim_Song" | |||
elif command == "sausage": | |||
special = "helper" | |||
user = "chzz" | |||
elif command == "mcjohn": | |||
special = "edit summary writer" | |||
user = "McJohn" | |||
elif command == "fetchcomms": | |||
special = "n00b" | |||
user = "Fetchcomms" | |||
elif command == "blurpeace": | |||
special = "Commons admin" | |||
user = "Blurpeace" | |||
else: | |||
say("Only a true fool would use that command, %s." % nick, chan) | |||
# say("The users who you can praise are: Leonard^Bloom, GrooveDog, Earwig, MacMed, Cubs197, SparksBoy, MindstormsKid, Chzz, McJohn, Tim_Song, Fetchcomms, and Blurpeace.", chan) | |||
return | |||
if not bad: | |||
say("\x02%s\x0F is the bestest %s evah!" % (user, special), chan) | |||
if bad: | |||
say("\x02%s\x0F is worstest %s evah!" % (user, special), chan) | |||
return | |||
if command == "trout": | |||
try: | |||
user = line2[4] | |||
user = ' '.join(line2[4:]) | |||
except Exception: | |||
reply("Hahahahahahahaha...", chan, nick) | |||
return | |||
normal = unicodedata.normalize('NFKD', unicode(string.lower(user))) | |||
if "itself" in normal: | |||
reply("I'm not that stupid ;)", chan, nick) | |||
return | |||
elif "earwigbot" in normal: | |||
reply("I'm not that stupid ;)", chan, nick) | |||
elif "earwig" not in normal and "ear wig" not in normal: | |||
text = 'slaps %s around a bit with a large trout.' % user | |||
msg = '\x01ACTION %s\x01' % text | |||
say(msg, chan) | |||
else: | |||
reply("I refuse to hurt anything with \"Earwig\" in its name :P", chan, nick) | |||
return | |||
if command == "kill" or command == "destroy" or command == "murder": | |||
reply("Who do you think I am? The Mafia?", chan, nick) | |||
return | |||
if command == "fish": | |||
try: | |||
user = line2[4] | |||
fish = ' '.join(line2[5:]) | |||
except Exception: | |||
reply("Hahahahahahahaha...", chan, nick) | |||
return | |||
normal = unicodedata.normalize('NFKD', unicode(string.lower(user))) | |||
if "itself" in normal: | |||
reply("I'm not that stupid ;)", chan, nick) | |||
return | |||
elif "earwigbot" in normal: | |||
reply("I'm not that stupid ;)", chan, nick) | |||
elif "earwig" not in normal and "ear wig" not in normal: | |||
text = 'slaps %s around a bit with a %s.' % (user, fish) | |||
msg = '\x01ACTION %s\x01' % text | |||
say(msg, chan) | |||
else: | |||
reply("I refuse to hurt anything with \"Earwig\" in its name :P", chan, nick) | |||
return | |||
if command == "report": | |||
def find_status(name="", talk=False):
    """Return a short description of an AfC submission's review status.

    name -- submission title without the "Articles for creation/" prefix
    talk -- True to look in the Wikipedia_talk namespace, otherwise the
            Wikipedia namespace is used

    The page's latest revision content is fetched through the API and
    searched for "{{AFC submission|...}}" markers. When several markers are
    present the later check wins, in the order Declined, Held, Pending,
    Reviewing. If no marker is found, the result is "Accepted" when
    ``exists`` reports that the page exists and "Not found" otherwise.
    """
    enname = re.sub(" ", "_", name)
    if talk:
        enname = "Wikipedia_talk:Articles_for_creation/%s" % enname
    else:
        enname = "Wikipedia:Articles_for_creation/%s" % enname

    url = "http://en.wikipedia.org/w/api.php?action=query&titles=%s&prop=revisions&rvprop=content" % enname
    query = urllib.urlopen(url)
    try:
        data = query.read()
    finally:
        # Release the connection even if the read fails.
        query.close()

    status = ""
    for code, label in (("D", "Declined"), ("H", "Held")):
        upper_marker = "{{AFC submission|" + code
        lower_marker = "{{AFC submission|" + code.lower()
        if upper_marker not in data and lower_marker not in data:
            continue
        # The first "<code>|<reason>|" pair anywhere in the page is taken
        # as the reason parameter.
        reasons = re.findall(r"[%s%s]\|(.*?)\|" % (code, code.lower()), data)
        if not reasons:
            # Marker present but no reason parameter found: report the bare
            # status instead of failing with an IndexError.
            status = label
        elif reasons[0] == "reason":
            status = "%s, reason is a custom reason" % label
        else:
            status = "%s, reason is '%s'" % (label, reasons[0])

    if "{{AFC submission||" in data:
        status = "Pending"
    if "{{AFC submission|R" in data or "{{AFC submission|r" in data:
        status = "Reviewing"

    if not status:
        if exists(name=enname):
            status = "Accepted"
        else:
            status = "Not found"
    return status
def exists(name=""):
    """Return False if the wiki page *name* shows the missing-page notice.

    The page is fetched from en.wikipedia.org and considered missing when
    its HTML contains the text "Wikipedia does not have a"; any other
    response counts as existing.
    """
    url = "http://en.wikipedia.org/wiki/%s" % name
    query = urllib.urlopen(url)
    try:
        data = query.read()
    finally:
        # Release the connection even if the read fails.
        query.close()
    return "Wikipedia does not have a" not in data
def get_submitter(name="", talk=False):
    """Return ``(user, anon)`` for the first revision of an AfC submission.

    name -- submission title without the "Articles for creation/" prefix
    talk -- True to look in the Wikipedia_talk namespace, otherwise the
            Wikipedia namespace is used

    ``user`` is the author of the oldest revision (rvdir=newer, rvlimit=1),
    or "" when none could be extracted. ``anon`` is True when the response
    contains "anon=".
    """
    enname = re.sub(" ", "_", name)
    if talk:
        enname = "Wikipedia_talk:Articles_for_creation/%s" % enname
    else:
        enname = "Wikipedia:Articles_for_creation/%s" % enname

    url = "http://en.wikipedia.org/w/api.php?action=query&titles=%s&prop=revisions&rvprop=user&rvdir=newer&rvlimit=1" % enname
    query = urllib.urlopen(url)
    try:
        data = query.read()
    finally:
        # Release the connection even if the read fails.
        query.close()

    # NOTE(review): assumes the API answers in its HTML-escaped pretty
    # printed form, where attribute quotes appear as &quot; -- confirm
    # against the response format actually returned.
    users = re.findall("user=&quot;(.*?)&quot;", data)
    anon = "anon=" in data
    if users:
        return users[0], anon
    # Keep the debugging output of the empty match list on stdout.
    print(users)
    return "", anon
try: | |||
rawSub = line2[4] | |||
rawSub = ' '.join(line2[4:]) | |||
except Exception: | |||
reply("You need to specify a submission name in order to use %s!" % command, chan, nick) | |||
return | |||
talk = False | |||
if "[[" in rawSub and "]]" in rawSub: | |||
name = re.sub("\[\[(.*)\]\]", "\\1", rawSub) | |||
name = re.sub(" ", "_", name) | |||
name = urllib.quote(name, ":/") | |||
name = "http://en.wikipedia.org/wiki/%s" % name | |||
if "talk:" in name: | |||
talk = True | |||
elif "http://" in rawSub: | |||
name = rawSub | |||
if "talk:" in name: | |||
talk = True | |||
elif "en.wikipedia.org" in rawSub: | |||
name = "http://%s" % rawSub | |||
if "talk:" in name: | |||
talk = True | |||
elif "Wikipedia:" in rawSub or "Wikipedia_talk:" in rawSub or "Wikipedia talk:" in rawSub: | |||
name = re.sub(" ", "_", rawSub) | |||
name = urllib.quote(name, ":/") | |||
name = "http://en.wikipedia.org/wiki/%s" % name | |||
if "talk:" in name: | |||
talk = True | |||
else: | |||
url = "http://en.wikipedia.org/wiki/" | |||
pagename = re.sub(" ", "_", rawSub) | |||
pagename = urllib.quote(pagename, ":/") | |||
pagename = "Wikipedia:Articles_for_creation/%s" % pagename | |||
page = urllib.urlopen("%s%s" % (url, pagename)) | |||
text = page.read() | |||
name = "http://en.wikipedia.org/wiki/%s" % pagename | |||
if "Wikipedia does not have a" in text: | |||
pagename = re.sub(" ", "_", rawSub) | |||
pagename = urllib.quote(pagename, ":/") | |||
pagename = "Wikipedia_talk:Articles_for_creation/%s" % pagename | |||
page = urllib.urlopen("%s%s" % (url, pagename)) | |||
name = "http://en.wikipedia.org/wiki/%s" % pagename | |||
talk = True | |||
unname = re.sub("http://en.wikipedia.org/wiki/Wikipedia:Articles_for_creation/", "", name) | |||
unname = re.sub("http://en.wikipedia.org/wiki/Wikipedia_talk:Articles_for_creation/", "", unname) | |||
unname = re.sub("_", " ", unname) | |||
if "talk" in unname: | |||
talk = True | |||
submitter, anon = get_submitter(name=unname, talk=talk) | |||
status = find_status(name=unname, talk=talk) | |||
if submitter != "": | |||
if anon == True: | |||
submitter_page = "Special:Contributions/%s" % submitter | |||
if anon == False: | |||
unsubmit = re.sub(" ", "_", submitter) | |||
unsubmit = urllib.quote(unsubmit, ":/") | |||
submitter_page = "User:%s" % unsubmit | |||
if status == "Accepted": | |||
submitterm = "Reviewer" | |||
else: | |||
submitterm = "Submitter" | |||
line1 = "\x02AfC submission report for %s:" % unname | |||
line2 = "\x02URL: \x0301\x0F%s" % name | |||
if submitter != "": | |||
line3 = "\x02%s: \x0F\x0302%s (\x0301\x0Fhttp://en.wikipedia.org/wiki/%s)." % (submitterm, submitter, submitter_page) | |||
line4 = "\x02Status: \x0F\x0302%s." % status | |||
say(line1, chan) | |||
time.sleep(0.1) | |||
say(line2, chan) | |||
time.sleep(0.1) | |||
if submitter != "": | |||
say(line3, chan) | |||
time.sleep(0.1) | |||
say(line4, chan) | |||
return | |||
if command == "commands": | |||
if chan.startswith("#"): | |||
reply("Please use that command in a private message.", chan, nick) | |||
return | |||
others2 = get_commandList().values() | |||
others = [] | |||
for com in others2: | |||
if com == "copyvio" or com == "number" or com == "pending" or com == "report" or com == "submissions" or com == "access" or com == "help" or com == "join" or com == "linker" or com == "nick" or com == "op" or com == "part" or com == "quiet" or com == "quit" or com == "restart" or com == "voice" or com == "welcome" or com == "fish" or com == "praise" or com == "trout" or com == "notes": | |||
continue | |||
if com in others: continue | |||
others.append(com) | |||
others.sort() | |||
say("\x02AFC commands:\x0F copyvio, number, pending, report, submissions.", chan) | |||
time.sleep(0.1) | |||
say("\x02Bot operation and channel maintaince commands:\x0F access, help, join, linker, nick, op, part, quiet, quit, restart, voice, welcome.", chan) | |||
time.sleep(0.1) | |||
say("\x02Fun commands:\x0F fish, praise, trout, and numerous easter eggs", chan) | |||
time.sleep(0.1) | |||
say("\x02Other commands:\x0F %s" % ', '.join(others), chan) | |||
time.sleep(0.1) | |||
say("The bot maintains a mini-wiki. Type \"!notes help\" for more information.", chan) | |||
time.sleep(0.1) | |||
say("See http://enwp.org/User:The_Earwig/Bots/IRC for details. For help on a specific command, type '!help command'.", chan) | |||
return | |||
if command == "help" or command == "doc" or command == "documentation": | |||
try: | |||
com = line2[4] | |||
except Exception: | |||
reply("Hi, I'm a bot that does work for Articles for Creation. You can find information about me at http://enwp.org/User:The_Earwig/Bots/IRC. Say \"!commands\" to me in a private message for some of my abilities. Earwig is my owner and creator, and you can contact him at http://enwp.org/User_talk:The_Earwig.", chan, nick) | |||
return | |||
say("Sorry, command documentation has not been implemented yet.", chan) | |||
return | |||
if command == "mysql": | |||
if authy != "owner": | |||
reply("You aren't authorized to use this command.", chan, nick) | |||
return | |||
import MySQLdb | |||
try: | |||
strings = line2[4] | |||
strings = ' '.join(line2[4:]) | |||
if "db:" in strings: | |||
database = re.findall("db\:(.*?)\s", strings)[0] | |||
else: | |||
database = "enwiki_p" | |||
if "time:" in strings: | |||
times = int(re.findall("time\:(.*?)\s", strings)[0]) | |||
else: | |||
times = 60 | |||
file = re.findall("file\:(.*?)\s", strings)[0] | |||
sqlquery = re.findall("query\:(.*?)\Z", strings)[0] | |||
except Exception: | |||
reply("You did not specify enough data for the bot to continue.", chan, nick) | |||
return | |||
database2 = database[:-2] + "-p" | |||
db = MySQLdb.connect(db=database, host="%s.rrdb.toolserver.org" % database2, read_default_file="/home/earwig/.my.cnf") | |||
db.query(sqlquery) | |||
r = db.use_result() | |||
data = r.fetch_row(0) | |||
try: | |||
f = codecs.open("/home/earwig/public_html/reports/%s/%s" % (database[:-2], file), 'r') | |||
reply("A file already exists with that name.", chan, nick) | |||
return | |||
except Exception: | |||
pass | |||
f = codecs.open("/home/earwig/public_html/reports/%s/%s" % (database[:-2], file), 'a', 'utf-8') | |||
for line in data: | |||
new_line = [] | |||
for l in line: | |||
new_line.append(str(l)) | |||
f.write(' '.join(new_line) + "\n") | |||
f.close() | |||
reply("Query completed successfully. See http://toolserver.org/~earwig/reports/%s/%s. I will delete the report in %s seconds." % (database[:-2], file, times), chan, nick) | |||
time.sleep(times) | |||
os.remove("/home/earwig/public_html/reports/%s/%s" % (database[:-2], file)) | |||
return | |||
if command == "remind" or command == "reminder": | |||
try: | |||
times = int(line2[4]) | |||
content = ' '.join(line2[5:]) | |||
except Exception: | |||
reply("Please specify a time and a note in the following format: !remind <time> <note>.", chan, nick) | |||
return | |||
reply("Set reminder for \"%s\" in %s seconds." % (content, times), chan, nick) | |||
time.sleep(times) | |||
reply(content, chan, nick) | |||
return | |||
if command == "notes" or command == "note" or command == "about" or command == "data" or command == "database": | |||
try: | |||
action = line2[4] | |||
except BaseException: | |||
reply("What do you want me to do? Type \"!notes help\" for more information.", chan, nick) | |||
return | |||
import MySQLdb | |||
db = MySQLdb.connect(db="u_earwig_ircbot", host="sql", read_default_file="/home/earwig/.my.cnf") | |||
specify = ' '.join(line2[5:]) | |||
if action == "help" or action == "manual": | |||
shortCommandList = "read, write, change, undo, delete, move, author, category, list, report, developer" | |||
if specify == "read": | |||
say("To read an entry, type \"!notes read <entry>\".", chan) | |||
elif specify == "write": | |||
say("To write a new entry, type \"!notes write <entry> <content>\". This will create a new entry only if one does not exist, see the below command...", chan) | |||
elif specify == "change": | |||
say("To change an entry, type \"!notes change <entry> <new content>\". The old entry will be stored in the database, so it can be undone later.", chan) | |||
elif specify == "undo": | |||
say("To undo a change, type \"!notes undo <entry>\".", chan) | |||
elif specify == "delete": | |||
say("To delete an entry, type \"!notes delete <entry>\". For security reasons, only bot admins can do this.", chan) | |||
elif specify == "move": | |||
say("To move an entry, type \"!notes move <old_title> <new_title>\".", chan) | |||
elif specify == "author": | |||
say("To return the author of an entry, type \"!notes author <entry>\".", chan) | |||
elif specify == "category" or specify == "cat": | |||
say("To change an entry's category, type \"!notes category <entry> <category>\".", chan) | |||
elif specify == "list": | |||
say("To list all categories in the database, type \"!notes list\". Type \"!notes list <category>\" to get all entries in a certain category.", chan) | |||
elif specify == "report": | |||
say("To give some statistics about the mini-wiki, including some debugging information, type \"!notes report\" in a PM.", chan) | |||
elif specify == "developer": | |||
say("To do developer work, such as writing to the database directly, type \"!notes developer <command>\". This can only be done by the bot owner.", chan) | |||
else: | |||
db.query("SELECT * FROM version;") | |||
r = db.use_result() | |||
data = r.fetch_row(0) | |||
version = data[0] | |||
reply("The Earwig Mini-Wiki: running v%s." % version, chan, nick) | |||
reply("The full list of commands, for reference, are: %s." % shortCommandList, chan, nick) | |||
reply("For an explaination of a certain command, type \"!notes help <command>\".", chan, nick) | |||
reply("You can also access the database from the Toolserver: http://toolserver.org/~earwig/cgi-bin/irc_database.py", chan, nick) | |||
time.sleep(0.4) | |||
return | |||
elif action == "read": | |||
specify = string.lower(specify) | |||
if " " in specify: specify = string.split(specify, " ")[0] | |||
if not specify or "\"" in specify: | |||
reply("Please include the name of the entry you would like to read after the command, e.g. !notes read earwig", chan, nick) | |||
return | |||
try: | |||
db.query("SELECT entry_content FROM entries WHERE entry_title = \"%s\";" % specify) | |||
r = db.use_result() | |||
data = r.fetch_row(0) | |||
entry = data[0][0] | |||
say("Entry \"\x02%s\x0F\": %s" % (specify, entry), chan) | |||
except Exception: | |||
reply("There is no entry titled \"\x02%s\x0F\"." % specify, chan, nick) | |||
return | |||
elif action == "delete" or action == "remove": | |||
specify = string.lower(specify) | |||
if " " in specify: specify = string.split(specify, " ")[0] | |||
if not specify or "\"" in specify: | |||
reply("Please include the name of the entry you would like to delete after the command, e.g. !notes delete earwig", chan, nick) | |||
return | |||
if authy == "owner" or authy == "admin": | |||
try: | |||
db.query("DELETE from entries where entry_title = \"%s\";" % specify) | |||
r = db.use_result() | |||
db.commit() | |||
reply("The entry on \"\x02%s\x0F\" has been removed." % specify, chan, nick) | |||
except Exception: | |||
phenny.reply("Unable to remove the entry on \"\x02%s\x0F\", because it doesn't exist." % specify, chan, nick) | |||
else: | |||
reply("Only bot admins can remove entries.", chan, nick) | |||
return | |||
elif action == "developer": | |||
if authy == "owner": | |||
db.query(specify) | |||
r = db.use_result() | |||
try: | |||
print r.fetch_row(0) | |||
except Exception: | |||
pass | |||
db.commit() | |||
reply("Done.", chan, nick) | |||
else: | |||
reply("Only the bot owner can modify the raw database.", chan, nick) | |||
return | |||
elif action == "write": | |||
try: | |||
write = line2[5] | |||
content = ' '.join(line2[6:]) | |||
except Exception: | |||
reply("Please include some content in your entry.", chan, nick) | |||
return | |||
db.query("SELECT * from entries WHERE entry_title = \"%s\";" % write) | |||
r = db.use_result() | |||
data = r.fetch_row(0) | |||
if data: | |||
reply("An entry on %s already exists; please use \"!notes change %s %s\"." % (write, write, content), chan, nick) | |||
return | |||
content2 = content.replace('"', '\\' + '"') | |||
db.query("INSERT INTO entries (entry_title, entry_author, entry_category, entry_content, entry_content_old) VALUES (\"%s\", \"%s\", \"uncategorized\", \"%s\", NULL);" % (write, nick, content2)) | |||
db.commit() | |||
reply("You have written an entry titled \"\x02%s\x0F\", with the following content: \"%s\"" % (write, content), chan, nick) | |||
return | |||
elif action == "change": | |||
reply("NotImplementedError", chan, nick) | |||
elif action == "undo": | |||
reply("NotImplementedError", chan, nick) | |||
elif action == "move": | |||
reply("NotImplementedError", chan, nick) | |||
elif action == "author": | |||
try: | |||
entry = line2[5] | |||
except Exception: | |||
reply("Please include the name of the entry you would like to get information for after the command, e.g. !notes author earwig", chan, nick) | |||
return | |||
db.query("SELECT entry_author from entries WHERE entry_title = \"%s\";" % entry) | |||
r = db.use_result() | |||
data = r.fetch_row(0) | |||
if data: | |||
say("The author of \"\x02%s\x0F\" is \x02%s\x0F." % (entry, data[0][0]), chan) | |||
return | |||
reply("There is no entry titled \"\x02%s\x0F\"." % entry, chan, nick) | |||
return | |||
elif action == "cat" or action == "category": | |||
reply("NotImplementedError", chan, nick) | |||
elif action == "list": | |||
reply("NotImplementedError", chan, nick) | |||
elif action == "report": | |||
reply("NotImplementedError", chan, nick) | |||
if command == "hash": | |||
import hashlib | |||
try: | |||
hashVia = line2[4] | |||
hashText = line2[5] | |||
hashText = ' '.join(line2[5:]) | |||
except Exception: | |||
reply("Please provide a string and method to hash by.", chan, nick) | |||
return | |||
try: | |||
hashed = eval("hashlib.%s(\"%s\").hexdigest()" % (hashVia, hashText)) | |||
reply(hashed, chan, nick) | |||
except Exception: | |||
try: | |||
hashing = hashlib.new(hashVia) | |||
hashing.update(hashText) | |||
hashed = hashing.hexdigest() | |||
reply(hashed, chan, nick) | |||
except Exception: | |||
reply("Error.", chan, nick) | |||
if command == "langcode" or command == "lang" or command == "language": | |||
try: | |||
lang = line2[4] | |||
except Exception: | |||
reply("Please specify an ISO code.", chan, nick) | |||
return | |||
data = urllib.urlopen("http://toolserver.org/~earwig/cgi-bin/swmt.py?action=iso").read() | |||
data = string.split(data, "\n") | |||
result = False | |||
for datum in data: | |||
if datum.startswith(lang): | |||
result = re.findall(".*? (.*)", datum)[0] | |||
break | |||
if result: | |||
reply(result, chan, nick) | |||
return | |||
reply("Not found.", chan, nick) | |||
return | |||
if command == "lookup" or command == "ip": | |||
try: | |||
hexIP = line2[4] | |||
except Exception: | |||
reply("Please specify a hex IP address.", chan, nick) | |||
return | |||
hexes = [hexIP[:2], hexIP[2:4], hexIP[4:6], hexIP[6:8]] | |||
hashes = [] | |||
for hexHash in hexes: | |||
newHex = int(hexHash, 16) | |||
hashes.append(newHex) | |||
normalizedIP = "%s.%s.%s.%s" % (hashes[0], hashes[1], hashes[2], hashes[3]) | |||
reply(normalizedIP, chan, nick) | |||
return |
@@ -1,138 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# Report the status of AFC submissions, either as an automatic message on join or a request via !status. | |||
import json | |||
import re | |||
import urllib | |||
from config.watcher import * | |||
from irc.base_command import BaseCommand | |||
class AFCStatus(BaseCommand):
    """Report Articles for Creation (AfC) backlog figures over IRC.

    Answers the !status family of commands and, for channels listed in
    AFC_CHANS, sends a status notice to each user who joins.
    """

    def get_hooks(self):
        """Return the hooks this command is attached to."""
        return ["join", "msg"]

    def get_help(self, command):
        """Return the help text for this command."""
        return "Get the number of pending AfC submissions, open redirect requests, and open file upload requests."

    def check(self, data):
        """Return True for one of our command names, or a JOIN in an AfC channel."""
        if data.is_command and data.command in ("status", "count", "num", "number", "afc_status"):
            return True

        try:
            if data.line[1] == "JOIN" and data.chan in AFC_CHANS:
                return True
        except IndexError:
            # data.line is too short to be a JOIN line
            pass
        return False

    def process(self, data):
        """Send the join notice, or reply with the statistic(s) asked for."""
        if data.line[1] == "JOIN":
            notice = self.get_join_notice()
            self.connection.notice(data.nick, notice)
            return

        if not data.args:
            # no argument: report all three counts at once
            subs = self.count_submissions()
            redirs = self.count_redirects()
            files = self.count_files()
            self.connection.reply(data, "there are currently %s pending submissions, %s open redirect requests, and %s open file upload requests."
                                  % (subs, redirs, files))
            return

        arg = data.args[0]
        if arg.startswith("sub") or arg == "s":
            subs = self.count_submissions()
            self.connection.reply(data, "there are currently %s pending AfC submissions." % subs)

        elif arg.startswith("redir") or arg == "r":
            redirs = self.count_redirects()
            self.connection.reply(data, "there are currently %s open redirect requests." % redirs)

        elif arg.startswith("file") or arg == "f":
            # fixed: this branch used to call count_redirects()
            files = self.count_files()
            self.connection.reply(data, "there are currently %s open file upload requests." % files)

        elif arg.startswith("agg") or arg == "a":
            try:
                agg_num = int(data.args[1])
            except IndexError:
                # no number given, so compute the aggregate from live counts
                agg_data = (self.count_submissions(), self.count_redirects(), self.count_files())
                agg_num = self.get_aggregate_number(agg_data)
            except ValueError:
                self.connection.reply(data, "\x0303%s\x0301 isn't a number!" % data.args[1])
                return
            aggregate = self.get_aggregate(agg_num)
            self.connection.reply(data, "aggregate is currently %s (AfC %s)." % (agg_num, aggregate))

        elif arg.startswith("join") or arg == "j":
            notice = self.get_join_notice()
            self.connection.reply(data, notice)

        else:
            self.connection.reply(data, "unknown argument: \x0303%s\x0301. Valid args are 'subs', 'redirs', 'files', 'agg', and 'join'." % arg)

    def get_join_notice(self):
        """Build the colored status line sent on join or via '!status join'."""
        subs = self.count_submissions()
        redirs = self.count_redirects()
        files = self.count_files()
        agg_num = self.get_aggregate_number((subs, redirs, files))
        aggregate = self.get_aggregate(agg_num)
        return ("\x02Current status:\x0F Articles for Creation %s (\x0302AFC\x0301: \x0305%s\x0301; \x0302AFC/R\x0301: \x0305%s\x0301; \x0302FFU\x0301: \x0305%s\x0301)"
                % (aggregate, subs, redirs, files))

    def count_submissions(self):
        """Return the number of members of Category:Pending_AfC_submissions.

        The API query asks for at most 500 members (cmlimit), so the result
        cannot exceed what one such response contains.
        """
        params = {'action': 'query', 'list': 'categorymembers', 'cmlimit': '500', 'format': 'json'}
        params['cmtitle'] = "Category:Pending_AfC_submissions"
        data = urllib.urlencode(params)
        raw = urllib.urlopen("http://en.wikipedia.org/w/api.php", data).read()
        res = json.loads(raw)
        subs = len(res['query']['categorymembers'])
        subs -= 2  # remove [[Wikipedia:Articles for creation/Redirects]] and [[Wikipedia:Files for upload]], which aren't real submissions
        return subs

    def count_redirects(self):
        """Return level-2 headings minus closed ({{afc-c|b}}) requests on AfC/Redirects."""
        content = self.get_page("Wikipedia:Articles_for_creation/Redirects")
        total = len(re.findall(r"^\s*==(.*?)==\s*$", content, re.MULTILINE))
        closed = content.lower().count("{{afc-c|b}}")
        return total - closed

    def count_files(self):
        """Return level-2 headings minus closed ({{ifu-c|b}}) requests on Files for upload."""
        content = self.get_page("Wikipedia:Files_for_upload")
        total = len(re.findall(r"^\s*==(.*?)==\s*$", content, re.MULTILINE))
        closed = content.lower().count("{{ifu-c|b}}")
        return total - closed

    def get_page(self, pagename):
        """Return the content of the latest revision of `pagename` via the API."""
        params = {'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'rvlimit': '1', 'format': 'json'}
        params['titles'] = pagename
        data = urllib.urlencode(params)
        raw = urllib.urlopen("http://en.wikipedia.org/w/api.php", data).read()
        res = json.loads(raw)
        pages = res['query']['pages']
        # take the first page id in the response
        pageid = next(iter(pages))
        return pages[pageid]['revisions'][0]['*']

    def get_aggregate(self, num):
        """Map an aggregate number onto a colored backlog description."""
        if num == 0:
            agg = "is \x02\x0303clear\x0301\x0F"
        elif num < 60:
            agg = "is \x0303almost clear\x0301"
        elif num < 125:
            agg = "has a \x0312small backlog\x0301"
        elif num < 175:
            agg = "has an \x0307average backlog\x0301"
        elif num < 250:
            agg = "is \x0304backlogged\x0301"
        elif num < 300:
            agg = "is \x02\x0304heavily backlogged\x0301\x0F"
        else:
            agg = "is \x02\x1F\x0304severely backlogged\x0301\x0F"
        return agg

    def get_aggregate_number(self, counts):
        """Return the weighted backlog score for a (subs, redirs, files) tuple.

        Submissions are weighted 5, redirect and file requests 2 each. The
        tuple is unpacked in the body rather than in the signature so the
        definition does not depend on tuple parameters; callers still pass a
        single 3-tuple positionally.
        """
        subs, redirs, files = counts
        return (subs * 5) + (redirs * 2) + (files * 2)
@@ -1,71 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# A somewhat advanced calculator: http://futureboy.us/fsp/frink.fsp. | |||
import re | |||
import urllib | |||
from irc.base_command import BaseCommand | |||
class Calc(BaseCommand):
    """Evaluate an expression with the Frink web calculator at futureboy.us."""

    def get_hooks(self):
        """Return the hooks this command is attached to."""
        return ["msg"]

    def get_help(self, command):
        """Return the help text for this command."""
        return "A somewhat advanced calculator: see http://futureboy.us/fsp/frink.fsp for details."

    def check(self, data):
        """Return True when the message is the !calc command."""
        if data.is_command and data.command == "calc":
            return True
        return False

    def process(self, data):
        """Send the query to Frink, scrape the result, and reply with it."""
        if not data.args:
            self.connection.reply(data, "What do you want me to calculate?")
            return

        # Keep the text as typed: cleanup() rewrites ' in ' to ' -> ', so the
        # unit-conversion test below has to look at the untouched query.
        raw_query = ' '.join(data.args)
        query = self.cleanup(raw_query)

        url = "http://futureboy.us/fsp/frink.fsp?fromVal=%s" % urllib.quote(query)
        result = urllib.urlopen(url).read()

        r_result = re.compile(r'(?i)<A NAME=results>(.*?)</A>')
        r_tag = re.compile(r'<\S+.*?>')

        match = r_result.search(result)
        if not match:
            self.connection.reply(data, "Calculation error.")
            return

        result = match.group(1)
        result = r_tag.sub("", result)  # strip span.warning tags
        # NOTE(review): the previous code replaced ">" with ">" (a no-op);
        # decoding the HTML entity is presumably what was meant.
        result = result.replace("&gt;", ">")
        result = result.replace("(undefined symbol)", "(?) ")
        result = result.strip()

        if not result:
            result = '?'
        elif " in " in raw_query:
            # "X in UNIT": append the requested unit name to the result
            result += " " + raw_query.split(" in ", 1)[1]

        res = "%s = %s" % (query, result)
        self.connection.reply(data, res)

    def cleanup(self, query):
        """Apply the regex substitutions in `fixes` to `query`, then strip it."""
        fixes = [
            (' in ', ' -> '),
            (' over ', ' / '),
            (u'£', 'GBP '),
            (u'€', 'EUR '),
            (r'\$', 'USD '),
            (r'\bKB\b', 'kilobytes'),
            (r'\bMB\b', 'megabytes'),
            (r'\bGB\b', 'gigabytes'),  # fixed: was mapped to 'kilobytes'
            ('kbps', '(kilobits / second)'),
            ('mbps', '(megabits / second)'),
        ]

        for original, fix in fixes:
            query = re.sub(original, fix, query)
        return query.strip()
@@ -1,31 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# Voice/devoice/op/deop users in the channel. | |||
from irc.base_command import BaseCommand | |||
from config.irc import * | |||
class ChanOps(BaseCommand):
    """Ask ChanServ to voice/devoice/op/deop a user on behalf of a bot admin."""

    def get_hooks(self):
        """Return the hooks this command is attached to."""
        return ["msg"]

    def get_help(self, command):
        """Return help text naming the capitalized action, e.g. 'Voice users...'."""
        return "%s users in the channel." % command.capitalize()

    def check(self, data):
        """Return True when the message is one of the four channel-op commands."""
        return bool(data.is_command and data.command in ["voice", "devoice", "op", "deop"])

    def process(self, data):
        """Forward the request to ChanServ if the sender's host is in ADMINS."""
        if data.host not in ADMINS:
            self.connection.reply(data, "you must be a bot admin to use this command.")
            return

        # if it is just !op/!devoice/whatever without arguments, assume they
        # want to do this to themselves
        target = data.args[0] if data.args else data.nick

        request = "%s %s %s" % (data.command, data.chan, target)
        self.connection.say("ChanServ", request)
@@ -1,160 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# Commands to interface with the bot's git repository; use '!git help' for sub-command list. | |||
import shlex, subprocess, re | |||
from config.irc import * | |||
from irc.base_command import BaseCommand | |||
class Git(BaseCommand):
    """Owner-only commands that inspect and update the bot's git checkout."""

    def get_hooks(self):
        """Return the hooks this command is attached to."""
        return ["msg"]

    def get_help(self, command):
        """Return the help text for this command."""
        return "Commands to interface with the bot's git repository; use '!git help' for sub-command list."

    def check(self, data):
        """Return True when the message is the !git command."""
        if data.is_command and data.command == "git":
            return True
        return False

    def process(self, data):
        """Check that the sender is an owner, then dispatch on the sub-command."""
        self.data = data
        if data.host not in OWNERS:
            self.connection.reply(data, "you must be a bot owner to use this command.")
            return

        if not data.args:
            self.connection.reply(data, "no arguments provided. Maybe you wanted '!git help'?")
            return

        handlers = {
            "help": self.do_help,
            "branch": self.do_branch,
            "branches": self.do_branches,
            "checkout": self.do_checkout,
            "delete": self.do_delete,
            "pull": self.do_pull,
            "status": self.do_status,
        }
        handler = handlers.get(data.args[0])
        if handler is None:  # they asked us to do something we don't know
            self.connection.reply(data, "unknown argument: \x0303%s\x0301." % data.args[0])
            return
        handler()

    def exec_shell(self, command):
        """Run `command` (split with shlex, no shell) and return its output.

        stderr is merged into the output and the final character (the
        trailing newline) is dropped. Raises subprocess.CalledProcessError
        on a non-zero exit status.
        """
        command = shlex.split(command)
        result = subprocess.check_output(command, stderr=subprocess.STDOUT)
        if result:
            result = result[:-1]  # strip newline
        return result

    def do_help(self):
        """Reply with every sub-command and a short description."""
        help_dict = {
            "branch": "get current branch",
            "branches": "get all branches",
            "checkout": "switch branches",
            "delete": "delete an old branch",
            "pull": "update everything from the remote server",
            "status": "check if we are up-to-date",
        }
        entries = ["\x0303%s\x0301 (%s)" % (key, help_dict[key]) for key in sorted(help_dict)]
        self.connection.reply(self.data, "sub-commands are: %s." % ", ".join(entries))

    def do_branch(self):
        """Reply with the name of the current branch."""
        branch = self.exec_shell("git name-rev --name-only HEAD")
        self.connection.reply(self.data, "currently on branch \x0302%s\x0301." % branch)

    def do_branches(self):
        """Reply with a comma-separated list of local branches."""
        branches = self.exec_shell("git branch")
        branches = branches.replace('\n* ', ', ')  # cleanup extraneous characters
        branches = branches.replace('* ', ' ')
        branches = branches.replace('\n  ', ', ')
        branches = branches.strip()
        self.connection.reply(self.data, "branches: \x0302%s\x0301." % branches)

    def do_checkout(self):
        """Switch to the branch named in the second argument."""
        try:
            branch = self.data.args[1]
        except IndexError:  # no branch name provided
            self.connection.reply(self.data, "switch to which branch?")
            return

        try:
            # Record where we are BEFORE switching; reading it afterwards
            # (as the code used to) always reported the new branch as "from".
            previous_branch = self.exec_shell("git name-rev --name-only HEAD")
            result = self.exec_shell("git checkout %s" % branch)
        except subprocess.CalledProcessError:  # git couldn't switch branches
            self.connection.reply(self.data, "branch \x0302%s\x0301 doesn't exist!" % branch)
            return

        if "Already on" in result:
            self.connection.reply(self.data, "already on \x0302%s\x0301!" % branch)
        else:
            self.connection.reply(self.data, "switched from branch \x0302%s\x0301 to \x0302%s\x0301." % (previous_branch, branch))

    def do_delete(self):
        """Delete a local branch, refusing if it is the one we are on."""
        try:
            delete_branch = self.data.args[1]
        except IndexError:  # no branch name provided
            self.connection.reply(self.data, "delete which branch?")
            return

        current_branch = self.exec_shell("git name-rev --name-only HEAD")

        if current_branch == delete_branch:
            self.connection.reply(self.data, "you're currently on this branch; please checkout to a different branch before deleting.")
            return

        try:
            self.exec_shell("git branch -d %s" % delete_branch)
        except subprocess.CalledProcessError:  # git couldn't delete
            self.connection.reply(self.data, "branch \x0302%s\x0301 doesn't exist!" % delete_branch)
        else:
            self.connection.reply(self.data, "branch \x0302%s\x0301 has been deleted locally." % delete_branch)

    def do_pull(self):
        """Run 'git pull' and reply with a summary of what changed."""
        branch = self.exec_shell("git name-rev --name-only HEAD")
        self.connection.reply(self.data, "pulling from remote (currently on \x0302%s\x0301)..." % branch)

        result = self.exec_shell("git pull")

        # newer git releases print "Already up to date." without hyphens
        if "Already up-to-date." in result or "Already up to date." in result:
            self.connection.reply(self.data, "done; no new changes.")
            return

        # find the "N files changed, ... deletions(-)" summary
        summary = re.findall(r"\s*((.*?)\sfile(.*?)tions?\(-\))", result)
        if not summary:
            # The pull succeeded but printed no summary we recognise; indexing
            # the empty match list here used to raise IndexError.
            self.connection.reply(self.data, "done.")
            return
        changes = summary[0][0]

        try:
            remote = self.exec_shell("git config --get branch.%s.remote" % branch)
            url = self.exec_shell("git config --get remote.%s.url" % remote)
        except subprocess.CalledProcessError:
            # something in .git/config is not specified correctly, so we
            # cannot get the remote's url
            self.connection.reply(self.data, "done; %s." % changes)
        else:
            self.connection.reply(self.data, "done; %s [from %s]." % (changes, url))

    def do_status(self):
        """Reply with the age of the last commit and whether a fetch would bring anything."""
        last = self.exec_shell('git log -n 1 --pretty="%ar"')
        result = self.exec_shell("git fetch --dry-run")
        if not result:  # nothing was fetched, so remote and local are equal
            self.connection.reply(self.data, "last commit was %s. Local copy is \x02up-to-date\x0F with remote." % last)
        else:
            self.connection.reply(self.data, "last local commit was %s. Remote is \x02ahead\x0F of local copy." % last)
@@ -1,55 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# Generates help information. | |||
from irc.base_command import BaseCommand | |||
from irc.data import Data | |||
from irc import command_handler | |||
class Help(BaseCommand):
    """Reply with general help, the list of loaded command classes, or per-command help."""

    def get_hooks(self):
        """Return the hooks this command is attached to."""
        return ["msg"]

    def get_help(self, command):
        """Return the help text for this command."""
        return "Generates help information."

    def check(self, data):
        """Return True when the message is the !help command."""
        if data.is_command and data.command == "help":
            return True
        return False

    def process(self, data):
        """Dispatch to general, list, or per-command help based on the arguments."""
        if not data.args:
            self.do_general_help(data)
        elif data.args[0] == "list":
            self.do_list_help(data)
        else:
            self.do_command_help(data)

    def do_general_help(self, data):
        """Reply with a short description of how to use !help."""
        self.connection.reply(data, "I am a bot! You can get help for any command with '!help <command>', or a list of all loaded modules with '!help list'.")

    def do_list_help(self, data):
        """Reply with the class names of all loaded commands."""
        commands = command_handler.get_commands()
        # a real list (not a lazy map) so that len() below is always valid
        cmnds = [c.__class__.__name__ for c in commands]
        pretty_cmnds = ', '.join(cmnds)
        self.connection.reply(data, "%s command classes loaded: %s." % (len(cmnds), pretty_cmnds))

    def do_command_help(self, data):
        """Reply with the help text of the first command class that accepts the name."""
        command = data.args[0]
        commands = command_handler.get_commands()

        dummy = Data()  # dummy message to test which command classes pick up this command
        dummy.command = command.lower()  # lowercase command name
        dummy.is_command = True

        for cmnd in commands:
            if cmnd.check(dummy):
                help_text = cmnd.get_help(command)
                break
        else:
            # No class claimed the command. This used to be detected by
            # catching UnboundLocalError around the reply below.
            self.connection.reply(data, "sorry, no help for \x0303%s\x0301." % command)
            return

        self.connection.reply(data, "info for command \x0303%s\x0301: \"%s\"" % (command, help_text))
@@ -1,65 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# Convert a Wikipedia page name into a URL. | |||
import re | |||
from irc.base_command import BaseCommand | |||
class Link(BaseCommand):
    """Turn [[wikilinks]], {{templates}}, or a '!link' page name into URLs."""

    def get_hooks(self):
        """Return the hooks this command is attached to."""
        return ["msg"]

    def get_help(self, command):
        """Return the help text for this command."""
        return "Convert a Wikipedia page name into a URL."

    def check(self, data):
        """Return True for !link, or any message holding both halves of [[ ]] or {{ }}."""
        if data.is_command and data.command == "link":
            return True
        text = data.msg
        if "[[" in text and "]]" in text:
            return True
        if "{{" in text and "}}" in text:
            return True
        return False

    def process(self, data):
        """Reply with URLs for every link in the message, or for the !link argument."""
        msg = data.msg

        if re.search(r"(\[\[(.*?)\]\])|(\{\{(.*?)\}\})", msg):
            urls = self.parse_line(msg)
            self.connection.reply(data, " , ".join(urls))
            return

        if data.command != "link":
            return

        if not data.args:
            self.connection.reply(data, "what do you want me to link to?")
            return
        self.connection.reply(data, self.parse_link(' '.join(data.args)))

    def parse_line(self, line):
        """Return URLs for all [[links]] first, then all {{templates}}, in `line`."""
        # destroy {{{template parameters}}}
        line = re.sub(r"\{\{\{(.*?)\}\}\}", "", line)

        # re.findall() returns a list of tuples, but we only want the 2nd
        # item in each tuple (the page or template name)
        link_names = [hit[1] for hit in re.findall(r"(\[\[(.*?)(\||\]\]))", line)]
        template_names = [hit[1] for hit in re.findall(r"(\{\{(.*?)(\||\}\}))", line)]

        results = [self.parse_link(name) for name in link_names]
        results += [self.parse_template(name) for name in template_names]
        return results

    def parse_link(self, pagename):
        """Return the en.wikipedia URL for `pagename`, with spaces as underscores."""
        url = "http://en.wikipedia.org/wiki/" + pagename.strip()
        return url.replace(" ", "_")

    def parse_template(self, pagename):
        """Return the URL of `pagename` in the Template: namespace."""
        # TODO: implement an actual namespace check
        return self.parse_link("Template:%s" % pagename)
@@ -1,124 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# Manage wiki tasks from IRC, and check on thread status. | |||
import threading, re | |||
from irc.base_command import BaseCommand | |||
from irc.data import * | |||
from wiki import task_manager | |||
from config.main import * | |||
from config.irc import * | |||
class Tasks(BaseCommand):
    """Owner-only commands to list threads, list loaded tasks, and start a task."""

    def get_hooks(self):
        """Return the hooks this command is attached to."""
        return ["msg"]

    def get_help(self, command):
        """Return the help text for this command."""
        return "Manage wiki tasks from IRC, and check on thread status."

    def check(self, data):
        """Return True when the message is !tasks, !threads, or !tasklist."""
        if data.is_command and data.command in ["tasks", "threads", "tasklist"]:
            return True
        return False

    def process(self, data):
        """Check that the sender is an owner, then dispatch on the sub-command."""
        self.data = data
        if data.host not in OWNERS:
            self.connection.reply(data, "at this time, you must be a bot owner to use this command.")
            return

        if not data.args:
            if data.command == "tasklist":
                self.do_list()
            else:
                self.connection.reply(data, "no arguments provided. Maybe you wanted '!{cmnd} list', '!{cmnd} start', or '!{cmnd} listall'?".format(cmnd=data.command))
            return

        if data.args[0] == "list":
            self.do_list()

        elif data.args[0] == "start":
            self.do_start()

        elif data.args[0] in ["listall", "all"]:
            self.do_listall()

        else:  # they asked us to do something we don't know
            self.connection.reply(data, "unknown argument: \x0303{}\x0301.".format(data.args[0]))

    def do_list(self):
        """Reply with every live thread, split into bot threads and task threads."""
        threads = threading.enumerate()

        normal_threads = []
        task_threads = []

        for thread in threads:
            tname = thread.name
            if tname == "MainThread":
                tname = self.get_main_thread_name()
                normal_threads.append("\x0302{}\x0301 (as main thread, id {})".format(tname, thread.ident))
                continue
            if tname in ["irc-frontend", "irc-watcher", "wiki-scheduler"]:
                normal_threads.append("\x0302{}\x0301 (id {})".format(tname, thread.ident))
                continue

            # task threads are expected to be named "task_name (start_time)"
            match = re.match(r"^(.*?) \((.*?)\)$", tname)
            if match is None:
                # A thread we did not name (e.g. one started by a library).
                # Indexing the empty findall() result here used to raise
                # IndexError and abort the whole listing.
                normal_threads.append("\x0302{}\x0301 (id {})".format(tname, thread.ident))
                continue
            task_name, start_time = match.groups()
            task_threads.append("\x0302{}\x0301 (id {}, since {})".format(task_name, thread.ident, start_time))

        if task_threads:
            msg = "\x02{}\x0F threads active: {}, and \x02{}\x0F task threads: {}.".format(len(threads), ', '.join(normal_threads), len(task_threads), ', '.join(task_threads))
        else:
            msg = "\x02{}\x0F threads active: {}, and \x020\x0F task threads.".format(len(threads), ', '.join(normal_threads))
        self.connection.reply(self.data, msg)

    def do_listall(self):
        """Reply with every loaded task and the ids of threads whose name starts with it."""
        tasks = sorted(task_manager.task_list.keys())
        threads = threading.enumerate()
        tasklist = []

        for task in tasks:
            ids = [str(t.ident) for t in threads if t.name.startswith(task)]
            if not ids:
                tasklist.append("\x0302{}\x0301 (idle)".format(task))
            elif len(ids) == 1:
                tasklist.append("\x0302{}\x0301 (\x02active\x0F as id {})".format(task, ids[0]))
            else:
                tasklist.append("\x0302{}\x0301 (\x02active\x0F as ids {})".format(task, ', '.join(ids)))

        msg = "{} tasks loaded: {}.".format(len(tasks), ", ".join(tasklist))
        self.connection.reply(self.data, msg)

    def do_start(self):
        """Start the task named in the second argument, passing key=value arguments to it."""
        data = self.data

        try:
            task_name = data.args[1]
        except IndexError:  # no task name given
            self.connection.reply(data, "what task do you want me to start?")
            return

        try:
            data.parse_kwargs()
        except KwargParseException as arg:
            self.connection.reply(data, "error parsing argument: \x0303{}\x0301.".format(arg))
            return

        if task_name not in task_manager.task_list:  # this task does not exist or hasn't been loaded
            self.connection.reply(data, "task could not be found; either wiki/tasks/{}.py doesn't exist, or it wasn't loaded correctly.".format(task_name))
            return

        task_manager.start_task(task_name, **data.kwargs)
        self.connection.reply(data, "task \x0302{}\x0301 started.".format(task_name))

    def get_main_thread_name(self):
        """Return the "proper" name of the MainThread; e.g. "irc-frontend" or "irc-watcher"."""
        if enable_irc_frontend:
            return "irc-frontend"
        elif enable_wiki_schedule:
            return "wiki-scheduler"
        else:
            return "irc-watcher"
@@ -1,26 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# A very simple command to test the bot. | |||
import random | |||
from irc.base_command import BaseCommand | |||
class Test(BaseCommand):
    """Greet the sender with one of two messages, picked at random."""

    def get_hooks(self):
        """Return the hooks this command is attached to."""
        return ["msg"]

    def get_help(self, command):
        """Return the help text for this command."""
        return "Test the bot!"

    def check(self, data):
        """Return True when the message is the !test command."""
        return bool(data.is_command and data.command == "test")

    def process(self, data):
        """Say either "Hey <nick>!" or "'sup <nick>?" in the channel."""
        greeting = "Hey \x02%s\x0F!" if random.randint(0, 1) else "'sup \x02%s\x0F?"
        self.connection.say(data.chan, greeting % data.nick)
@@ -1,75 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# A class to interface with IRC. | |||
import socket | |||
import threading | |||
class BrokenSocketException(Exception):
    """Raised when a socket stops delivering data and is considered broken."""
class Connection(object):
    """A thin wrapper around a TCP socket that speaks the IRC protocol."""

    def __init__(self, host=None, port=None, nick=None, ident=None, realname=None):
        """Store the connection parameters; no socket is opened until connect()."""
        self.host = host
        self.port = port
        self.nick = nick
        self.ident = ident
        self.realname = realname
        # One lock per connection, shared by every send() call. Creating a
        # new Lock inside send() (as before) could never block another sender.
        self._send_lock = threading.Lock()

    def connect(self):
        """Open the socket and register with NICK and USER."""
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.sock.connect((self.host, self.port))
        self.send("NICK %s" % self.nick)
        self.send("USER %s %s * :%s" % (self.ident, self.host, self.realname))

    def close(self):
        """Shut down and close the socket."""
        try:
            self.sock.shutdown(socket.SHUT_RDWR)  # shut down connection first
        except socket.error:
            pass  # ignore if the socket is already down
        self.sock.close()

    def get(self, size=4096):
        """Receive up to `size` bytes from the server.

        Raises BrokenSocketException when the socket returns no data.
        """
        data = self.sock.recv(size)  # fixed: `size` used to be ignored
        if not data:  # socket giving us no data, so it is dead/broken
            raise BrokenSocketException()
        return data

    def send(self, msg):
        """Send one line to the server (CRLF appended) and echo it to stdout."""
        # ensure that we only send one message at a time (blocking)
        with self._send_lock:
            self.sock.sendall(msg + "\r\n")
            print(" %s" % msg)

    def say(self, target, msg):
        """Send a PRIVMSG to `target`."""
        self.send("PRIVMSG %s :%s" % (target, msg))

    def reply(self, data, msg):
        """Say `msg` in data.chan, prefixed with the sender's nick in bold."""
        self.say(data.chan, "%s%s%s: %s" % (chr(2), data.nick, chr(0x0f), msg))

    def action(self, target, msg):
        """Send `msg` to `target` as a CTCP ACTION (/me)."""
        self.say(target, "%sACTION %s%s" % (chr(1), msg, chr(1)))

    def notice(self, target, msg):
        """Send a NOTICE to `target`."""
        self.send("NOTICE %s :%s" % (target, msg))

    def join(self, chan):
        """Join the channel `chan`."""
        self.send("JOIN %s" % chan)

    def mode(self, chan, level, msg):
        """Send a MODE command for `chan`."""
        self.send("MODE %s %s %s" % (chan, level, msg))
@@ -1,55 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# A class to store data from an individual line received on IRC. | |||
import re | |||
class KwargParseException(Exception):
    """Raised when a keyword argument in self.args cannot be parsed.

    This usually means it was given incorrectly: a bare name (abc), a bare
    value (=xyz), or a lone equal sign (=) instead of the expected abc=xyz.
    """
class Data(object):
    """Holds the fields parsed from one line received on IRC."""

    def __init__(self):
        """Initialise every field to an empty string."""
        self.line = ""
        self.chan = ""
        self.nick = ""
        self.ident = ""
        self.host = ""
        self.msg = ""

    def parse_args(self):
        """Split self.msg into self.command and self.args.

        The first word is the command and the rest are its arguments. If the
        command starts with '!' or '.', self.is_command is set to True and
        the command is stored without that prefix, lowercased. An empty
        message leaves self.command as None.
        """
        # strip out extra whitespace, split on spaces, and drop empty arguments
        words = [word for word in self.msg.strip().split(' ') if word]

        self.args = words[1:]  # the command arguments
        self.is_command = False  # whether this is a real command or not
        self.command = words[0] if words else None  # the command itself

        if self.command is None:
            return
        if self.command.startswith('!') or self.command.startswith('.'):
            self.is_command = True
            # strip '!' or '.', then lowercase the command name
            self.command = self.command[1:].lower()

    def parse_kwargs(self):
        """Parse key=value pairs from self.args[2:] into the dict self.kwargs.

        Given "!command a b key1=value1 key2=value2", self.kwargs becomes
        {'key1': 'value1', 'key2': 'value2'}. Raises KwargParseException for
        any argument that lacks an '=', a key, or a value.
        """
        self.kwargs = {}
        for arg in self.args[2:]:
            match = re.match(r"^(.*?)\=(.*?)$", arg)
            if match is None:
                raise KwargParseException(arg)
            key, value = match.groups()
            if not (key and value):
                raise KwargParseException(arg)
            self.kwargs[key] = value
@@ -1,75 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
## Imports | |||
import re, time | |||
from config.irc import * | |||
from config.secure import * | |||
from irc import command_handler | |||
from irc.connection import * | |||
from irc.data import Data | |||
# Module-wide connection used by main(); assigned by startup().
connection = None

def get_connection():
    """Build and return a new, unconnected Connection from the IRC config values."""
    return Connection(HOST, PORT, NICK, IDENT, REALNAME)
def startup(conn):
    """Install `conn` as the module's connection, load the commands, and connect."""
    global connection
    connection = conn
    # commands are loaded against the connection before it is opened
    command_handler.load_commands(conn)
    conn.connect()
def main():
    """Read from the IRC connection and dispatch each complete line.

    Returns when the socket breaks or when an owner sends !restart.
    """
    sender_re = re.compile(r":(.*?)!(.*?)@(.*?)\Z")
    read_buffer = str()

    while 1:
        try:
            read_buffer = read_buffer + connection.get()
        except BrokenSocketException:
            print("Socket has broken on front-end; restarting bot...")
            return

        # the last element is an incomplete line; keep it for the next read
        lines = read_buffer.split("\n")
        read_buffer = lines.pop()

        for line in lines:
            line = line.strip().split()
            if len(line) < 2:
                # Blank or one-token line: nothing below can handle it, and
                # indexing line[1] used to raise IndexError.
                continue

            data = Data()
            data.line = line

            if line[1] == "JOIN":
                data.nick, data.ident, data.host = sender_re.findall(line[0])[0]
                data.chan = line[2][1:]

                command_handler.check("join", data)  # check if there's anything we can respond to, and if so, respond

            if line[1] == "PRIVMSG":
                data.nick, data.ident, data.host = sender_re.findall(line[0])[0]
                data.msg = ' '.join(line[3:])[1:]
                data.chan = line[2]

                if data.chan == NICK:  # this is a privmsg to us, so set 'chan' as the nick of the sender
                    data.chan = data.nick
                    command_handler.check("msg_private", data)  # only respond if it's a private message
                else:
                    command_handler.check("msg_public", data)  # only respond if it's a public (channel) message

                command_handler.check("msg", data)  # check for general messages

                # hardcode the !restart command (we can't restart from within an ordinary command)
                if data.msg.startswith("!restart") and data.host in OWNERS:
                    print("Restarting bot per owner request...")
                    return

            if line[0] == "PING":  # if we are pinged, pong back to the server
                connection.send("PONG %s" % line[1])

            if line[1] == "376":  # numeric 376: run the startup actions
                if NS_AUTH:  # if we're supposed to auth to nickserv, do that
                    connection.say("NickServ", "IDENTIFY %s %s" % (NS_USER, NS_PASS))
                    time.sleep(3)  # sleep for a bit so we don't join channels un-authed
                for chan in CHANS:  # join all of our startup channels
                    connection.join(chan)
@@ -1,57 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# A class to store data on an individual event received from our IRC watcher. | |||
import re | |||
class RC(object):
    """Store data on an individual event received from our IRC watcher.

    Typical use is ``rc = RC(msg); rc.parse(); rc.get_pretty()``.  After
    parse() the instance exposes ``page``, ``flags``, ``url``, ``user``,
    ``comment`` and ``is_edit``.
    """

    # Raw strings keep regex escapes such as \A, \s and \Z from being read as
    # (invalid) Python string escapes; compiling once avoids repeating the
    # pattern lookup for every event.
    _COLOR_RE = re.compile(r"\x03([0-9]{1,2}(,[0-9]{1,2})?)?")
    _EDIT_RE = re.compile(
        r"\A\[\[(.*?)\]\]\s(.*?)\s(http://.*?)\s\*\s(.*?)\s\*\s(.*?)\Z")
    _LOG_RE = re.compile(r"\A\[\[(.*?)\]\]\s(.*?)\s\*\s(.*?)\s\*\s(.*?)\Z")

    def __init__(self, msg):
        """Remember the raw message; nothing is parsed until parse() runs."""
        self.msg = msg

    def parse(self):
        """Parse the recent-changes message into attributes on this object.

        IRC color codes are stripped from ``self.msg`` first.  A message that
        carries an ``http://`` URL is treated as an edit; one without is
        treated as a log entry and gets a URL built from its page title.

        Raises:
            IndexError: if the message matches neither layout (the same
                exception type the original ``findall(...)[0]`` produced).
        """
        # Strip IRC color codes; we don't want/need 'em.
        msg = self._COLOR_RE.sub("", self.msg).strip()
        self.msg = msg
        self.is_edit = True

        # flags: 'M' for minor edit, 'B' for bot edit, 'create' for a user
        # creation log entry...
        edit = self._EDIT_RE.match(msg)
        if edit is not None:
            page, flags, url, user, comment = edit.groups()
        else:
            # Probably missing the http:// part because it's a log entry,
            # which lacks a url.
            log = self._LOG_RE.match(msg)
            if log is None:
                raise IndexError(
                    "unparseable recent-changes message: {!r}".format(msg))
            page, flags, user, comment = log.groups()
            url = "http://en.wikipedia.org/wiki/{}".format(page)
            # The flag tends to have an extraneous whitespace character at
            # the end when it's a log entry.
            flags = flags.strip()
            self.is_edit = False  # this is a log entry, not an edit

        self.page, self.flags, self.url = page, flags, url
        self.user, self.comment = user, comment

    def get_pretty(self):
        """Return a colorful one-line summary to send to the front-end.

        parse() must have been called first, since this reads the attributes
        it sets.  The event name is chosen in a single chain so that a
        specific name ("deletion", "user", ...) is never overwritten: only
        real edits fall back to "edit", and unrecognized log entries keep
        their raw flag text as the event name.
        """
        flags = self.flags
        if "N" in flags:
            event_type = "page"  # "New page:"
        elif flags == "delete":
            event_type = "deletion"  # "New deletion:"
        elif flags == "protect":
            event_type = "protection"  # "New protection:"
        elif flags == "create":
            event_type = "user"  # "New user:"
        elif self.page == "Special:Log/move":
            event_type = "move"  # "New move:"
        elif self.is_edit:
            event_type = "edit"  # "New edit:"
        else:
            # "New <event>:" if we don't know exactly what happened.
            event_type = flags

        if "B" in flags:
            event_type = "bot {}".format(event_type)  # "New bot edit:"
        if "M" in flags:
            # "New minor edit:" OR "New minor bot edit:"
            event_type = "minor {}".format(event_type)

        if self.is_edit:
            return "\x02New {0}\x0F: \x0314[[\x0307{1}\x0314]]\x0306 *\x0303 {2}\x0306 *\x0302 {3}\x0306 *\x0310 {4}".format(event_type, self.page, self.user, self.url, self.comment)
        return "\x02New {0}\x0F: \x0303{1}\x0306 *\x0302 {2}\x0306 *\x0310 {3}".format(event_type, self.user, self.url, self.comment)