@@ -1,10 +1,6 @@ | |||||
# Ignore python bytecode: | |||||
*.pyc | *.pyc | ||||
# Ignore secure config files: | |||||
config/secure.py | |||||
# Ignore pydev's nonsense: | |||||
.project | |||||
.pydevproject | |||||
.settings/ | |||||
*.egg | |||||
*.egg-info | |||||
.DS_Store | |||||
build | |||||
docs/_build |
@@ -1,5 +1,4 @@ | |||||
Copyright (c) 2009-2011 Ben Kurtovic (The Earwig) | |||||
<http://en.wikipedia.org/wiki/User:The_Earwig> | |||||
Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy | Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
of this software and associated documentation files (the "Software"), to deal | of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,20 +0,0 @@ | |||||
EarwigBot[1] is a Python[2] robot that edits Wikipedia. | |||||
Development began, based on the Pywikipedia framework[3], in early 2009. | |||||
Approval for its first task, a copyright violation detector[4], was carried out | |||||
in May, and the bot has been running consistently ever since (with the | |||||
exception of Jan/Feb 2011). It currently handles several ongoing tasks[5], | |||||
ranging from statistics generation to category cleanup, and on-demand tasks | |||||
such as WikiProject template tagging. Since it started running, the bot has | |||||
made over 45,000 edits. | |||||
A project to rewrite it from scratch began in early April 2011, thus moving | |||||
away from the Pywikipedia framework and allowing for less overall code, better | |||||
integration between bot parts, and easier maintenance. | |||||
Links: | |||||
[1] http://toolserver.org/~earwig/earwigbot/ | |||||
[2] http://python.org/ | |||||
[3] http://pywikipediabot.sourceforge.net/ | |||||
[4] http://en.wikipedia.org/wiki/Wikipedia:Bots/Requests_for_approval/EarwigBot_1 | |||||
[5] http://en.wikipedia.org/wiki/User:EarwigBot#Tasks |
@@ -0,0 +1,205 @@ | |||||
EarwigBot | |||||
========= | |||||
EarwigBot_ is a Python_ robot that edits Wikipedia_ and interacts with people | |||||
over IRC_. This file provides a basic overview of how to install and set up the | |||||
bot; more detailed information is located in the ``docs/`` directory (available | |||||
online at PyPI_). | |||||
History | |||||
------- | |||||
Development began, based on the `Pywikipedia framework`_, in early 2009. | |||||
Approval for its first task, a `copyright violation detector`_, was carried out | |||||
in May, and the bot has been running consistently ever since (with the | |||||
exception of Jan/Feb 2011). It currently handles `several ongoing tasks`_ | |||||
ranging from statistics generation to category cleanup, and on-demand tasks | |||||
such as WikiProject template tagging. Since it started running, the bot has | |||||
made over 50,000 edits. | |||||
A project to rewrite it from scratch began in early April 2011, thus moving | |||||
away from the Pywikipedia framework and allowing for less overall code, better | |||||
integration between bot parts, and easier maintenance. | |||||
Installation | |||||
------------ | |||||
This package contains the core ``earwigbot``, abstracted enough that it should | |||||
be usable and customizable by anyone running a bot on a MediaWiki site. Since | |||||
it is component-based, the IRC components can be disabled if desired. IRC | |||||
commands and bot tasks specific to `my instance of EarwigBot`_ that I don't | |||||
feel the average user will need are available from the repository | |||||
`earwigbot-plugins`_. | |||||
It's recommended to run the bot's unit tests before installing. Run ``python | |||||
setup.py test`` from the project's root directory. Note that some | |||||
tests require an internet connection, and others may take a while to run. | |||||
Coverage is currently rather incomplete. | |||||
Latest release (v0.1) | |||||
~~~~~~~~~~~~~~~~~~~~~ | |||||
EarwigBot is available from the `Python Package Index`_, so you can install the | |||||
latest release with ``pip install earwigbot`` (`get pip`_). | |||||
You can also install it from source [1]_ directly:: | |||||
curl -Lo earwigbot.tgz https://github.com/earwig/earwigbot/tarball/v0.1 | |||||
tar -xf earwigbot.tgz | |||||
cd earwig-earwigbot-* | |||||
python setup.py install | |||||
cd .. | |||||
rm -r earwigbot.tgz earwig-earwigbot-* | |||||
Development version | |||||
~~~~~~~~~~~~~~~~~~~ | |||||
You can install the development version of the bot from ``git`` by using | |||||
setuptools/distribute's ``develop`` command [1]_, probably on the ``develop`` | |||||
branch which contains (usually) working code. ``master`` contains the latest | |||||
release. EarwigBot uses `git flow`_, so you're free to | |||||
browse by tags or by new features (``feature/*`` branches):: | |||||
git clone git://github.com/earwig/earwigbot.git earwigbot | |||||
cd earwigbot | |||||
python setup.py develop | |||||
Setup | |||||
----- | |||||
The bot stores its data in a "working directory", including its config file and | |||||
databases. This is also the location where you will place custom IRC commands | |||||
and bot tasks, which will be explained later. It doesn't matter where this | |||||
directory is, as long as the bot can write to it. | |||||
Start the bot with ``earwigbot path/to/working/dir``, or just ``earwigbot`` if | |||||
the working directory is the current directory. It will notice that no | |||||
``config.yml`` file exists and take you through the setup process. | |||||
There is currently no way to edit the ``config.yml`` file from within the bot | |||||
after it has been created, but YAML is a very straightforward format, so you | |||||
should be able to make any necessary changes yourself. Check out the | |||||
`explanation of YAML`_ on Wikipedia for help. | |||||
After setup, the bot will start. This means it will connect to the IRC servers | |||||
it has been configured for, schedule bot tasks to run at specific times, and | |||||
then wait for instructions (as commands on IRC). For a list of commands, say | |||||
"``!help``" (commands are messages prefixed with an exclamation mark). | |||||
You can stop the bot at any time with Control+C, same as you stop a normal | |||||
Python program, and it will try to exit safely. You can also use the | |||||
"``!quit``" command on IRC. | |||||
Customizing | |||||
----------- | |||||
The bot's working directory contains a ``commands`` subdirectory and a | |||||
``tasks`` subdirectory. Custom IRC commands can be placed in the former, | |||||
whereas custom wiki bot tasks go into the latter. Developing custom modules is | |||||
explained below, and in more detail through the bot's documentation on PyPI_ | |||||
(or in the ``docs/`` dir). | |||||
Note that custom commands and tasks will override built-in commands and tasks | |||||
with the same name. | |||||
``Bot`` and ``BotConfig`` | |||||
~~~~~~~~~~~~~~~~~~~~~~~~~ | |||||
`earwigbot.bot.Bot`_ is EarwigBot's main class. You don't have to instantiate | |||||
this yourself, but it's good to be familiar with its attributes and methods, | |||||
because it is the main way to communicate with other parts of the bot. A | |||||
``Bot`` object is accessible as an attribute of commands and tasks (i.e., | |||||
``self.bot``). | |||||
`earwigbot.config.BotConfig`_ stores configuration information for the bot. Its | |||||
docstring explains what each attribute is used for, but essentially each "node" | |||||
(one of ``config.components``, ``wiki``, ``irc``, ``commands``, ``tasks``, and | |||||
``metadata``) maps to a section of the bot's ``config.yml`` file. For example, | |||||
if ``config.yml`` includes something like:: | |||||
irc: | |||||
frontend: | |||||
nick: MyAwesomeBot | |||||
channels: | |||||
- "##earwigbot" | |||||
- "#channel" | |||||
- "#other-channel" | |||||
...then ``config.irc["frontend"]["nick"]`` will be ``"MyAwesomeBot"`` and | |||||
``config.irc["frontend"]["channels"]`` will be ``["##earwigbot", "#channel", | |||||
"#other-channel"]``. | |||||
Custom IRC commands | |||||
~~~~~~~~~~~~~~~~~~~ | |||||
Custom commands are subclasses of `earwigbot.commands.Command`_ that override | |||||
``Command``'s ``process()`` (and optionally ``check()`` or ``setup()``) | |||||
methods. | |||||
The bot has a wide selection of built-in commands and plugins to act as sample | |||||
code and/or to give ideas. Start with test_, and then check out chanops_ and | |||||
afc_status_ for some more complicated scripts. | |||||
Custom bot tasks | |||||
~~~~~~~~~~~~~~~~ | |||||
Custom tasks are subclasses of `earwigbot.tasks.Task`_ that override ``Task``'s | |||||
``run()`` (and optionally ``setup()``) methods. | |||||
See the built-in wikiproject_tagger_ task for a relatively straightforward | |||||
task, or the afc_statistics_ plugin for a more complicated one. | |||||
The Wiki Toolset | |||||
---------------- | |||||
EarwigBot's answer to the `Pywikipedia framework`_ is the Wiki Toolset | |||||
(``earwigbot.wiki``), which you will mainly access through ``bot.wiki``. | |||||
``bot.wiki`` provides three methods for the management of Sites - | |||||
``get_site()``, ``add_site()``, and ``remove_site()``. Sites are objects that | |||||
simply represent a MediaWiki site. A single instance of EarwigBot (i.e. a | |||||
single *working directory*) is expected to relate to a single site or group of | |||||
sites using the same login info (like all WMF wikis with CentralAuth). | |||||
Load your default site (the one that you picked during setup) with | |||||
``site = bot.wiki.get_site()``. | |||||
Not all aspects of the toolset are covered in the docs. Explore `its code and | |||||
docstrings`_ to learn how to use it in a more hands-on fashion. For reference, | |||||
``bot.wiki`` is an instance of ``earwigbot.wiki.SitesDB`` tied to the | |||||
``sites.db`` file in the bot's working directory. | |||||
Footnotes | |||||
--------- | |||||
- Questions, comments, or suggestions about the documentation? `Let me know`_ | |||||
so I can improve it for other people. | |||||
.. [1] ``python setup.py install``/``develop`` may require root, or use the | |||||
``--user`` switch to install for the current user only. | |||||
.. _EarwigBot: http://en.wikipedia.org/wiki/User:EarwigBot | |||||
.. _Python: http://python.org/ | |||||
.. _Wikipedia: http://en.wikipedia.org/ | |||||
.. _IRC: http://en.wikipedia.org/wiki/Internet_Relay_Chat | |||||
.. _PyPI: http://packages.python.org/earwigbot | |||||
.. _Pywikipedia framework: http://pywikipediabot.sourceforge.net/ | |||||
.. _copyright violation detector: http://en.wikipedia.org/wiki/Wikipedia:Bots/Requests_for_approval/EarwigBot_1 | |||||
.. _several ongoing tasks: http://en.wikipedia.org/wiki/User:EarwigBot#Tasks | |||||
.. _my instance of EarwigBot: http://en.wikipedia.org/wiki/User:EarwigBot | |||||
.. _earwigbot-plugins: https://github.com/earwig/earwigbot-plugins | |||||
.. _Python Package Index: http://pypi.python.org | |||||
.. _get pip: http://pypi.python.org/pypi/pip | |||||
.. _git flow: http://nvie.com/posts/a-successful-git-branching-model/ | |||||
.. _explanation of YAML: http://en.wikipedia.org/wiki/YAML | |||||
.. _earwigbot.bot.Bot: https://github.com/earwig/earwigbot/blob/develop/earwigbot/bot.py | |||||
.. _earwigbot.config.BotConfig: https://github.com/earwig/earwigbot/blob/develop/earwigbot/config.py | |||||
.. _earwigbot.commands.Command: https://github.com/earwig/earwigbot/blob/develop/earwigbot/commands/__init__.py | |||||
.. _test: https://github.com/earwig/earwigbot/blob/develop/earwigbot/commands/test.py | |||||
.. _chanops: https://github.com/earwig/earwigbot/blob/develop/earwigbot/commands/chanops.py | |||||
.. _afc_status: https://github.com/earwig/earwigbot-plugins/blob/develop/commands/afc_status.py | |||||
.. _earwigbot.tasks.Task: https://github.com/earwig/earwigbot/blob/develop/earwigbot/tasks/__init__.py | |||||
.. _wikiproject_tagger: https://github.com/earwig/earwigbot/blob/develop/earwigbot/tasks/wikiproject_tagger.py | |||||
.. _afc_statistics: https://github.com/earwig/earwigbot-plugins/blob/develop/tasks/afc_statistics.py | |||||
.. _its code and docstrings: https://github.com/earwig/earwigbot/tree/develop/earwigbot/wiki | |||||
.. _Let me know: ben.kurtovic@verizon.net |
@@ -1,25 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# EarwigBot Configuration File | |||||
# This file contains information that the bot uses to connect to IRC. | |||||
# our main (front-end) server's hostname and port | |||||
HOST = "irc.freenode.net" | |||||
PORT = 6667 | |||||
# our watcher server's hostname, port, and RC channel | |||||
WATCHER_HOST = "irc.wikimedia.org" | |||||
WATCHER_PORT = 6667 | |||||
WATCHER_CHAN = "#en.wikipedia" | |||||
# our nick, ident, and real name, used on both servers | |||||
NICK = "EarwigBot" | |||||
IDENT = "earwigbot" | |||||
REALNAME = "[[w:en:User:EarwigBot]]" | |||||
# channels to join on main server's startup | |||||
CHANS = ["##earwigbot", "##earwig", "#wikipedia-en-afc"] | |||||
# hardcoded hostnames of users with certain permissions | |||||
OWNERS = ["wikipedia/The-Earwig"] # can use owner-only commands (!restart and !git) | |||||
ADMINS = ["wikipedia/The-Earwig", "wikipedia/LeonardBloom"] # can use high-risk commands, e.g. !op |
@@ -1,24 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# EarwigBot Configuration File | |||||
# This file tells the bot which of its components should be enabled. | |||||
# The IRC frontend (configured in config/irc.py) sits on a public IRC network, | |||||
# responds to commands given to it, and reports edits (if the IRC watcher | |||||
# component is enabled). | |||||
enable_irc_frontend = True | |||||
# The IRC watcher (connection details configured in config/irc.py as well) sits | |||||
# on an IRC network that gives a recent changes feed, usually irc.wikimedia.org. | |||||
# It looks for edits matching certain (often regex) patterns (rules configured | |||||
# in config/watcher.py), and either reports them to the IRC frontend (if | |||||
# enabled), or activates a task on the WikiBot (if configured to do so). | |||||
enable_irc_watcher = True | |||||
# EarwigBot doesn't have to edit a wiki, although this is its main purpose. If | |||||
# the wiki schedule is disabled, it will not be able to handle scheduled tasks | |||||
# that involve editing (such as creating a daily category every day at midnight | |||||
# UTC), but it can still edit through rules given in the watcher, and bot tasks | |||||
# can still be activated by the command line. The schedule is configured in | |||||
# config/schedule.py. | |||||
enable_wiki_schedule = True |
@@ -1,28 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# EarwigBot Configuration File | |||||
# This file tells the bot when to run certain wiki-editing tasks. | |||||
def check(minute, hour, month_day, month, week_day):
    """Return the wiki-editing tasks that are due at the given time.

    Each entry of the returned list is either a bare task name or a
    ``(task_name, kwargs)`` tuple. ``month_day`` and ``month`` are accepted
    but are not consulted by the current rules.
    """
    if minute != 0:
        return []  # nothing is scheduled off the hour

    # Every hour, on the hour: save statistics to [[Template:AFC_statistics]].
    due = [("afc_statistics", {"action": "save"})]
    if hour != 0:
        return due

    # Every day at midnight: create daily categories for WP:AFC and WP:FEED.
    due.extend(["afc_dailycats", "feed_dailycats"])

    # Once a week at midnight (the start of the day, not the end). These rules
    # label 0 as Sunday.
    # NOTE(review): confirm the caller's week-day numbering matches that.
    weekly = {
        0: "afc_undated",    # clear [[Category:Undated AfC submissions]]
        1: "afc_catdelink",  # delink mainspace categories in declined AfC submissions
        2: "wrongmime",      # tag files whose extensions do not agree with their MIME type
        3: "blptag",         # add |blp=yes to {{WPB}} or {{WPBS}} when used along with {{WP Biography}}
    }
    if week_day in weekly:
        due.append(weekly[week_day])
    return due
@@ -1,9 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# EarwigBot Configuration File | |||||
# This file contains information that should be kept hidden, including passwords. | |||||
# IRC: identify ourselves to NickServ? | |||||
NS_AUTH = False | |||||
NS_USER = "" | |||||
NS_PASS = "" |
@@ -1,69 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# EarwigBot Configuration File | |||||
# This file contains rules for the bot's watcher component. | |||||
import re | |||||
from wiki import task_manager | |||||
# Report channels on our front-end server. They /must/ also be listed in CHANS
# in config/irc.py, or the bot will not be able to send messages to them
# (unless they have -n set).
AFC_CHANS = ["#wikipedia-en-afc"]  # recent AfC changes / AfC status messages upon join
BOT_CHANS = ["##earwigbot", "#wikipedia-en-afc"]  # edits containing "!earwigbot"

# Pattern fragment shared by the AfC-related expressions below. It is written
# in lowercase; process() lowercases page names and comments before matching.
afc_prefix = "wikipedia( talk)?:(wikiproject )?articles for creation"

# Compiled expressions used to recognise particular kinds of edits.
r_page = re.compile(afc_prefix)
r_ffu = re.compile("wikipedia( talk)?:files for upload")

# Moves: an AfC page moved locally or out (1), or an outside page moved in (2).
r_move1 = re.compile(r"moved \[\[" + afc_prefix)
r_move2 = re.compile(r"moved \[\[(.*?)\]\] to \[\[" + afc_prefix)
r_moved_pages = re.compile(r"^moved \[\[(.*?)\]\] to \[\[(.*?)\]\]")

# Log actions; the *_page variants capture the affected page title.
r_delete = re.compile(r'deleted "\[\[' + afc_prefix)
r_deleted_page = re.compile(r'^deleted "\[\[(.*?)\]\]')
r_restore = re.compile(r'restored "\[\[' + afc_prefix)
r_restored_page = re.compile(r'^restored "\[\[(.*?)\]\]')
r_protect = re.compile(r'protected "\[\[' + afc_prefix)
def process(rc):
    """Decide which front-end channels should be told about a recent change.

    ``rc`` must expose ``page``, ``comment``, ``msg`` and ``flags``
    attributes. As a side effect, AfC-related events start the
    ``afc_statistics`` and ``afc_copyvios`` tasks through ``task_manager``.

    Returns a set of channel names, which may be empty.
    """
    chans = set()  # channels to report this message to
    page_name = rc.page.lower()
    comment = rc.comment.lower()

    # Checked independently of the chain below: a change can be reported to
    # both the bot channels and the AfC channels.
    if "!earwigbot" in rc.msg.lower():
        chans.update(BOT_CHANS)

    if r_page.search(page_name):
        task_manager.start_task("afc_statistics", action="process_edit", page=rc.page)
        task_manager.start_task("afc_copyvios", action="process_edit", page=rc.page)
        chans.update(AFC_CHANS)

    elif r_ffu.match(page_name):
        chans.update(AFC_CHANS)

    elif page_name.startswith("template:afc submission"):
        chans.update(AFC_CHANS)

    elif rc.flags == "move" and (r_move1.match(comment) or r_move2.match(comment)):
        # Two capture groups, so findall() yields (source, destination) tuples.
        p = r_moved_pages.findall(rc.comment)[0]
        task_manager.start_task("afc_statistics", action="process_move", pages=p)
        task_manager.start_task("afc_copyvios", action="process_move", pages=p)
        chans.update(AFC_CHANS)

    elif rc.flags == "delete" and r_delete.match(comment):
        # One capture group, so findall() yields plain strings: [0] is the
        # whole page title. (A further [0] would keep only its first letter.)
        p = r_deleted_page.findall(rc.comment)[0]
        task_manager.start_task("afc_statistics", action="process_delete", page=p)
        task_manager.start_task("afc_copyvios", action="process_delete", page=p)
        chans.update(AFC_CHANS)

    elif rc.flags == "restore" and r_restore.match(comment):
        # Same single-group pattern shape as the delete case above.
        p = r_restored_page.findall(rc.comment)[0]
        task_manager.start_task("afc_statistics", action="process_restore", page=p)
        task_manager.start_task("afc_copyvios", action="process_restore", page=p)
        chans.update(AFC_CHANS)

    elif rc.flags == "protect" and r_protect.match(comment):
        chans.update(AFC_CHANS)

    return chans
@@ -1,122 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
## EarwigBot's Core | |||||
## EarwigBot has three components that can run independently of each other: an | |||||
## IRC front-end, an IRC watcher, and a wiki scheduler. | |||||
## * The IRC front-end runs on a normal IRC server and expects users to | |||||
## interact with it/give it commands. | |||||
## * The IRC watcher runs on a wiki recent-changes server and listens for | |||||
## edits. Users cannot interact with this part of the bot. | |||||
## * The wiki scheduler runs wiki-editing bot tasks in separate threads at | |||||
## user-defined times through a cron-like interface. | |||||
## There is a "priority" system here: | |||||
## 1. If the IRC frontend is enabled, it will run on the main thread, and the | |||||
## IRC watcher and wiki scheduler (if enabled) will run on separate threads. | |||||
## 2. If the wiki scheduler is enabled, it will run on the main thread, and the | |||||
## IRC watcher (if enabled) will run on a separate thread. | |||||
## 3. If the IRC watcher is enabled, it will run on the main (and only) thread. | |||||
## Else, the bot will stop, as no components are enabled. | |||||
import threading | |||||
import time | |||||
import traceback | |||||
import sys | |||||
import os | |||||
parent_dir = os.path.split(sys.path[0])[0] | |||||
sys.path.append(parent_dir) # make sure we look in the parent directory for modules | |||||
from config.main import * | |||||
from irc import frontend, watcher | |||||
from wiki import task_manager | |||||
f_conn = None | |||||
w_conn = None | |||||
def irc_watcher(f_conn=None):
    """Run the IRC watcher, restarting it whenever it breaks.

    Runs on its own thread when the frontend and/or scheduler is enabled,
    otherwise on the main thread.

    ``f_conn`` is the frontend connection handed on to ``watcher.main()``.
    It defaults to None so the watcher can be started without a frontend.

    This function loops forever; it only ends when an exception that is not
    an ``Exception`` subclass (e.g. KeyboardInterrupt) propagates out.
    """
    global w_conn
    # print("...") with one string argument behaves the same as the print
    # statement on Python 2.
    print("\nStarting IRC watcher...")
    while 1:  # restart the watcher component if (just) it breaks
        w_conn = watcher.get_connection()
        w_conn.connect()
        print("")  # blank line to signify that the bot has finished starting up
        try:
            watcher.main(w_conn, f_conn)
        except Exception:
            # Deliberately not a bare except: KeyboardInterrupt and SystemExit
            # must escape so Ctrl+C stops the bot instead of restarting the
            # watcher when it is running on the main thread.
            traceback.print_exc()
        time.sleep(5)  # sleep a bit before restarting watcher
        print("\nWatcher has stopped; restarting component...")
def wiki_scheduler():
    """Start the scheduled wiki tasks roughly once a minute, forever.

    Runs on its own thread, or on the primary thread if the IRC frontend is
    not enabled.
    """
    while 1:
        time_start = time.time()
        now = time.gmtime(time_start)
        task_manager.start_tasks(now)
        time_end = time.time()

        # Elapsed time must be end - start. With the operands swapped the
        # value is never positive, so every pass slept for 60 seconds or more
        # and the loop slowly drifted past minute boundaries.
        time_diff = time_end - time_start
        if time_diff < 60:  # sleep for the remainder of this one-minute cycle
            time.sleep(60 - time_diff)
def irc_frontend():
    """Run the IRC frontend on the primary thread.

    Before handing control to the frontend, the wiki scheduler and the IRC
    watcher are each started on a daemon thread if they are enabled. Once the
    frontend returns, the open connections are closed.
    """
    global f_conn

    # print("...") with one string argument behaves the same as the print
    # statement on Python 2.
    print("\nStarting IRC frontend...")
    f_conn = frontend.get_connection()
    frontend.startup(f_conn)

    if enable_wiki_schedule:
        print("\nStarting wiki scheduler...")
        task_manager.load_tasks()
        scheduler_thread = threading.Thread(target=wiki_scheduler,
                                            name="wiki-scheduler")
        scheduler_thread.daemon = True
        scheduler_thread.start()

    if enable_irc_watcher:
        watcher_thread = threading.Thread(target=irc_watcher, name="irc-watcher",
                                          args=(f_conn,))
        watcher_thread.daemon = True
        watcher_thread.start()

    # Hand the primary thread to the frontend; cleanup runs once it returns.
    frontend.main()

    if enable_irc_watcher:
        w_conn.close()
    f_conn.close()
def run():
    """Start whichever bot components are enabled, by priority.

    1. IRC frontend enabled: it takes the main thread and starts the other
       enabled components itself.
    2. Otherwise, wiki scheduler enabled: it takes the main thread, with the
       IRC watcher (if enabled) on a separate daemon thread.
    3. Otherwise, IRC watcher enabled: it runs alone on the main thread.
    4. Nothing enabled: exit with a message.
    """
    if enable_irc_frontend:
        irc_frontend()

    elif enable_wiki_schedule:
        # print("...") with one string argument behaves the same as the print
        # statement on Python 2.
        print("\nStarting wiki scheduler...")
        task_manager.load_tasks()
        if enable_irc_watcher:
            t_watcher = threading.Thread(target=irc_watcher, args=(f_conn,))
            t_watcher.name = "irc-watcher"
            t_watcher.daemon = True
            t_watcher.start()
        wiki_scheduler()

    elif enable_irc_watcher:
        # irc_watcher() requires the frontend connection argument; calling it
        # bare raised TypeError. No frontend was started on this path, so
        # f_conn still holds its module-level initial value (None), exactly as
        # in the threaded branch above.
        irc_watcher(f_conn)

    else:  # nothing is enabled!
        exit("\nNo bot parts are enabled; stopping...")
if __name__ == "__main__": | |||||
try: | |||||
run() | |||||
except KeyboardInterrupt: | |||||
exit("\nKeyboardInterrupt: stopping main bot loop.") |
@@ -0,0 +1,153 @@ | |||||
# Makefile for Sphinx documentation | |||||
# | |||||
# You can set these variables from the command line. | |||||
SPHINXOPTS = | |||||
SPHINXBUILD = sphinx-build | |||||
PAPER = | |||||
BUILDDIR = _build | |||||
# Internal variables. | |||||
PAPEROPT_a4 = -D latex_paper_size=a4 | |||||
PAPEROPT_letter = -D latex_paper_size=letter | |||||
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . | |||||
# the i18n builder cannot share the environment and doctrees with the others | |||||
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . | |||||
# Every target in this file is a command, not a file to be built; texinfo and
# info are listed too so a stray file of that name cannot mask them.
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man texinfo info changes linkcheck doctest gettext
help: | |||||
@echo "Please use \`make <target>' where <target> is one of" | |||||
@echo " html to make standalone HTML files" | |||||
@echo " dirhtml to make HTML files named index.html in directories" | |||||
@echo " singlehtml to make a single large HTML file" | |||||
@echo " pickle to make pickle files" | |||||
@echo " json to make JSON files" | |||||
@echo " htmlhelp to make HTML files and a HTML help project" | |||||
@echo " qthelp to make HTML files and a qthelp project" | |||||
@echo " devhelp to make HTML files and a Devhelp project" | |||||
@echo " epub to make an epub" | |||||
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" | |||||
@echo " latexpdf to make LaTeX files and run them through pdflatex" | |||||
@echo " text to make text files" | |||||
@echo " man to make manual pages" | |||||
@echo " texinfo to make Texinfo files" | |||||
@echo " info to make Texinfo files and run them through makeinfo" | |||||
@echo " gettext to make PO message catalogs" | |||||
@echo " changes to make an overview of all changed/added/deprecated items" | |||||
@echo " linkcheck to check all external links for integrity" | |||||
@echo " doctest to run all doctests embedded in the documentation (if enabled)" | |||||
clean: | |||||
-rm -rf $(BUILDDIR)/* | |||||
html: | |||||
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html | |||||
@echo | |||||
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html." | |||||
dirhtml: | |||||
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml | |||||
@echo | |||||
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." | |||||
singlehtml: | |||||
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml | |||||
@echo | |||||
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." | |||||
pickle: | |||||
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle | |||||
@echo | |||||
@echo "Build finished; now you can process the pickle files." | |||||
json: | |||||
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json | |||||
@echo | |||||
@echo "Build finished; now you can process the JSON files." | |||||
htmlhelp: | |||||
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp | |||||
@echo | |||||
@echo "Build finished; now you can run HTML Help Workshop with the" \ | |||||
".hhp project file in $(BUILDDIR)/htmlhelp." | |||||
qthelp: | |||||
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp | |||||
@echo | |||||
@echo "Build finished; now you can run "qcollectiongenerator" with the" \ | |||||
".qhcp project file in $(BUILDDIR)/qthelp, like this:" | |||||
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/EarwigBot.qhcp" | |||||
@echo "To view the help file:" | |||||
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/EarwigBot.qhc" | |||||
devhelp: | |||||
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp | |||||
@echo | |||||
@echo "Build finished." | |||||
@echo "To view the help file:" | |||||
@echo "# mkdir -p $$HOME/.local/share/devhelp/EarwigBot" | |||||
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/EarwigBot" | |||||
@echo "# devhelp" | |||||
epub: | |||||
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub | |||||
@echo | |||||
@echo "Build finished. The epub file is in $(BUILDDIR)/epub." | |||||
latex: | |||||
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex | |||||
@echo | |||||
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." | |||||
@echo "Run \`make' in that directory to run these through (pdf)latex" \ | |||||
"(use \`make latexpdf' here to do that automatically)." | |||||
latexpdf: | |||||
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex | |||||
@echo "Running LaTeX files through pdflatex..." | |||||
$(MAKE) -C $(BUILDDIR)/latex all-pdf | |||||
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." | |||||
text: | |||||
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text | |||||
@echo | |||||
@echo "Build finished. The text files are in $(BUILDDIR)/text." | |||||
man: | |||||
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man | |||||
@echo | |||||
@echo "Build finished. The manual pages are in $(BUILDDIR)/man." | |||||
texinfo: | |||||
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo | |||||
@echo | |||||
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." | |||||
@echo "Run \`make' in that directory to run these through makeinfo" \ | |||||
"(use \`make info' here to do that automatically)." | |||||
# Build the Texinfo sources, then run them through makeinfo. The recursive
# call uses $(MAKE), like the latexpdf target, so command-line flags and the
# jobserver are passed down to the sub-make.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
gettext: | |||||
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale | |||||
@echo | |||||
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." | |||||
changes: | |||||
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes | |||||
@echo | |||||
@echo "The overview file is in $(BUILDDIR)/changes." | |||||
linkcheck: | |||||
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck | |||||
@echo | |||||
@echo "Link check complete; look for any errors in the above output " \ | |||||
"or in $(BUILDDIR)/linkcheck/output.txt." | |||||
doctest: | |||||
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest | |||||
@echo "Testing of doctests in the sources finished, look at the " \ | |||||
"results in $(BUILDDIR)/doctest/output.txt." |
@@ -0,0 +1,9 @@ | |||||
commands Package | |||||
================ | |||||
:mod:`commands` Package | |||||
----------------------- | |||||
.. automodule:: earwigbot.commands | |||||
:members: | |||||
:undoc-members: |
@@ -0,0 +1,46 @@ | |||||
config Package | |||||
============== | |||||
:mod:`config` Package | |||||
--------------------- | |||||
.. automodule:: earwigbot.config | |||||
:members: | |||||
:undoc-members: | |||||
:mod:`formatter` Module | |||||
----------------------- | |||||
.. automodule:: earwigbot.config.formatter | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:mod:`node` Module | |||||
------------------ | |||||
.. automodule:: earwigbot.config.node | |||||
:members: | |||||
:undoc-members: | |||||
:mod:`ordered_yaml` Module | |||||
-------------------------- | |||||
.. automodule:: earwigbot.config.ordered_yaml | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:mod:`permissions` Module | |||||
------------------------- | |||||
.. automodule:: earwigbot.config.permissions | |||||
:members: | |||||
:undoc-members: | |||||
:mod:`script` Module | |||||
-------------------- | |||||
.. automodule:: earwigbot.config.script | |||||
:members: | |||||
:undoc-members: |
@@ -0,0 +1,46 @@ | |||||
irc Package | |||||
=========== | |||||
:mod:`irc` Package | |||||
------------------ | |||||
.. automodule:: earwigbot.irc | |||||
:members: | |||||
:undoc-members: | |||||
:mod:`connection` Module | |||||
------------------------ | |||||
.. automodule:: earwigbot.irc.connection | |||||
:members: | |||||
:undoc-members: | |||||
:mod:`data` Module | |||||
------------------ | |||||
.. automodule:: earwigbot.irc.data | |||||
:members: | |||||
:undoc-members: | |||||
:mod:`frontend` Module | |||||
---------------------- | |||||
.. automodule:: earwigbot.irc.frontend | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:mod:`rc` Module | |||||
---------------- | |||||
.. automodule:: earwigbot.irc.rc | |||||
:members: | |||||
:undoc-members: | |||||
:mod:`watcher` Module | |||||
--------------------- | |||||
.. automodule:: earwigbot.irc.watcher | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -0,0 +1,57 @@ | |||||
earwigbot Package | |||||
================= | |||||
:mod:`earwigbot` Package | |||||
------------------------ | |||||
.. automodule:: earwigbot.__init__ | |||||
:members: | |||||
:undoc-members: | |||||
:mod:`bot` Module | |||||
----------------- | |||||
.. automodule:: earwigbot.bot | |||||
:members: | |||||
:undoc-members: | |||||
:mod:`exceptions` Module | |||||
------------------------ | |||||
.. automodule:: earwigbot.exceptions | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:mod:`lazy` Module | |||||
------------------ | |||||
.. automodule:: earwigbot.lazy | |||||
:members: | |||||
:undoc-members: | |||||
:mod:`managers` Module | |||||
---------------------- | |||||
.. automodule:: earwigbot.managers | |||||
:members: _ResourceManager, CommandManager, TaskManager | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:mod:`util` Module | |||||
------------------ | |||||
.. automodule:: earwigbot.util | |||||
:members: | |||||
:undoc-members: | |||||
Subpackages | |||||
----------- | |||||
.. toctree:: | |||||
earwigbot.commands | |||||
earwigbot.config | |||||
earwigbot.irc | |||||
earwigbot.tasks | |||||
earwigbot.wiki |
@@ -0,0 +1,16 @@ | |||||
tasks Package | |||||
============= | |||||
:mod:`tasks` Package | |||||
-------------------- | |||||
.. automodule:: earwigbot.tasks | |||||
:members: | |||||
:undoc-members: | |||||
:mod:`wikiproject_tagger` Module | |||||
-------------------------------- | |||||
.. automodule:: earwigbot.tasks.wikiproject_tagger | |||||
:members: | |||||
:show-inheritance: |
@@ -0,0 +1,47 @@ | |||||
copyvios Package | |||||
================ | |||||
:mod:`copyvios` Package | |||||
----------------------- | |||||
.. automodule:: earwigbot.wiki.copyvios | |||||
:members: | |||||
:undoc-members: | |||||
:mod:`exclusions` Module | |||||
------------------------ | |||||
.. automodule:: earwigbot.wiki.copyvios.exclusions | |||||
:members: | |||||
:undoc-members: | |||||
:mod:`markov` Module | |||||
-------------------- | |||||
.. automodule:: earwigbot.wiki.copyvios.markov | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:mod:`parsers` Module | |||||
--------------------- | |||||
.. automodule:: earwigbot.wiki.copyvios.parsers | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:mod:`result` Module | |||||
-------------------- | |||||
.. automodule:: earwigbot.wiki.copyvios.result | |||||
:members: | |||||
:undoc-members: | |||||
:mod:`search` Module | |||||
-------------------- | |||||
.. automodule:: earwigbot.wiki.copyvios.search | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: |
@@ -0,0 +1,59 @@ | |||||
wiki Package | |||||
============ | |||||
:mod:`wiki` Package | |||||
------------------- | |||||
.. automodule:: earwigbot.wiki | |||||
:members: | |||||
:undoc-members: | |||||
:mod:`category` Module | |||||
---------------------- | |||||
.. automodule:: earwigbot.wiki.category | |||||
:members: | |||||
:undoc-members: | |||||
:mod:`constants` Module | |||||
----------------------- | |||||
.. automodule:: earwigbot.wiki.constants | |||||
:members: | |||||
:undoc-members: | |||||
:mod:`page` Module | |||||
------------------ | |||||
.. automodule:: earwigbot.wiki.page | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:mod:`site` Module | |||||
------------------ | |||||
.. automodule:: earwigbot.wiki.site | |||||
:members: | |||||
:undoc-members: | |||||
:mod:`sitesdb` Module | |||||
--------------------- | |||||
.. automodule:: earwigbot.wiki.sitesdb | |||||
:members: | |||||
:undoc-members: | |||||
:mod:`user` Module | |||||
------------------ | |||||
.. automodule:: earwigbot.wiki.user | |||||
:members: | |||||
:undoc-members: | |||||
Subpackages | |||||
----------- | |||||
.. toctree:: | |||||
earwigbot.wiki.copyvios |
@@ -0,0 +1,7 @@ | |||||
earwigbot | |||||
========= | |||||
.. toctree:: | |||||
:maxdepth: 6 | |||||
earwigbot |
@@ -0,0 +1,242 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# EarwigBot documentation build configuration file, created by | |||||
# sphinx-quickstart on Sun Apr 29 01:42:25 2012. | |||||
# | |||||
# This file is execfile()d with the current directory set to its containing dir. | |||||
# | |||||
# Note that not all possible configuration values are present in this | |||||
# autogenerated file. | |||||
# | |||||
# All configuration values have a default; values that are commented out | |||||
# serve to show the default. | |||||
import sys, os | |||||
# If extensions (or modules to document with autodoc) are in another directory, | |||||
# add these directories to sys.path here. If the directory is relative to the | |||||
# documentation root, use os.path.abspath to make it absolute, like shown here. | |||||
sys.path.insert(0, os.path.abspath('..')) | |||||
# -- General configuration ----------------------------------------------------- | |||||
# If your documentation needs a minimal Sphinx version, state it here. | |||||
#needs_sphinx = '1.0' | |||||
# Add any Sphinx extension module names here, as strings. They can be extensions | |||||
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. | |||||
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.coverage', 'sphinx.ext.viewcode'] | |||||
# Add any paths that contain templates here, relative to this directory. | |||||
templates_path = ['_templates'] | |||||
# The suffix of source filenames. | |||||
source_suffix = '.rst' | |||||
# The encoding of source files. | |||||
#source_encoding = 'utf-8-sig' | |||||
# The master toctree document. | |||||
master_doc = 'index' | |||||
# General information about the project. | |||||
project = u'EarwigBot' | |||||
copyright = u'2009, 2010, 2011, 2012 Ben Kurtovic' | |||||
# The version info for the project you're documenting, acts as replacement for | |||||
# |version| and |release|, also used in various other places throughout the | |||||
# built documents. | |||||
# | |||||
# The short X.Y version. | |||||
version = '0.1' | |||||
# The full version, including alpha/beta/rc tags. | |||||
release = '0.1' | |||||
# The language for content autogenerated by Sphinx. Refer to documentation | |||||
# for a list of supported languages. | |||||
#language = None | |||||
# There are two options for replacing |today|: either, you set today to some | |||||
# non-false value, then it is used: | |||||
#today = '' | |||||
# Else, today_fmt is used as the format for a strftime call. | |||||
#today_fmt = '%B %d, %Y' | |||||
# List of patterns, relative to source directory, that match files and | |||||
# directories to ignore when looking for source files. | |||||
exclude_patterns = ['_build'] | |||||
# The reST default role (used for this markup: `text`) to use for all documents. | |||||
#default_role = None | |||||
# If true, '()' will be appended to :func: etc. cross-reference text. | |||||
#add_function_parentheses = True | |||||
# If true, the current module name will be prepended to all description | |||||
# unit titles (such as .. function::). | |||||
#add_module_names = True | |||||
# If true, sectionauthor and moduleauthor directives will be shown in the | |||||
# output. They are ignored by default. | |||||
#show_authors = False | |||||
# The name of the Pygments (syntax highlighting) style to use. | |||||
pygments_style = 'sphinx' | |||||
# A list of ignored prefixes for module index sorting. | |||||
#modindex_common_prefix = [] | |||||
# -- Options for HTML output --------------------------------------------------- | |||||
# The theme to use for HTML and HTML Help pages. See the documentation for | |||||
# a list of builtin themes. | |||||
html_theme = 'nature' | |||||
# Theme options are theme-specific and customize the look and feel of a theme | |||||
# further. For a list of options available for each theme, see the | |||||
# documentation. | |||||
#html_theme_options = {} | |||||
# Add any paths that contain custom themes here, relative to this directory. | |||||
#html_theme_path = [] | |||||
# The name for this set of Sphinx documents. If None, it defaults to | |||||
# "<project> v<release> documentation". | |||||
#html_title = None | |||||
# A shorter title for the navigation bar. Default is the same as html_title. | |||||
#html_short_title = None | |||||
# The name of an image file (relative to this directory) to place at the top | |||||
# of the sidebar. | |||||
#html_logo = None | |||||
# The name of an image file (within the static path) to use as favicon of the | |||||
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 | |||||
# pixels large. | |||||
#html_favicon = None | |||||
# Add any paths that contain custom static files (such as style sheets) here, | |||||
# relative to this directory. They are copied after the builtin static files, | |||||
# so a file named "default.css" will overwrite the builtin "default.css". | |||||
html_static_path = ['_static'] | |||||
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, | |||||
# using the given strftime format. | |||||
#html_last_updated_fmt = '%b %d, %Y' | |||||
# If true, SmartyPants will be used to convert quotes and dashes to | |||||
# typographically correct entities. | |||||
#html_use_smartypants = True | |||||
# Custom sidebar templates, maps document names to template names. | |||||
#html_sidebars = {} | |||||
# Additional templates that should be rendered to pages, maps page names to | |||||
# template names. | |||||
#html_additional_pages = {} | |||||
# If false, no module index is generated. | |||||
#html_domain_indices = True | |||||
# If false, no index is generated. | |||||
#html_use_index = True | |||||
# If true, the index is split into individual pages for each letter. | |||||
#html_split_index = False | |||||
# If true, links to the reST sources are added to the pages. | |||||
#html_show_sourcelink = True | |||||
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. | |||||
#html_show_sphinx = True | |||||
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. | |||||
#html_show_copyright = True | |||||
# If true, an OpenSearch description file will be output, and all pages will | |||||
# contain a <link> tag referring to it. The value of this option must be the | |||||
# base URL from which the finished HTML is served. | |||||
#html_use_opensearch = '' | |||||
# This is the file name suffix for HTML files (e.g. ".xhtml"). | |||||
#html_file_suffix = None | |||||
# Output file base name for HTML help builder. | |||||
htmlhelp_basename = 'EarwigBotdoc' | |||||
# -- Options for LaTeX output -------------------------------------------------- | |||||
latex_elements = { | |||||
# The paper size ('letterpaper' or 'a4paper'). | |||||
#'papersize': 'letterpaper', | |||||
# The font size ('10pt', '11pt' or '12pt'). | |||||
#'pointsize': '10pt', | |||||
# Additional stuff for the LaTeX preamble. | |||||
#'preamble': '', | |||||
} | |||||
# Grouping the document tree into LaTeX files. List of tuples | |||||
# (source start file, target name, title, author, documentclass [howto/manual]). | |||||
latex_documents = [ | |||||
('index', 'EarwigBot.tex', u'EarwigBot Documentation', | |||||
u'Ben Kurtovic', 'manual'), | |||||
] | |||||
# The name of an image file (relative to this directory) to place at the top of | |||||
# the title page. | |||||
#latex_logo = None | |||||
# For "manual" documents, if this is true, then toplevel headings are parts, | |||||
# not chapters. | |||||
#latex_use_parts = False | |||||
# If true, show page references after internal links. | |||||
#latex_show_pagerefs = False | |||||
# If true, show URL addresses after external links. | |||||
#latex_show_urls = False | |||||
# Documents to append as an appendix to all manuals. | |||||
#latex_appendices = [] | |||||
# If false, no module index is generated. | |||||
#latex_domain_indices = True | |||||
# -- Options for manual page output -------------------------------------------- | |||||
# One entry per manual page. List of tuples | |||||
# (source start file, name, description, authors, manual section). | |||||
man_pages = [ | |||||
('index', 'earwigbot', u'EarwigBot Documentation', | |||||
[u'Ben Kurtovic'], 1) | |||||
] | |||||
# If true, show URL addresses after external links. | |||||
#man_show_urls = False | |||||
# -- Options for Texinfo output ------------------------------------------------ | |||||
# Grouping the document tree into Texinfo files. List of tuples | |||||
# (source start file, target name, title, author, | |||||
# dir menu entry, description, category) | |||||
texinfo_documents = [ | |||||
('index', 'EarwigBot', u'EarwigBot Documentation', | |||||
u'Ben Kurtovic', 'EarwigBot', 'One line description of project.', | |||||
'Miscellaneous'), | |||||
] | |||||
# Documents to append as an appendix to all manuals. | |||||
#texinfo_appendices = [] | |||||
# If false, no module index is generated. | |||||
#texinfo_domain_indices = True | |||||
# How to display URL addresses: 'footnote', 'no', or 'inline'. | |||||
#texinfo_show_urls = 'footnote' |
@@ -0,0 +1,240 @@ | |||||
Customizing | |||||
=========== | |||||
The bot's working directory contains a :file:`commands` subdirectory and a | |||||
:file:`tasks` subdirectory. Custom IRC commands can be placed in the former, | |||||
whereas custom wiki bot tasks go into the latter. Developing custom modules is | |||||
explained in detail in this documentation. | |||||
Note that custom commands and tasks will override built-in commands and tasks | |||||
with the same name. | |||||
:py:class:`~earwigbot.bot.Bot` and :py:class:`~earwigbot.config.BotConfig` | |||||
-------------------------------------------------------------------------- | |||||
:py:class:`earwigbot.bot.Bot` is EarwigBot's main class. You don't have to | |||||
instantiate this yourself, but it's good to be familiar with its attributes and | |||||
methods, because it is the main way to communicate with other parts of the bot. | |||||
A :py:class:`~earwigbot.bot.Bot` object is accessible as an attribute of | |||||
commands and tasks (i.e., :py:attr:`self.bot`). | |||||
The most useful attributes are: | |||||
- :py:attr:`~earwigbot.bot.Bot.config`: an instance of | |||||
:py:class:`~earwigbot.config.BotConfig`, for accessing the bot's | |||||
configuration data (see below). | |||||
- :py:attr:`~earwigbot.bot.Bot.commands`: the bot's | |||||
:py:class:`~earwigbot.managers.CommandManager`, which is used internally to | |||||
run IRC commands (through | |||||
:py:meth:`commands.call() <earwigbot.managers.CommandManager.call>`, which | |||||
you shouldn't have to use); you can safely reload all commands with | |||||
:py:meth:`commands.load() <earwigbot.managers._ResourceManager.load>`. | |||||
- :py:attr:`~earwigbot.bot.Bot.tasks`: the bot's | |||||
:py:class:`~earwigbot.managers.TaskManager`, which can be used to start tasks | |||||
with :py:meth:`tasks.start(task_name, **kwargs) | |||||
<earwigbot.managers.TaskManager.start>`. :py:meth:`tasks.load() | |||||
<earwigbot.managers._ResourceManager.load>` can be used to safely reload all | |||||
tasks. | |||||
- :py:attr:`~earwigbot.bot.Bot.frontend` / | |||||
:py:attr:`~earwigbot.bot.Bot.watcher`: instances of | |||||
:py:class:`earwigbot.irc.Frontend <earwigbot.irc.frontend.Frontend>` and | |||||
:py:class:`earwigbot.irc.Watcher <earwigbot.irc.watcher.Watcher>`, | |||||
respectively, which represent the bot's connections to their respective IRC servers; you | |||||
can, for example, send a message to the frontend with | |||||
:py:meth:`frontend.say(chan, msg) | |||||
<earwigbot.irc.connection.IRCConnection.say>` (more on communicating with IRC | |||||
below). | |||||
- :py:attr:`~earwigbot.bot.Bot.wiki`: interface with the | |||||
:doc:`Wiki Toolset <toolset>`. | |||||
- Finally, :py:meth:`~earwigbot.bot.Bot.restart` (restarts IRC components and | |||||
reloads config, commands, and tasks) and :py:meth:`~earwigbot.bot.Bot.stop` | |||||
can be used almost anywhere. Both take an optional "reason" that will be | |||||
logged and used as the quit message when disconnecting from IRC. | |||||
:py:class:`earwigbot.config.BotConfig` stores configuration information for the | |||||
bot. Its docstrings explain what each attribute is used for, but essentially | |||||
each "node" (one of :py:attr:`config.components | |||||
<earwigbot.config.BotConfig.components>`, | |||||
:py:attr:`~earwigbot.config.BotConfig.wiki`, | |||||
:py:attr:`~earwigbot.config.BotConfig.irc`, | |||||
:py:attr:`~earwigbot.config.BotConfig.commands`, | |||||
:py:attr:`~earwigbot.config.BotConfig.tasks`, or | |||||
:py:attr:`~earwigbot.config.BotConfig.metadata`) maps to a section | |||||
of the bot's :file:`config.yml` file. For example, if :file:`config.yml` | |||||
includes something like:: | |||||
irc: | |||||
frontend: | |||||
nick: MyAwesomeBot | |||||
channels: | |||||
- "##earwigbot" | |||||
- "#channel" | |||||
- "#other-channel" | |||||
...then :py:attr:`config.irc["frontend"]["nick"]` will be ``"MyAwesomeBot"`` | |||||
and :py:attr:`config.irc["frontend"]["channels"]` will be | |||||
``["##earwigbot", "#channel", "#other-channel"]``. | |||||
Custom IRC commands | |||||
------------------- | |||||
Custom commands are subclasses of :py:class:`earwigbot.commands.Command` that | |||||
override :py:class:`~earwigbot.commands.Command`'s | |||||
:py:meth:`~earwigbot.commands.Command.process` (and optionally | |||||
:py:meth:`~earwigbot.commands.Command.check` or | |||||
:py:meth:`~earwigbot.commands.Command.setup`) methods. | |||||
:py:class:`~earwigbot.commands.Command`'s docstrings should explain what each | |||||
attribute and method is for and what they should be overridden with, but these | |||||
are the basics: | |||||
- Class attribute :py:attr:`~earwigbot.commands.Command.name` is the name of | |||||
the command. This must be specified. | |||||
- Class attribute :py:attr:`~earwigbot.commands.Command.commands` is a list of | |||||
names that will trigger this command. It defaults to the command's | |||||
:py:attr:`~earwigbot.commands.Command.name`, but you can override it with | |||||
multiple names to serve as aliases. This is handled by the default | |||||
:py:meth:`~earwigbot.commands.Command.check` implementation (see below), so | |||||
if :py:meth:`~earwigbot.commands.Command.check` is overridden, this is | |||||
ignored by everything except the help_ command (so ``!help alias`` will | |||||
trigger help for the actual command). | |||||
- Class attribute :py:attr:`~earwigbot.commands.Command.hooks` is a list of the | |||||
"IRC events" that this command might respond to. It defaults to ``["msg"]``, | |||||
but options include ``"msg_private"`` (for private messages only), | |||||
``"msg_public"`` (for channel messages only), and ``"join"`` (for when a user | |||||
joins a channel). See the afc_status_ plugin for a command that responds to | |||||
other hook types. | |||||
- Method :py:meth:`~earwigbot.commands.Command.setup` is called *once* with no | |||||
arguments immediately after the command is first loaded. Does nothing by | |||||
default; treat it like an :py:meth:`__init__` if you want | |||||
(:py:meth:`~earwigbot.commands.Command.__init__` does things by default and a | |||||
dedicated setup method is often easier than overriding | |||||
:py:meth:`~earwigbot.commands.Command.__init__` and using :py:obj:`super`). | |||||
- Method :py:meth:`~earwigbot.commands.Command.check` is passed a | |||||
:py:class:`~earwigbot.irc.data.Data` object, and should return ``True`` if | |||||
you want to respond to this message, or ``False`` otherwise. The default | |||||
behavior is to return ``True`` only if :py:attr:`data.is_command` is ``True`` | |||||
and :py:attr:`data.command` ``==`` | |||||
:py:attr:`~earwigbot.commands.Command.name` (or :py:attr:`data.command | |||||
<earwigbot.irc.data.Data.command>` is in | |||||
:py:attr:`~earwigbot.commands.Command.commands` if that list is overridden; | |||||
see above), which is suitable for most cases. A possible reason for | |||||
overriding is if you want to do something in response to events from a | |||||
specific channel only. Note that by returning ``True``, you prevent any other | |||||
commands from responding to this message. | |||||
- Method :py:meth:`~earwigbot.commands.Command.process` is passed the same | |||||
:py:class:`~earwigbot.irc.data.Data` object as | |||||
:py:meth:`~earwigbot.commands.Command.check`, but only if | |||||
:py:meth:`~earwigbot.commands.Command.check` returned ``True``. This is where | |||||
the bulk of your command goes. To respond to IRC messages, there are a number | |||||
of methods of :py:class:`~earwigbot.commands.Command` at your disposal. See | |||||
the test_ command for a simple example, or look in | |||||
:py:class:`~earwigbot.commands.Command`'s | |||||
:py:meth:`~earwigbot.commands.Command.__init__` method for the full list. | |||||
The most common ones are :py:meth:`say(chan_or_user, msg) | |||||
<earwigbot.irc.connection.IRCConnection.say>`, :py:meth:`reply(data, msg) | |||||
<earwigbot.irc.connection.IRCConnection.reply>` (convenience function; sends | |||||
a reply to the issuer of the command in the channel it was received), | |||||
:py:meth:`action(chan_or_user, msg) | |||||
<earwigbot.irc.connection.IRCConnection.action>`, | |||||
:py:meth:`notice(chan_or_user, msg) | |||||
<earwigbot.irc.connection.IRCConnection.notice>`, :py:meth:`join(chan) | |||||
<earwigbot.irc.connection.IRCConnection.join>`, and | |||||
:py:meth:`part(chan) <earwigbot.irc.connection.IRCConnection.part>`. | |||||
Commands have access to :py:attr:`config.commands[command_name]` for config | |||||
information, which is a node in :file:`config.yml` like every other attribute | |||||
of :py:attr:`bot.config`. This can be used to store, for example, API keys or | |||||
SQL connection info, so that these can be easily changed without modifying the | |||||
command itself. | |||||
The command *class* doesn't need a specific name, but it should logically | |||||
follow the command's name. The filename doesn't matter, but it is recommended | |||||
to match the command name for readability. Multiple command classes are allowed | |||||
in one file. | |||||
The bot has a wide selection of built-in commands and plugins to act as sample | |||||
code and/or to give ideas. Start with test_, and then check out chanops_ and | |||||
afc_status_ for some more complicated scripts. | |||||
Custom bot tasks | |||||
---------------- | |||||
Custom tasks are subclasses of :py:class:`earwigbot.tasks.Task` that | |||||
override :py:class:`~earwigbot.tasks.Task`'s | |||||
:py:meth:`~earwigbot.tasks.Task.run` (and optionally | |||||
:py:meth:`~earwigbot.tasks.Task.setup`) methods. | |||||
:py:class:`~earwigbot.tasks.Task`'s docstrings should explain what each | |||||
attribute and method is for and what they should be overridden with, but these | |||||
are the basics: | |||||
- Class attribute :py:attr:`~earwigbot.tasks.Task.name` is the name of the | |||||
task. This must be specified. | |||||
- Class attribute :py:attr:`~earwigbot.tasks.Task.number` can be used to store | |||||
an optional "task number", possibly for use in edit summaries (to be | |||||
generated with :py:meth:`~earwigbot.tasks.Task.make_summary`). For | |||||
example, EarwigBot's :py:attr:`config.wiki["summary"]` is | |||||
``"([[WP:BOT|Bot]]; [[User:EarwigBot#Task $1|Task $1]]): $2"``, which the | |||||
task class's :py:meth:`make_summary(comment) | |||||
<earwigbot.tasks.Task.make_summary>` method will take and replace | |||||
``$1`` with the task number and ``$2`` with the details of the edit. | |||||
Additionally, :py:meth:`~earwigbot.tasks.Task.shutoff_enabled` (which checks | |||||
whether the bot has been told to stop on-wiki by checking the content of a | |||||
particular page) can check a different page for each task using similar | |||||
variables. EarwigBot's :py:attr:`config.wiki["shutoff"]["page"]` is | |||||
``"User:$1/Shutoff/Task $2"``; ``$1`` is substituted with the bot's username, | |||||
and ``$2`` is substituted with the task number, so, e.g., task #14 checks the | |||||
page ``[[User:EarwigBot/Shutoff/Task 14]]``. If the page's content does *not* | |||||
match :py:attr:`config.wiki["shutoff"]["disabled"]` (``"run"`` by default), | |||||
then shutoff is considered to be *enabled* and | |||||
:py:meth:`~earwigbot.tasks.Task.shutoff_enabled` will return ``True``, | |||||
indicating the task should not run. If you don't intend to use either of | |||||
these methods, feel free to leave this attribute blank. | |||||
- Method :py:meth:`~earwigbot.tasks.Task.setup` is called *once* with no | |||||
arguments immediately after the task is first loaded. Does nothing by | |||||
default; treat it like an :py:meth:`__init__` if you want | |||||
(:py:meth:`~earwigbot.tasks.Task.__init__` does things by default and a | |||||
dedicated setup method is often easier than overriding | |||||
:py:meth:`~earwigbot.tasks.Task.__init__` and using :py:obj:`super`). | |||||
- Method :py:meth:`~earwigbot.tasks.Task.run` is called with any number of | |||||
keyword arguments every time the task is executed (by | |||||
:py:meth:`tasks.start(task_name, **kwargs) | |||||
<earwigbot.managers.TaskManager.start>`, usually). This is where the bulk of | |||||
the task's code goes. For interfacing with MediaWiki sites, read up on the | |||||
:doc:`Wiki Toolset <toolset>`. | |||||
Tasks have access to :py:attr:`config.tasks[task_name]` for config information, | |||||
which is a node in :file:`config.yml` like every other attribute of | |||||
:py:attr:`bot.config`. This can be used to store, for example, edit summaries | |||||
or templates to append to user talk pages, so that these can be easily changed | |||||
without modifying the task itself. | |||||
The task *class* doesn't need a specific name, but it should logically follow | |||||
the task's name. The filename doesn't matter, but it is recommended to match | |||||
the task name for readability. Multiple task classes are allowed in one file. | |||||
See the built-in wikiproject_tagger_ task for a relatively straightforward | |||||
task, or the afc_statistics_ plugin for a more complicated one. | |||||
.. _help: https://github.com/earwig/earwigbot/blob/develop/earwigbot/commands/help.py | |||||
.. _afc_status: https://github.com/earwig/earwigbot-plugins/blob/develop/commands/afc_status.py | |||||
.. _test: https://github.com/earwig/earwigbot/blob/develop/earwigbot/commands/test.py | |||||
.. _chanops: https://github.com/earwig/earwigbot/blob/develop/earwigbot/commands/chanops.py | |||||
.. _wikiproject_tagger: https://github.com/earwig/earwigbot/blob/develop/earwigbot/tasks/wikiproject_tagger.py | |||||
.. _afc_statistics: https://github.com/earwig/earwigbot-plugins/blob/develop/tasks/afc_statistics.py |
@@ -0,0 +1,48 @@ | |||||
EarwigBot v0.1 Documentation | |||||
============================ | |||||
EarwigBot_ is a Python_ robot that edits Wikipedia_ and interacts with people | |||||
over IRC_. | |||||
History | |||||
------- | |||||
Development began, based on the `Pywikipedia framework`_, in early 2009. | |||||
Approval for its first task, a `copyright violation detector`_, was carried out | |||||
in May, and the bot has been running consistently ever since (with the | |||||
exception of Jan/Feb 2011). It currently handles `several ongoing tasks`_, | |||||
ranging from statistics generation to category cleanup, and on-demand tasks | |||||
such as WikiProject template tagging. Since it started running, the bot has | |||||
made over 50,000 edits. | |||||
A project to rewrite it from scratch began in early April 2011, thus moving | |||||
away from the Pywikipedia framework and allowing for less overall code, better | |||||
integration between bot parts, and easier maintenance. | |||||
.. _EarwigBot: http://en.wikipedia.org/wiki/User:EarwigBot | |||||
.. _Python: http://python.org/ | |||||
.. _Wikipedia: http://en.wikipedia.org/ | |||||
.. _IRC: http://en.wikipedia.org/wiki/Internet_Relay_Chat | |||||
.. _Pywikipedia framework: http://pywikipediabot.sourceforge.net/ | |||||
.. _copyright violation detector: http://en.wikipedia.org/wiki/Wikipedia:Bots/Requests_for_approval/EarwigBot_1 | |||||
.. _several ongoing tasks: http://en.wikipedia.org/wiki/User:EarwigBot#Tasks | |||||
Contents | |||||
-------- | |||||
.. toctree:: | |||||
:maxdepth: 2 | |||||
installation | |||||
setup | |||||
customizing | |||||
toolset | |||||
tips | |||||
API Reference <api/modules> | |||||
Indices and tables | |||||
------------------ | |||||
* :ref:`genindex` | |||||
* :ref:`modindex` | |||||
* :ref:`search` |
@@ -0,0 +1,55 @@ | |||||
Installation | |||||
============ | |||||
This package contains the core :py:mod:`earwigbot`, abstracted enough that it | |||||
should be usable and customizable by anyone running a bot on a MediaWiki site. | |||||
Since it is component-based, the IRC components can be disabled if desired. IRC | |||||
commands and bot tasks specific to `my instance of EarwigBot`_ that I don't | |||||
feel the average user will need are available from the repository | |||||
`earwigbot-plugins`_. | |||||
It's recommended to run the bot's unit tests before installing. Run | |||||
:command:`python setup.py test` from the project's root directory. Note that | |||||
some tests require an internet connection, and others may take a while to run. | |||||
Coverage is currently rather incomplete. | |||||
Latest release (v0.1) | |||||
--------------------- | |||||
EarwigBot is available from the `Python Package Index`_, so you can install the | |||||
latest release with :command:`pip install earwigbot` (`get pip`_). | |||||
You can also install it from source [1]_ directly:: | |||||
curl -Lo earwigbot.tgz https://github.com/earwig/earwigbot/tarball/v0.1 | |||||
tar -xf earwigbot.tgz | |||||
cd earwig-earwigbot-* | |||||
python setup.py install | |||||
cd .. | |||||
rm -r earwigbot.tgz earwig-earwigbot-* | |||||
Development version | |||||
------------------- | |||||
You can install the development version of the bot from :command:`git` by using | |||||
setuptools/`distribute`_'s :command:`develop` command [1]_, probably on the | |||||
``develop`` branch which contains (usually) working code. ``master`` contains | |||||
the latest release. EarwigBot uses `git flow`_, so you're free to browse by | |||||
tags or by new features (``feature/*`` branches):: | |||||
git clone git://github.com/earwig/earwigbot.git earwigbot | |||||
cd earwigbot | |||||
python setup.py develop | |||||
.. rubric:: Footnotes | |||||
.. [1] :command:`python setup.py install`/:command:`develop` may require root, | |||||
or use the :command:`--user` switch to install for the current user | |||||
only. | |||||
.. _my instance of EarwigBot: http://en.wikipedia.org/wiki/User:EarwigBot | |||||
.. _earwigbot-plugins: https://github.com/earwig/earwigbot-plugins | |||||
.. _Python Package Index: http://pypi.python.org | |||||
.. _get pip: http://pypi.python.org/pypi/pip | |||||
.. _distribute: http://pypi.python.org/pypi/distribute | |||||
.. _git flow: http://nvie.com/posts/a-successful-git-branching-model/ |
@@ -0,0 +1,28 @@ | |||||
Setup | |||||
===== | |||||
The bot stores its data in a "working directory", including its config file and | |||||
databases. This is also the location where you will place custom IRC commands | |||||
and bot tasks, which will be explained later. It doesn't matter where this | |||||
directory is, as long as the bot can write to it. | |||||
Start the bot with :command:`earwigbot path/to/working/dir`, or just | |||||
:command:`earwigbot` if the working directory is the current directory. It will | |||||
notice that no :file:`config.yml` file exists and take you through the setup | |||||
process. | |||||
There is currently no way to edit the :file:`config.yml` file from within the | |||||
bot after it has been created, but YAML is a very straightforward format, so | |||||
you should be able to make any necessary changes yourself. Check out the | |||||
`explanation of YAML`_ on Wikipedia for help. | |||||
After setup, the bot will start. This means it will connect to the IRC servers | |||||
it has been configured for, schedule bot tasks to run at specific times, and | |||||
then wait for instructions (as commands on IRC). For a list of commands, say | |||||
"``!help``" (commands are messages prefixed with an exclamation mark). | |||||
You can stop the bot at any time with :kbd:`Control-c`, same as you stop a | |||||
normal Python program, and it will try to exit safely. You can also use the | |||||
"``!quit``" command on IRC. | |||||
.. _explanation of YAML: http://en.wikipedia.org/wiki/YAML |
@@ -0,0 +1,46 @@ | |||||
Tips | |||||
==== | |||||
- Logging_ is a fantastic way to monitor the bot's progress as it runs. It has | |||||
a slew of built-in loggers, and enabling log retention (so logs are saved to | |||||
:file:`logs/` in the working directory) is highly recommended. In the normal | |||||
setup, there are three log files, each of which "rotates" at a specific time
(:file:`filename.log` becomes :file:`filename.log.2012-04-10`, for example). | |||||
The :file:`debug.log` file rotates every hour, and maintains six hours of | |||||
logs of every level (``DEBUG`` and up). :file:`bot.log` rotates every day at | |||||
midnight, and maintains seven days of non-debug logs (``INFO`` and up). | |||||
Finally, :file:`error.log` rotates every Sunday night, and maintains four | |||||
weeks of logs indicating unexpected events (``WARNING`` and up). | |||||
To use logging in your commands or tasks (recommended), | |||||
:py:class:`~earwigbot.commands.BaseCommand` and
:py:class:`~earwigbot.tasks.BaseTask` provide :py:attr:`logger` attributes
configured for the specific command or task. If you're working with other | |||||
classes, :py:attr:`bot.logger` is the root logger | |||||
(:py:obj:`logging.getLogger("earwigbot")` by default), so you can use | |||||
:py:func:`~logging.Logger.getChild` to make your logger. For example, task | |||||
loggers are essentially | |||||
:py:attr:`bot.logger.getChild("tasks").getChild(task.name) <bot.logger>`. | |||||
- A very useful IRC command is "``!reload``", which reloads all commands and | |||||
tasks without restarting the bot. [1]_ Combined with using the `!git plugin`_ | |||||
for pulling repositories from IRC, this can provide a seamless command/task | |||||
development workflow if the bot runs on an external server and you set up | |||||
its working directory as a git repo. | |||||
- You can run a task by itself instead of the entire bot with | |||||
:command:`earwigbot path/to/working/dir --task task_name`. | |||||
- Questions, comments, or suggestions about the documentation? `Let me know`_, | |||||
or `create an issue`_ so I can improve it for other people. | |||||
.. rubric:: Footnotes | |||||
.. [1] In reality, all this does is call :py:meth:`bot.commands.load() | |||||
<earwigbot.managers._ResourceManager.load>` and | |||||
:py:meth:`bot.tasks.load() <earwigbot.managers._ResourceManager.load>`! | |||||
.. _logging: http://docs.python.org/library/logging.html | |||||
.. _!git plugin: https://github.com/earwig/earwigbot-plugins/blob/develop/commands/git.py | |||||
.. _Let me know: mailto:ben.kurtovic@verizon.net
.. _create an issue: https://github.com/earwig/earwigbot/issues |
@@ -0,0 +1,247 @@ | |||||
The Wiki Toolset | |||||
================ | |||||
EarwigBot's answer to the `Pywikipedia framework`_ is the Wiki Toolset | |||||
(:py:mod:`earwigbot.wiki`), which you will mainly access through | |||||
:py:attr:`bot.wiki <earwigbot.bot.Bot.wiki>`. | |||||
:py:attr:`bot.wiki <earwigbot.bot.Bot.wiki>` provides three methods for the | |||||
management of Sites - :py:meth:`~earwigbot.wiki.sitesdb.SitesDB.get_site`, | |||||
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.add_site`, and | |||||
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.remove_site`. Sites are objects that | |||||
simply represent a MediaWiki site. A single instance of EarwigBot (i.e. a | |||||
single *working directory*) is expected to relate to a single site or group of | |||||
sites using the same login info (like all WMF wikis with `CentralAuth`_). | |||||
Load your default site (the one that you picked during setup) with | |||||
``site = bot.wiki.get_site()``. | |||||
Dealing with other sites | |||||
~~~~~~~~~~~~~~~~~~~~~~~~ | |||||
*Skip this section if you're only working with one site.* | |||||
If a site is *already known to the bot* (meaning that it is stored in the | |||||
:file:`sites.db` file, which includes just your default wiki at first), you can | |||||
load a site with ``site = bot.wiki.get_site(name)``, where ``name`` might be | |||||
``"enwiki"`` or ``"frwiktionary"`` (you can also do | |||||
``site = bot.wiki.get_site(project="wikipedia", lang="en")``). Recall that not | |||||
giving any arguments to ``get_site()`` will return the default site. | |||||
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.add_site` is used to add new sites to | |||||
the sites database. It may be called with similar arguments as | |||||
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.get_site`, but the difference is | |||||
important. :py:meth:`~earwigbot.wiki.sitesdb.SitesDB.get_site` only needs | |||||
enough information to identify the site in its database, which is usually just | |||||
its name; the database stores all other necessary connection info. With | |||||
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.add_site`, you need to provide enough | |||||
connection info so the toolset can successfully access the site's API/SQL | |||||
databases and store that information for later. That might not be much; for WMF | |||||
wikis, you can usually use code like this:: | |||||
project, lang = "wikipedia", "es" | |||||
try: | |||||
site = bot.wiki.get_site(project=project, lang=lang) | |||||
except earwigbot.exceptions.SiteNotFoundError:
# Load site info from http://es.wikipedia.org/w/api.php: | |||||
site = bot.wiki.add_site(project=project, lang=lang) | |||||
This works because EarwigBot assumes that the URL for the site is | |||||
``"//{lang}.{project}.org"``, the API is at ``/w/api.php``, and the SQL | |||||
connection info (if any) is stored as ``config.wiki["sql"]``. This might change | |||||
if you're dealing with non-WMF wikis, where the code might look something more | |||||
like:: | |||||
project, lang = "mywiki", "it" | |||||
try: | |||||
site = bot.wiki.get_site(project=project, lang=lang) | |||||
except earwigbot.exceptions.SiteNotFoundError:
# Load site info from http://mysite.net/mywiki/it/s/api.php: | |||||
base_url = "http://mysite.net/" + project + "/" + lang | |||||
db_name = lang + project + "_p" | |||||
sql = {"host": "sql.mysite.net", "db": db_name}
site = bot.wiki.add_site(base_url=base_url, script_path="/s", sql=sql) | |||||
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.remove_site` does the opposite of | |||||
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.add_site`: give it a site's name or a | |||||
project/lang pair like :py:meth:`~earwigbot.wiki.sitesdb.SitesDB.get_site` | |||||
takes, and it'll remove that site from the sites database. | |||||
Sites | |||||
~~~~~ | |||||
:py:class:`earwigbot.wiki.Site <earwigbot.wiki.site.Site>` objects provide the | |||||
following attributes: | |||||
- :py:attr:`~earwigbot.wiki.site.Site.name`: the site's name (or "wikiid"), | |||||
like ``"enwiki"`` | |||||
- :py:attr:`~earwigbot.wiki.site.Site.project`: the site's project name, like | |||||
``"wikipedia"`` | |||||
- :py:attr:`~earwigbot.wiki.site.Site.lang`: the site's language code, like | |||||
``"en"`` | |||||
- :py:attr:`~earwigbot.wiki.site.Site.domain`: the site's web domain, like | |||||
``"en.wikipedia.org"`` | |||||
- :py:attr:`~earwigbot.wiki.site.Site.url`: the site's full base URL, like | |||||
``"https://en.wikipedia.org"`` | |||||
and the following methods: | |||||
- :py:meth:`api_query(**kwargs) <earwigbot.wiki.site.Site.api_query>`: does an | |||||
API query with the given keyword arguments as params | |||||
- :py:meth:`sql_query(query, params=(), ...) | |||||
<earwigbot.wiki.site.Site.sql_query>`: does an SQL query and yields its | |||||
results (as a generator) | |||||
- :py:meth:`~earwigbot.wiki.site.Site.get_replag`: returns the estimated | |||||
database replication lag (if we have the site's SQL connection info) | |||||
- :py:meth:`namespace_id_to_name(id, all=False) | |||||
<earwigbot.wiki.site.Site.namespace_id_to_name>`: given a namespace ID, | |||||
returns the primary associated namespace name (or a list of all names when | |||||
``all`` is ``True``) | |||||
- :py:meth:`namespace_name_to_id(name) | |||||
<earwigbot.wiki.site.Site.namespace_name_to_id>`: given a namespace name, | |||||
returns the associated namespace ID | |||||
- :py:meth:`get_page(title, follow_redirects=False, ...) | |||||
<earwigbot.wiki.site.Site.get_page>`: returns a ``Page`` object for the given | |||||
title (or a :py:class:`~earwigbot.wiki.category.Category` object if the | |||||
page's namespace is "``Category:``") | |||||
- :py:meth:`get_category(catname, follow_redirects=False, ...) | |||||
<earwigbot.wiki.site.Site.get_category>`: returns a ``Category`` object for | |||||
the given title (sans namespace) | |||||
- :py:meth:`get_user(username) <earwigbot.wiki.site.Site.get_user>`: returns a | |||||
:py:class:`~earwigbot.wiki.user.User` object for the given username | |||||
- :py:meth:`delegate(services, ...) <earwigbot.wiki.site.Site.delegate>`: | |||||
delegates a task to either the API or SQL depending on various conditions, | |||||
such as server lag | |||||
Pages and categories | |||||
~~~~~~~~~~~~~~~~~~~~ | |||||
Create :py:class:`earwigbot.wiki.Page <earwigbot.wiki.page.Page>` objects with | |||||
:py:meth:`site.get_page(title) <earwigbot.wiki.site.Site.get_page>`, | |||||
:py:meth:`page.toggle_talk() <earwigbot.wiki.page.Page.toggle_talk>`, | |||||
:py:meth:`user.get_userpage() <earwigbot.wiki.user.User.get_userpage>`, or | |||||
:py:meth:`user.get_talkpage() <earwigbot.wiki.user.User.get_talkpage>`. They | |||||
provide the following attributes: | |||||
- :py:attr:`~earwigbot.wiki.page.Page.site`: the page's corresponding | |||||
:py:class:`~earwigbot.wiki.site.Site` object | |||||
- :py:attr:`~earwigbot.wiki.page.Page.title`: the page's title, or pagename | |||||
- :py:attr:`~earwigbot.wiki.page.Page.exists`: whether or not the page exists | |||||
- :py:attr:`~earwigbot.wiki.page.Page.pageid`: an integer ID representing the | |||||
page | |||||
- :py:attr:`~earwigbot.wiki.page.Page.url`: the page's URL | |||||
- :py:attr:`~earwigbot.wiki.page.Page.namespace`: the page's namespace as an | |||||
integer | |||||
- :py:attr:`~earwigbot.wiki.page.Page.protection`: the page's current | |||||
protection status | |||||
- :py:attr:`~earwigbot.wiki.page.Page.is_talkpage`: ``True`` if the page is a | |||||
talkpage, else ``False`` | |||||
- :py:attr:`~earwigbot.wiki.page.Page.is_redirect`: ``True`` if the page is a | |||||
redirect, else ``False`` | |||||
and the following methods: | |||||
- :py:meth:`~earwigbot.wiki.page.Page.reload`: forcibly reloads the page's | |||||
attributes (emphasis on *reload* - this is only necessary if there is reason | |||||
to believe they have changed) | |||||
- :py:meth:`toggle_talk(...) <earwigbot.wiki.page.Page.toggle_talk>`: returns a | |||||
content page's talk page, or vice versa | |||||
- :py:meth:`~earwigbot.wiki.page.Page.get`: returns page content | |||||
- :py:meth:`~earwigbot.wiki.page.Page.get_redirect_target`: if the page is a | |||||
redirect, returns its destination | |||||
- :py:meth:`~earwigbot.wiki.page.Page.get_creator`: returns a | |||||
:py:class:`~earwigbot.wiki.user.User` object representing the first user to | |||||
edit the page | |||||
- :py:meth:`edit(text, summary, minor=False, bot=True, force=False) | |||||
<earwigbot.wiki.page.Page.edit>`: replaces the page's content with ``text`` | |||||
or creates a new page | |||||
- :py:meth:`add_section(text, title, minor=False, bot=True, force=False) | |||||
<earwigbot.wiki.page.Page.add_section>`: adds a new section named ``title`` | |||||
at the bottom of the page | |||||
- :py:meth:`copyvio_check(...) | |||||
<earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check>`: checks the page for | |||||
copyright violations | |||||
- :py:meth:`copyvio_compare(url, ...) | |||||
<earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_compare>`: checks the page like | |||||
:py:meth:`~earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check`, but | |||||
against a specific URL | |||||
- :py:meth:`check_exclusion(username=None, optouts=None) | |||||
<earwigbot.wiki.page.Page.check_exclusion>`: checks whether or not we are | |||||
allowed to edit the page per ``{{bots}}``/``{{nobots}}`` | |||||
Additionally, :py:class:`~earwigbot.wiki.category.Category` objects (created | |||||
with :py:meth:`site.get_category(name) <earwigbot.wiki.site.Site.get_category>` | |||||
or :py:meth:`site.get_page(title) <earwigbot.wiki.site.Site.get_page>` where | |||||
``title`` is in the ``Category:`` namespace) provide the following additional | |||||
attributes: | |||||
- :py:attr:`~earwigbot.wiki.category.Category.size`: the total number of | |||||
members in the category | |||||
- :py:attr:`~earwigbot.wiki.category.Category.pages`: the number of pages in | |||||
the category | |||||
- :py:attr:`~earwigbot.wiki.category.Category.files`: the number of files in | |||||
the category | |||||
- :py:attr:`~earwigbot.wiki.category.Category.subcats`: the number of | |||||
subcategories in the category | |||||
And the following additional method: | |||||
- :py:meth:`get_members(limit=None, ...) | |||||
<earwigbot.wiki.category.Category.get_members>`: iterates over | |||||
:py:class:`~earwigbot.wiki.page.Page`\ s in the category, until either the | |||||
category is exhausted or (if given) ``limit`` is reached | |||||
Users | |||||
~~~~~ | |||||
Create :py:class:`earwigbot.wiki.User <earwigbot.wiki.user.User>` objects with | |||||
:py:meth:`site.get_user(name) <earwigbot.wiki.site.Site.get_user>` or | |||||
:py:meth:`page.get_creator() <earwigbot.wiki.page.Page.get_creator>`. They | |||||
provide the following attributes: | |||||
- :py:attr:`~earwigbot.wiki.user.User.site`: the user's corresponding | |||||
:py:class:`~earwigbot.wiki.site.Site` object | |||||
- :py:attr:`~earwigbot.wiki.user.User.name`: the user's username | |||||
- :py:attr:`~earwigbot.wiki.user.User.exists`: ``True`` if the user exists, or | |||||
``False`` if they do not | |||||
- :py:attr:`~earwigbot.wiki.user.User.userid`: an integer ID representing the | |||||
user | |||||
- :py:attr:`~earwigbot.wiki.user.User.blockinfo`: information about any current | |||||
blocks on the user (``False`` if no block, or a dict of | |||||
``{"by": blocking_user, "reason": block_reason, | |||||
"expiry": block_expire_time}``) | |||||
- :py:attr:`~earwigbot.wiki.user.User.groups`: a list of the user's groups | |||||
- :py:attr:`~earwigbot.wiki.user.User.rights`: a list of the user's rights | |||||
- :py:attr:`~earwigbot.wiki.user.User.editcount`: the number of edits made by | |||||
the user | |||||
- :py:attr:`~earwigbot.wiki.user.User.registration`: the time the user | |||||
registered as a :py:obj:`time.struct_time` | |||||
- :py:attr:`~earwigbot.wiki.user.User.emailable`: ``True`` if you can email the | |||||
user, ``False`` if you cannot | |||||
- :py:attr:`~earwigbot.wiki.user.User.gender`: the user's gender (``"male"``, | |||||
``"female"``, or ``"unknown"``) | |||||
- :py:attr:`~earwigbot.wiki.user.User.is_ip`: ``True`` if the user is an IP | |||||
address, IPv4 or IPv6, otherwise ``False`` | |||||
and the following methods: | |||||
- :py:meth:`~earwigbot.wiki.user.User.reload`: forcibly reloads the user's | |||||
attributes (emphasis on *reload* - this is only necessary if there is reason | |||||
to believe they have changed) | |||||
- :py:meth:`~earwigbot.wiki.user.User.get_userpage`: returns a | |||||
:py:class:`~earwigbot.wiki.page.Page` object representing the user's userpage | |||||
- :py:meth:`~earwigbot.wiki.user.User.get_talkpage`: returns a | |||||
:py:class:`~earwigbot.wiki.page.Page` object representing the user's talkpage | |||||
Additional features | |||||
~~~~~~~~~~~~~~~~~~~ | |||||
Not all aspects of the toolset are covered here. Explore `its code and | |||||
docstrings`_ to learn how to use it in a more hands-on fashion. For reference, | |||||
:py:attr:`bot.wiki <earwigbot.bot.Bot.wiki>` is an instance of | |||||
:py:class:`earwigbot.wiki.SitesDB <earwigbot.wiki.sitesdb.SitesDB>` tied to the | |||||
:file:`sites.db` file in the bot's working directory. | |||||
.. _Pywikipedia framework: http://pywikipediabot.sourceforge.net/ | |||||
.. _CentralAuth: http://www.mediawiki.org/wiki/Extension:CentralAuth | |||||
.. _its code and docstrings: https://github.com/earwig/earwigbot/tree/develop/earwigbot/wiki |
@@ -1,22 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
import sys
import time
from subprocess import call

try:
    # Imported only to verify that every config module is present; none of
    # these names are used afterwards (``main`` is rebound by the function
    # defined below).
    from config import irc, main, schedule, secure, watcher
except ImportError:
    print("""Missing a config file! Make sure you have configured the bot. All *.py.default files in config/
should have their .default extension removed, and the info inside should be corrected.""")
    # A missing config is an error, so report a non-zero exit status.
    sys.exit(1)


def main():
    """Run ``core/main.py`` in a child process forever, restarting on exit.

    Each time the child process returns, wait five seconds and launch it
    again. This function never returns normally; it is stopped by an
    exception such as :py:exc:`KeyboardInterrupt`.
    """
    while True:
        call(['python', 'core/main.py'])
        time.sleep(5)  # sleep for five seconds between bot runs


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # sys.exit() with a string prints it to stderr and exits with status 1.
        sys.exit("\nKeyboardInterrupt: stopping bot wrapper.")
@@ -0,0 +1,68 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
""" | |||||
`EarwigBot <https://github.com/earwig/earwigbot>`_ is a Python robot that edits | |||||
Wikipedia and interacts with people over IRC. | |||||
See :file:`README.rst` for an overview, or the :file:`docs/` directory for | |||||
details. This documentation is also available `online | |||||
<http://packages.python.org/earwigbot>`_. | |||||
""" | |||||
__author__ = "Ben Kurtovic"
__copyright__ = "Copyright (C) 2009, 2010, 2011, 2012 Ben Kurtovic"
__license__ = "MIT License"
__version__ = "0.1"
__email__ = "ben.kurtovic@verizon.net"
__release__ = True

# Development (non-release) builds get the abbreviated HEAD commit appended to
# the version string, on a best-effort basis.
if not __release__:
    def _get_git_commit_id():
        """Return the first eight hex digits of the git HEAD commit's SHA."""
        from os.path import dirname, split
        from git import Repo

        # The repository root is the parent of this package's directory.
        repo_root, _ = split(dirname(__file__))
        return Repo(repo_root).head.object.hexsha[:8]

    try:
        __version__ = __version__ + ".git+" + _get_git_commit_id()
    except Exception:
        # Any failure here simply leaves the plain version string in place.
        pass
    finally:
        # The helper is an implementation detail; don't leave it on the package.
        del _get_git_commit_id
from earwigbot import lazy

# Bind each subpackage name below through a lazy.LazyImporter instance instead
# of a plain ``import`` statement.
# NOTE(review): presumably ``importer.new()`` defers the real import until the
# name is first used, keeping ``import earwigbot`` cheap -- confirm against
# earwigbot.lazy.
importer = lazy.LazyImporter()
bot = importer.new("earwigbot.bot")
commands = importer.new("earwigbot.commands")
config = importer.new("earwigbot.config")
exceptions = importer.new("earwigbot.exceptions")
irc = importer.new("earwigbot.irc")
managers = importer.new("earwigbot.managers")
tasks = importer.new("earwigbot.tasks")
util = importer.new("earwigbot.util")
wiki = importer.new("earwigbot.wiki")
# The importer object was only needed to create the bindings above; remove it
# from the package namespace.
del importer
@@ -0,0 +1,222 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
import logging | |||||
from threading import Lock, Thread, enumerate as enumerate_threads | |||||
from time import sleep, time | |||||
from earwigbot import __version__ | |||||
from earwigbot.config import BotConfig | |||||
from earwigbot.irc import Frontend, Watcher | |||||
from earwigbot.managers import CommandManager, TaskManager | |||||
from earwigbot.wiki import SitesDB | |||||
__all__ = ["Bot"] | |||||
class Bot(object):
    """
    **EarwigBot: Main Bot Class**

    The :py:class:`Bot` class is the core of EarwigBot, essentially responsible
    for starting the various bot components and making sure they are all happy.

    EarwigBot has three components that can run independently of each other: an
    IRC front-end, an IRC watcher, and a wiki scheduler.

    - The IRC front-end runs on a normal IRC server and expects users to
      interact with it/give it commands.
    - The IRC watcher runs on a wiki recent-changes server and listens for
      edits. Users cannot interact with this part of the bot.
    - The wiki scheduler runs wiki-editing bot tasks in separate threads at
      user-defined times through a cron-like interface.

    The :py:class:`Bot` object is accessible from within commands and tasks as
    :py:attr:`self.bot`. This is the primary way to access data from other
    components of the bot. For example, our
    :py:class:`~earwigbot.config.BotConfig` object is accessible from
    :py:attr:`bot.config`, tasks can be started with
    :py:meth:`bot.tasks.start() <earwigbot.managers.TaskManager.start>`, and
    sites can be loaded from the wiki toolset with
    :py:meth:`bot.wiki.get_site() <earwigbot.wiki.sitesdb.SitesDB.get_site>`.
    """

    def __init__(self, root_dir, level=logging.INFO):
        """Create the bot's managers, then load config, commands, and tasks.

        *root_dir* and *level* are passed unchanged to
        :py:class:`~earwigbot.config.BotConfig`. No IRC component or scheduler
        is started here; that happens in :py:meth:`run`.
        """
        self.config = BotConfig(self, root_dir, level)
        self.logger = logging.getLogger("earwigbot")
        self.commands = CommandManager(self)
        self.tasks = TaskManager(self)
        self.wiki = SitesDB(self)
        # IRC components are created lazily by _dispatch_irc_component().
        self.frontend = None
        self.watcher = None

        # Held while IRC components are being checked, stopped, or restarted.
        self.component_lock = Lock()
        self._keep_looping = True

        self.config.load()
        self.commands.load()
        self.tasks.load()

    def __repr__(self):
        """Return the canonical string representation of the Bot."""
        return "Bot(config={0!r})".format(self.config)

    def __str__(self):
        """Return a nice string representation of the Bot."""
        return "<Bot at {0}>".format(self.config.root_dir)

    @staticmethod
    def _seconds_until_next_minute(time_start, time_end):
        """Return how long the scheduler should sleep after one pass.

        *time_start* and *time_end* are timestamps (in seconds) taken just
        before and just after a scheduling pass. The result is the remainder
        of the 60-second cycle, or ``0`` if the pass took a minute or longer.
        """
        elapsed = time_end - time_start
        return max(0, 60 - elapsed)

    def _dispatch_irc_component(self, name, klass):
        """Create a new IRC component, record it internally, and start it.

        *klass* is instantiated with this bot, stored as the attribute *name*
        on the bot, and its ``loop()`` method is run in a new thread named
        ``"irc_" + name``.
        """
        component = klass(self)
        setattr(self, name, component)
        Thread(name="irc_" + name, target=component.loop).start()

    def _start_irc_components(self):
        """Start the IRC frontend/watcher in separate threads if enabled."""
        if self.config.components.get("irc_frontend"):
            self.logger.info("Starting IRC frontend")
            self._dispatch_irc_component("frontend", Frontend)

        if self.config.components.get("irc_watcher"):
            self.logger.info("Starting IRC watcher")
            self._dispatch_irc_component("watcher", Watcher)

    def _start_wiki_scheduler(self):
        """Start the wiki scheduler in a separate thread if enabled."""
        def wiki_scheduler():
            while self._keep_looping:
                time_start = time()
                self.tasks.schedule()
                # Sleep only for what is left of this minute, so the time
                # spent scheduling does not push back the next pass.
                delay = self._seconds_until_next_minute(time_start, time())
                if delay > 0:
                    sleep(delay)

        if self.config.components.get("wiki_scheduler"):
            self.logger.info("Starting wiki scheduler")
            thread = Thread(name="wiki_scheduler", target=wiki_scheduler)
            thread.daemon = True  # Stop if other threads stop
            thread.start()

    def _keep_irc_component_alive(self, name, klass):
        """Ensure that IRC components stay connected, else restart them.

        Does nothing if the component stored under *name* has not been
        created. Otherwise its ``keep_alive()`` is called, and if it then
        reports being stopped, a fresh *klass* instance replaces it.
        """
        component = getattr(self, name)
        if component:
            component.keep_alive()
            if component.is_stopped():
                log = "IRC {0} has stopped; restarting".format(name)
                self.logger.warning(log)
                self._dispatch_irc_component(name, klass)

    def _stop_irc_components(self, msg):
        """Request the IRC frontend and watcher to stop if enabled."""
        if self.frontend:
            self.frontend.stop(msg)
        if self.watcher:
            self.watcher.stop(msg)

    def _stop_daemon_threads(self):
        """Notify the user of which threads are going to be killed.

        Unfortunately, there is no method right now of stopping command and
        task threads safely. This is because there is no way to tell them to
        stop like the IRC components can be told; furthermore, they are run as
        daemons, and daemon threads automatically stop without calling any
        __exit__ or try/finally code when all non-daemon threads stop. They
        were originally implemented as regular non-daemon threads, but this
        meant there was no way to completely stop the bot if tasks were
        running, because all other threads would exit and threading would
        absorb KeyboardInterrupts.

        The advantage of this is that stopping the bot is truly guaranteed to
        *stop* the bot, while the disadvantage is that the threads are given no
        advance warning of their forced shutdown.
        """
        # list() keeps the concatenation working when keys() returns a view
        # rather than a list.
        component_names = list(self.config.components.keys())
        skips = component_names + ["MainThread", "reminder", "irc:quit"]
        tasks = [thread.name for thread in enumerate_threads()
                 if thread.name not in skips and thread.is_alive()]
        if tasks:
            log = "The following commands or tasks will be killed: {0}"
            self.logger.warning(log.format(" ".join(tasks)))

    @property
    def is_running(self):
        """Whether or not the bot is currently running.

        This may return ``False`` even if the bot is still technically active,
        but in the process of shutting down.
        """
        return self._keep_looping

    def run(self):
        """Main entry point into running the bot.

        Starts all config-enabled components and then enters an idle loop,
        ensuring that all components remain online and restarting components
        that get disconnected from their servers.
        """
        self.logger.info("Starting bot (EarwigBot {0})".format(__version__))
        self._start_irc_components()
        self._start_wiki_scheduler()
        while self._keep_looping:
            with self.component_lock:
                self._keep_irc_component_alive("frontend", Frontend)
                self._keep_irc_component_alive("watcher", Watcher)
            sleep(2)

    def restart(self, msg=None):
        """Reload config, commands, tasks, and safely restart IRC components.

        This is thread-safe, and it will gracefully stop IRC components before
        reloading anything. Note that you can safely reload commands or tasks
        without restarting the bot with :py:meth:`bot.commands.load()
        <earwigbot.managers._ResourceManager.load>` or
        :py:meth:`bot.tasks.load() <earwigbot.managers._ResourceManager.load>`.
        These should not interfere with running components or tasks.

        If given, *msg* will be used as our quit message.
        """
        if msg:
            self.logger.info('Restarting bot ("{0}")'.format(msg))
        else:
            self.logger.info("Restarting bot")
        with self.component_lock:
            self._stop_irc_components(msg)
            self.config.load()
            self.commands.load()
            self.tasks.load()
            self._start_irc_components()

    def stop(self, msg=None):
        """Gracefully stop all bot components.

        If given, *msg* will be used as our quit message.
        """
        if msg:
            self.logger.info('Stopping bot ("{0}")'.format(msg))
        else:
            self.logger.info("Stopping bot")
        with self.component_lock:
            self._stop_irc_components(msg)
        self._keep_looping = False
        self._stop_daemon_threads()
@@ -0,0 +1,122 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
__all__ = ["Command"] | |||||
class Command(object):
    """
    **EarwigBot: Base IRC Command**

    This package provides built-in IRC "commands" used by the bot's front-end
    component. Additional commands can be installed as plugins in the bot's
    working directory.

    This class (import with ``from earwigbot.commands import Command``), can be
    subclassed to create custom IRC commands.

    This docstring is reported to the user when they type ``"!help
    <command>"``.
    """
    # The command's name, as reported to the user when they use !help:
    name = None

    # A list of names that will trigger this command. If left empty, it will
    # be triggered by the command's name and its name only:
    commands = []

    # Hooks are "msg", "msg_private", "msg_public", and "join". "msg" is the
    # default behavior; if you wish to override that, change the value in your
    # command subclass:
    hooks = ["msg"]

    def __init__(self, bot):
        """Constructor for new commands.

        This is called once when the command is loaded (from
        :py:meth:`commands.load() <earwigbot.managers._ResourceManager.load>`).
        *bot* is our base :py:class:`~earwigbot.bot.Bot` object. Don't override
        this directly; if you do, remember to call
        ``super(YourCommand, self).__init__(bot)`` first. Use :py:meth:`setup`
        for typical command-init/setup needs.
        """
        self.bot = bot
        self.config = bot.config
        self.logger = bot.commands.logger.getChild(self.name)

        # Convenience wrappers around the IRC frontend. Each one looks up
        # ``self.bot.frontend`` at call time rather than capturing it now, and
        # is stored on the instance (not the class), exactly like the
        # attributes they replace.
        def say(target, msg, hidelog=False):
            return self.bot.frontend.say(target, msg, hidelog)

        def reply(data, msg, hidelog=False):
            return self.bot.frontend.reply(data, msg, hidelog)

        def action(target, msg, hidelog=False):
            return self.bot.frontend.action(target, msg, hidelog)

        def notice(target, msg, hidelog=False):
            return self.bot.frontend.notice(target, msg, hidelog)

        def join(chan, hidelog=False):
            return self.bot.frontend.join(chan, hidelog)

        def part(chan, msg=None, hidelog=False):
            return self.bot.frontend.part(chan, msg, hidelog)

        def mode(t, level, msg, hidelog=False):
            return self.bot.frontend.mode(t, level, msg, hidelog)

        def ping(target, hidelog=False):
            return self.bot.frontend.ping(target, hidelog)

        def pong(target, hidelog=False):
            return self.bot.frontend.pong(target, hidelog)

        self.say = say
        self.reply = reply
        self.action = action
        self.notice = notice
        self.join = join
        self.part = part
        self.mode = mode
        self.ping = ping
        self.pong = pong

        self.setup()

    def __repr__(self):
        """Return the canonical string representation of the Command."""
        template = "Command(name={0!r}, commands={1!r}, hooks={2!r}, bot={3!r})"
        return template.format(self.name, self.commands, self.hooks, self.bot)

    def __str__(self):
        """Return a nice string representation of the Command."""
        return "<Command {0} of {1}>".format(self.name, self.bot)

    def setup(self):
        """Hook called at the end of :py:meth:`__init__`.

        Does nothing by default; subclasses may override it instead of
        overriding the constructor.
        """
        pass

    def check(self, data):
        """Return whether this command should be called in response to *data*.

        *data* is a :py:class:`~earwigbot.irc.data.Data` instance. The default
        implementation matches when ``data.is_command`` is true and
        ``data.command`` is one of :py:attr:`commands`, or equals
        :py:attr:`name` when :py:attr:`commands` is empty. The docs for this
        hook ask that it stay cheap and unlikely to raise, since it runs for
        each message seen on IRC; override it only to change the matching
        rule.
        """
        # An empty ``commands`` list means "match on the name only":
        triggers = self.commands or [self.name]
        return data.is_command and data.command in triggers

    def process(self, data):
        """Main entry point for doing a command.

        Handle an activity (usually a message) on IRC. By the time this runs,
        :py:meth:`check` has already accepted *data*. Subclasses put the
        command's body here; the base implementation does nothing.
        """
        pass
@@ -0,0 +1,142 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
import re | |||||
from earwigbot.commands import Command | |||||
class Access(Command):
    """Control and get info on who can access the bot."""
    # NOTE: the class docstring above is user-facing (it is what !help
    # reports), so maintainer notes live in comments instead.
    name = "access"
    commands = ["access", "permission", "permissions", "perm", "perms"]

    def process(self, data):
        """Dispatch ``!access <subcommand> ...`` to the matching handler."""
        if not data.args:
            self.reply(data, "Subcommands are self, list, add, remove.")
            return

        permdb = self.config.irc["permissions"]
        handlers = {
            "self": self.do_self,
            "list": self.do_list,
            "add": self.do_add,
            "remove": self.do_remove,
        }
        handler = handlers.get(data.args[0])
        if handler is None:
            msg = "Unknown subcommand \x0303{0}\x0F.".format(data.args[0])
            self.reply(data, msg)
            return
        handler(data, permdb)

    def do_self(self, data, permdb):
        """Tell the caller which access rule (if any) they match."""
        # is_owner()/is_admin() return the matching rule, which is shown to
        # the user; look each one up once instead of twice.
        rule = permdb.is_owner(data)
        if rule:
            msg = "You are a bot owner (matching rule \x0302{0}\x0F)."
            self.reply(data, msg.format(rule))
            return
        rule = permdb.is_admin(data)
        if rule:
            msg = "You are a bot admin (matching rule \x0302{0}\x0F)."
            self.reply(data, msg.format(rule))
            return
        self.reply(data, "You do not match any bot access rules.")

    def do_list(self, data, permdb):
        """List the rules for one access level, or summarize both levels."""
        if len(data.args) > 1:
            if data.args[1] in ["owner", "owners"]:
                name, rules = "owners", permdb.data.get(permdb.OWNER)
            elif data.args[1] in ["admin", "admins"]:
                name, rules = "admins", permdb.data.get(permdb.ADMIN)
            else:
                msg = "Unknown access level \x0302{0}\x0F."
                self.reply(data, msg.format(data.args[1]))
                return
            if rules:
                msg = "Bot {0}: {1}.".format(name, ", ".join(map(str, rules)))
            else:
                msg = "No bot {0}.".format(name)
            self.reply(data, msg)
        else:
            owners = len(permdb.data.get(permdb.OWNER, []))
            admins = len(permdb.data.get(permdb.ADMIN, []))
            msg = ("There are {0} bot owners and {1} bot admins. Use "
                   "'!{2} list owners' or '!{2} list admins' for details.")
            self.reply(data, msg.format(owners, admins, data.command))

    def do_add(self, data, permdb):
        """Add a nick!ident@host rule to the owner or admin list."""
        user = self.get_user_from_args(data, permdb)
        if not user:
            return  # get_user_from_args() has already replied with the reason
        nick, ident, host = user
        if data.args[1] in ["owner", "owners"]:
            name, level, adder = "owner", permdb.OWNER, permdb.add_owner
        else:
            name, level, adder = "admin", permdb.ADMIN, permdb.add_admin

        if permdb.has_exact(level, nick, ident, host):
            rule = "{0}!{1}@{2}".format(nick, ident, host)
            msg = "\x0302{0}\x0F is already a bot {1}.".format(rule, name)
        else:
            rule = adder(nick, ident, host)
            msg = "Added bot {0} \x0302{1}\x0F.".format(name, rule)
        self.reply(data, msg)

    def do_remove(self, data, permdb):
        """Remove a nick!ident@host rule from the owner or admin list."""
        user = self.get_user_from_args(data, permdb)
        if not user:
            return  # get_user_from_args() has already replied with the reason
        nick, ident, host = user
        if data.args[1] in ["owner", "owners"]:
            name, rmver = "owner", permdb.remove_owner
        else:
            name, rmver = "admin", permdb.remove_admin

        # A falsy result is treated as "no such rule":
        rule = rmver(nick, ident, host)
        if rule:
            msg = "Removed bot {0} \x0302{1}\x0F.".format(name, rule)
        else:
            rule = "{0}!{1}@{2}".format(nick, ident, host)
            msg = "No bot {0} matching \x0302{1}\x0F.".format(name, rule)
        self.reply(data, msg)

    def get_user_from_args(self, data, permdb):
        """Validate an add/remove request and extract the target user.

        Returns a ``(nick, ident, host)`` tuple on success. On any failure
        (caller is not an owner, missing/unknown access level, missing or
        malformed user) an explanation is sent to the caller and ``None`` is
        returned.
        """
        if not permdb.is_owner(data):
            # This guard is shared by "add" and "remove", so the message must
            # not claim the caller was only trying to add someone.
            msg = "You must be a bot owner to modify the access list."
            self.reply(data, msg)
            return

        levels = ["owner", "owners", "admin", "admins"]
        if len(data.args) == 1 or data.args[1] not in levels:
            msg = "Please specify an access level ('owners' or 'admins')."
            self.reply(data, msg)
            return
        if len(data.args) == 2:
            self.no_arg_error(data)
            return

        # Keyword form: any of nick=, ident=, host=; omitted parts become "*".
        kwargs = data.kwargs
        if "nick" in kwargs or "ident" in kwargs or "host" in kwargs:
            nick = kwargs.get("nick", "*")
            ident = kwargs.get("ident", "*")
            host = kwargs.get("host", "*")
            return nick, ident, host

        # Positional form: a single "nick!ident@host" argument.
        user = re.match(r"(.*?)!(.*?)@(.*?)$", data.args[2])
        if not user:
            self.no_arg_error(data)
            return
        return user.group(1), user.group(2), user.group(3)

    def no_arg_error(self, data):
        """Tell the caller how a user must be specified."""
        msg = 'Please specify a user, either as "\x0302nick\x0F!\x0302ident\x0F@\x0302host\x0F"'
        msg += ' or "nick=\x0302nick\x0F, ident=\x0302ident\x0F, host=\x0302host\x0F".'
        self.reply(data, msg)
@@ -0,0 +1,83 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
import re | |||||
import urllib | |||||
from earwigbot.commands import Command | |||||
class Calc(Command):
    """A somewhat advanced calculator: see http://futureboy.us/fsp/frink.fsp
    for details."""
    # NOTE: the class docstring above is user-facing (it is what !help
    # reports), so maintainer notes live in comments instead.
    name = "calc"

    def process(self, data):
        """Send the user's expression to the Frink web form and reply."""
        if not data.args:
            self.reply(data, "What do you want me to calculate?")
            return

        query = self.cleanup(' '.join(data.args))

        url = "http://futureboy.us/fsp/frink.fsp?fromVal={0}"
        url = url.format(urllib.quote(query))
        result = urllib.urlopen(url).read()

        # The answer is the content of the anchor named "results":
        r_result = re.compile(r'(?i)<A NAME=results>(.*?)</A>')
        r_tag = re.compile(r'<\S+.*?>')

        match = r_result.search(result)
        if not match:
            self.reply(data, "Calculation error.")
            return

        result = match.group(1)
        result = r_tag.sub("", result)  # strip span.warning tags
        # Decode the HTML entity for ">" (this line was a no-op replace of
        # ">" with ">", i.e. an entity that had been decoded in transit):
        result = result.replace("&gt;", ">")
        result = result.replace("(undefined symbol)", "(?) ")
        result = result.strip()

        if not result:
            result = '?'
        # NOTE(review): cleanup() has already rewritten " in " to " -> ", so
        # this branch looks unreachable for typical input -- confirm whether
        # the unit was meant to be taken from the raw query instead.
        elif " in " in query:
            result += " " + query.split(" in ", 1)[1]

        res = "%s = %s" % (query, result)
        self.reply(data, res)

    def cleanup(self, query):
        """Rewrite common shorthand in *query* into Frink syntax.

        Each entry is a ``(regex, replacement)`` pair applied in order with
        :py:func:`re.sub`; the result is stripped of surrounding whitespace.
        """
        fixes = [
            (' in ', ' -> '),
            (' over ', ' / '),
            (u'£', 'GBP '),
            (u'€', 'EUR '),
            (r'\$', 'USD '),
            (r'\bKB\b', 'kilobytes'),
            (r'\bMB\b', 'megabytes'),
            (r'\bGB\b', 'gigabytes'),  # was mapped to 'kilobytes' by mistake
            ('kbps', '(kilobits / second)'),
            ('mbps', '(megabits / second)'),
        ]

        for original, fix in fixes:
            query = re.sub(original, fix, query)
        return query.strip()
@@ -0,0 +1,91 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from earwigbot.commands import Command | |||||
class ChanOps(Command):
    """Voice, devoice, op, or deop users in the channel, or join or part from
    other channels."""
    # NOTE: the class docstring above is user-facing (it is what !help
    # reports), so maintainer notes live in comments instead.
    name = "chanops"
    commands = ["chanops", "voice", "devoice", "op", "deop", "join", "part"]

    def process(self, data):
        """Route the triggering command to join/part or a ChanServ request."""
        command = data.command
        if command == "chanops":
            msg = "Available commands are !voice, !devoice, !op, !deop, !join, and !part."
            self.reply(data, msg)
            return

        # Anyone may devoice/deop themselves; everything else is admin-only.
        lowering_self = command in ["devoice", "deop"] and (
            not data.args or data.args[0] == data.nick)
        if not lowering_self:
            if not self.config.irc["permissions"].is_admin(data):
                self.reply(data, "You must be a bot admin to use this command.")
                return

        if command == "join":
            self.do_join(data)
            return
        if command == "part":
            self.do_part(data)
            return

        # If it is just !op/!devoice/whatever without arguments, assume
        # they want to do this to themselves:
        target = data.args[0] if data.args else data.nick
        verb = command.upper()
        self.say("ChanServ", " ".join((verb, data.chan, target)))
        template = "{0} requested {1} on {2} in {3}"
        self.logger.info(template.format(data.nick, verb, target, data.chan))

    def do_join(self, data):
        """Join the channel named by the first argument ("#" is optional)."""
        if not data.args:
            msg = "You must specify a channel to join or part from."
            self.reply(data, msg)
            return

        channel = data.args[0]
        if not channel.startswith("#"):
            channel = "#" + channel
        self.join(channel)
        self.logger.info("{0} requested JOIN to {1}".format(data.nick, channel))

    def do_part(self, data):
        """Part a channel, optionally passing along the caller's reason."""
        args = data.args
        if args and args[0].startswith("#"):
            # "!part #channel reason for parting"
            channel, words = args[0], args[1:]
        else:
            # "!part reason for parting"; assume current channel
            channel, words = data.chan, args
        reason = " ".join(words) if words else None

        msg = "Requested by {0}".format(data.nick)
        log = "{0} requested PART from {1}".format(data.nick, channel)
        if reason:
            msg += ": {0}".format(reason)
            log += ' ("{0}")'.format(reason)
        self.part(channel, msg)
        self.logger.info(log)
@@ -0,0 +1,79 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
import hashlib | |||||
from Crypto.Cipher import Blowfish | |||||
from earwigbot.commands import Command | |||||
class Crypt(Command):
    """Provides hash functions with !hash (!hash list for supported algorithms)
    and Blowfish encryption with !encrypt and !decrypt."""
    # NOTE: the class docstring above is user-facing (it is what !help
    # reports), so maintainer notes live in comments instead.
    name = "crypt"
    commands = ["crypt", "hash", "encrypt", "decrypt"]

    def process(self, data):
        """Handle !crypt, !hash, !encrypt and !decrypt.

        ``!hash <algo> <text>`` replies with the hex digest; ``!encrypt`` and
        ``!decrypt`` take ``<key> <text>`` and use Blowfish keyed with the
        SHA-256 digest of the key, exchanging ciphertext as hex.
        """
        if data.command == "crypt":
            msg = "Available commands are !hash, !encrypt, and !decrypt."
            self.reply(data, msg)
            return

        if not data.args:
            msg = "What do you want me to {0}?".format(data.command)
            self.reply(data, msg)
            return

        if data.command == "hash":
            algo = data.args[0]
            if algo == "list":
                algos = ', '.join(hashlib.algorithms)
                msg = "Supported algorithms: " + algos + "."
                self.reply(data, msg)
            elif algo in hashlib.algorithms:
                string = ' '.join(data.args[1:])
                result = getattr(hashlib, algo)(string).hexdigest()
                self.reply(data, result)
            else:
                msg = "Unknown algorithm: '{0}'.".format(algo)
                self.reply(data, msg)
            return

        key = data.args[0]
        text = " ".join(data.args[1:])

        if not text:
            msg = "A key was provided, but text to {0} was not."
            self.reply(data, msg.format(data.command))
            return

        cipher = Blowfish.new(hashlib.sha256(key).digest())
        try:
            if data.command == "encrypt":
                # Blowfish works on 8-byte blocks; pad with NULs as needed.
                if len(text) % 8:
                    pad = 8 - len(text) % 8
                    text = text.ljust(len(text) + pad, "\x00")
                self.reply(data, cipher.encrypt(text).encode("hex"))
            else:
                self.reply(data, cipher.decrypt(text.decode("hex")))
        except (ValueError, TypeError) as error:
            # Under Python 2 the hex codec reports malformed input (odd
            # length, non-hex digits) as TypeError rather than ValueError, so
            # both must be caught to give the user an error instead of
            # crashing. str(error) replaces the deprecated error.message.
            self.reply(data, str(error))
@@ -0,0 +1,68 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
import platform | |||||
import time | |||||
from earwigbot import __version__ | |||||
from earwigbot.commands import Command | |||||
class CTCP(Command):
    """Not an actual command; this module implements responses to the CTCP
    requests PING, TIME, and VERSION."""
    # NOTE: the class docstring above is user-facing (it is what !help
    # reports), so maintainer notes live in comments instead.
    name = "ctcp"
    hooks = ["msg_private"]

    def check(self, data):
        """Accept ``!ctcp`` itself and any supported CTCP request.

        A CTCP request is recognised by the fourth token of the raw line
        being ``:\\x01<REQUEST>`` (optionally closed by ``\\x01``).
        """
        if data.is_command and data.command == "ctcp":
            return True

        supported = ["PING", "TIME", "VERSION"]
        token = data.line[3]
        return token[:2] == ":\x01" and token[2:].rstrip("\x01") in supported

    def process(self, data):
        """Answer a CTCP request with a NOTICE to the sender."""
        if data.is_command:
            # "!ctcp" is only matched so the command is listed; nothing to do.
            return

        target = data.nick
        request = data.line[3][1:].strip("\x01")

        if request == "PING":
            # Echo back whatever payload followed the request, if any:
            payload = " ".join(data.line[4:])
            if payload:
                answer = "\x01PING {0}\x01".format(payload)
            else:
                answer = "\x01PING\x01"
            self.notice(target, answer)

        elif request == "TIME":
            now = time.strftime("%a, %d %b %Y %H:%M:%S %Z", time.localtime())
            self.notice(target, "\x01TIME {0}\x01".format(now))

        elif request == "VERSION":
            # $1 and $2 in the (configurable) template are replaced with the
            # bot version and the Python version respectively.
            default = "EarwigBot - $1 - Python/$2 https://github.com/earwig/earwigbot"
            template = self.config.irc.get("version", default)
            version = template.replace("$1", __version__)
            version = version.replace("$2", platform.python_version())
            self.notice(target, "\x01VERSION {0}\x01".format(version))
@@ -0,0 +1,181 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
import re | |||||
from earwigbot import exceptions | |||||
from earwigbot.commands import Command | |||||
class Dictionary(Command):
    """Define words and stuff."""
    # NOTE: the class docstring above is user-facing (it is what !help
    # reports), so maintainer notes live in comments instead.
    name = "dictionary"
    commands = ["dict", "dictionary", "define"]

    def process(self, data):
        """Look the requested term up on Wiktionary and reply with it."""
        if not data.args:
            self.reply(data, "What do you want me to define?")
            return

        term = " ".join(data.args)
        lang = self.bot.wiki.get_site().lang
        try:
            defined = self.define(term, lang)
        except exceptions.APIError:
            msg = "Cannot find a {0}-language Wiktionary."
            self.reply(data, msg.format(lang))
        else:
            self.reply(data, defined.encode("utf8"))

    def define(self, term, lang, tries=2):
        """Return a one-line definition of *term* from the *lang* Wiktionary.

        If the page is missing or invalid, the lookup is retried with the
        term lower-cased and then capitalized, at most *tries* more times.
        """
        try:
            site = self.bot.wiki.get_site(project="wiktionary", lang=lang)
        except exceptions.SiteNotFoundError:
            site = self.bot.wiki.add_site(project="wiktionary", lang=lang)

        page = site.get_page(term, follow_redirects=True)
        try:
            entry = page.get()
        except (exceptions.PageNotFoundError, exceptions.InvalidPageError):
            if term.lower() != term and tries:
                return self.define(term.lower(), lang, tries - 1)
            if term.capitalize() != term and tries:
                return self.define(term.capitalize(), lang, tries - 1)
            return "No definition found."

        level, languages = self.get_languages(entry)
        if not languages:
            return u"Couldn't parse {0}!".format(page.url)

        result = []
        for language, section in sorted(languages.items()):
            definition = self.get_definition(section, level)
            result.append(u"({0}) {1}".format(language, definition))
        return u"; ".join(result)

    def get_languages(self, entry, level=2):
        """Split *entry* into sections keyed by their heading text.

        Headings are matched at the given *level* (``==Name==`` for 2). When
        no level-2 heading exists, level 3 is tried once. Returns
        ``(level, {heading: section_text})``, or ``(level, None)`` if no
        heading was found at either level.
        """
        marker = "=" * level
        regex = r"(?:\A|\n){0}\s*([a-zA-Z0-9_ ]*?)\s*{0}(?:\Z|\n)".format(marker)
        # With one capture group, re.split() yields
        # [preamble, heading, body, heading, body, ...]; drop the preamble so
        # the remainder is strictly (heading, body) pairs.
        split = re.split(regex, entry)
        split.pop(0)

        if not split:
            # The old parity test on len(split) could never be true (the
            # list length is always odd) and the regex ignored *level*, so
            # this fallback never actually ran.
            if level == 2:
                return self.get_languages(entry, level=3)
            return level, None

        languages = dict(zip(split[0::2], split[1::2]))
        return level, languages

    def get_definition(self, section, level):
        """Collect the definitions under each part-of-speech heading.

        Part-of-speech headings are expected one level below the language
        heading, either as plain text or as a ``{{template}}``. Returns the
        definitions joined with ``"; "`` (empty string if none were found).
        """
        parts_of_speech = {
            "v.": "Verb",
            "n.": "Noun",
            "pron.": "Pronoun",
            "adj.": "Adjective",
            "adv.": "Adverb",
            "prep.": "Preposition",
            "conj.": "Conjunction",
            "inter.": "Interjection",
            "symbol": "Symbol",
            "suffix": "Suffix",
            "initialism": "Initialism",
            "phrase": "Phrase",
            "proverb": "Proverb",
            "prop. n.": "Proper noun",
            "abbr.": "Abbreviation",
            "punct.": "Punctuation mark",
        }
        blocks = "=" * (level + 1)
        defs = []
        for part, basename in parts_of_speech.items():
            fullnames = [basename, r"\{\{" + basename + r"\}\}",
                         r"\{\{" + basename.lower() + r"\}\}"]
            for fullname in fullnames:
                # Capture everything from the heading up to the next heading
                # marker (or the end of the section). No match simply means
                # this heading is absent, so a separate search is unneeded.
                regex = blocks + r"\s*" + fullname
                regex += r"\s*{0}(.*?)(?:(?:{0})|\Z)".format(blocks)
                for body in re.findall(regex, section, re.DOTALL):
                    definition = self.parse_body(body)
                    if definition:
                        msg = u"\x02{0}\x0F {1}"
                        defs.append(msg.format(part, definition))

        return "; ".join(defs)

    def parse_body(self, body):
        """Turn the wikitext under one heading into plain-text senses.

        Only lines starting with a single ``#`` (not ``#:``, ``#*`` or
        ``##``) are treated as senses. Returns ``None`` when there are none,
        the sense itself when there is one, and a numbered list otherwise.
        """
        substitutions = [
            (r"<!--(.*?)-->", ""),
            (r"<ref>(.*?)</ref>", ""),
            (r"\[\[[^\]|]*?\|([^\]|]*?)\]\]", r"\1"),
            (r"\{\{unsupported\|(.*?)\}\}", r"\1"),
            (r"\{\{(.*?) of\|([^}|]*?)(\|(.*?))?\}\}", r"\1 of \2."),
            (r"\{\{w\|(.*?)\}\}", r"\1"),
            (r"\{\{surname(.*?)\}\}", r"A surname."),
            (r"\{\{given name\|([^}|]*?)(\|(.*?))?\}\}", r"A \1 given name."),
        ]

        senses = []
        for line in body.splitlines():
            line = line.strip()
            if re.match(r"#\s*[^:*#]", line):
                for regex, repl in substitutions:
                    line = re.sub(regex, repl, line)
                line = self.strip_templates(line)
                # Drop the leading "#" and any bold/italic/link markup:
                line = line[1:].replace("'''", "").replace("''", "")
                line = line.replace("[[", "").replace("]]", "")
                line = line.strip()
                if line:
                    senses.append(line[0].upper() + line[1:])

        if not senses:
            return None
        if len(senses) == 1:
            return senses[0]

        result = []  # Number the senses incrementally
        for i, sense in enumerate(senses):
            result.append(u"{0}. {1}".format(i + 1, sense))
        return " ".join(result)

    def strip_templates(self, line):
        """Return *line* with every ``{{...}}`` template removed.

        Nesting is tracked with a depth counter; text is kept only while the
        depth is exactly zero, so unbalanced closers also suppress output.
        """
        kept = []
        depth = 0
        index = 0
        length = len(line)
        # Single left-to-right scan; avoids the quadratic cost of popping
        # from the front of a list and growing a string one char at a time.
        while index < length:
            pair = line[index:index + 2]
            if pair == "{{":
                depth += 1
                index += 2
            elif pair == "}}":
                depth -= 1
                index += 2
            else:
                if depth == 0:
                    kept.append(line[index])
                index += 1
        return "".join(kept)
@@ -0,0 +1,53 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from urllib import quote_plus | |||||
from earwigbot import exceptions | |||||
from earwigbot.commands import Command | |||||
class Editcount(Command):
    """Return a user's edit count."""
    # NOTE: the class docstring above is user-facing (it is what !help
    # reports), so maintainer notes live in comments instead.
    name = "editcount"
    commands = ["ec", "editcount"]

    def process(self, data):
        """Reply with the edit count of the named user (default: the caller)."""
        # With no arguments, look up the caller's own IRC nick:
        name = ' '.join(data.args) if data.args else data.nick

        site = self.bot.wiki.get_site()
        user = site.get_user(name)

        try:
            count = user.editcount
        except exceptions.UserNotFoundError:
            missing = "The user \x0302{0}\x0F does not exist."
            self.reply(data, missing.format(name))
            return

        # Link to the external edit counter for the same user and wiki:
        encoded = quote_plus(user.name.encode("utf8"))
        template = "http://toolserver.org/~tparis/pcount/index.php?name={0}&lang={1}&wiki={2}"
        link = template.format(encoded, site.lang, site.project)
        summary = "\x0302{0}\x0F has {1} edits ({2})."
        self.reply(data, summary.format(name, count, link))
@@ -0,0 +1,71 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
import re | |||||
from earwigbot.commands import Command | |||||
class Help(Command):
    """Displays help information."""
    name = "help"

    def check(self, data):
        """Return whether this message should be handled by the help command.

        That is the case for an explicit "help" command, or for a command
        message with an empty command whose trigger is the bot's own nick.
        """
        if not data.is_command:
            return False
        if data.command == "help":
            return True
        return not data.command and data.trigger == data.my_nick

    def process(self, data):
        """Dispatch to the greeting, per-command help, or the general help."""
        if not data.command:
            self.do_hello(data)
        elif data.args:
            self.do_command_help(data)
        else:
            self.do_main_help(data)

    def do_main_help(self, data):
        """Give the user a general help message with a list of all commands."""
        msg = "Hi, I'm a bot! I have {0} commands loaded: {1}. You can get help for any command with '!help <command>'."
        cmnds = sorted(cmnd.name for cmnd in self.bot.commands)
        msg = msg.format(len(cmnds), ', '.join(cmnds))
        self.reply(data, msg)

    def do_command_help(self, data):
        """Give the user help for a specific command.

        The first argument is matched against each loaded command's ``name``
        and its ``commands`` aliases; the first match that has a docstring
        is reported. If nothing suitable is found, an apology is sent.
        """
        target = data.args[0]

        for command in self.bot.commands:
            if command.name == target or target in command.commands:
                if command.__doc__:
                    # Flatten a multi-line docstring onto one line: drop the
                    # newlines, then squeeze the leftover indentation.
                    doc = command.__doc__.replace("\n", "")
                    doc = re.sub(r"\s\s+", " ", doc)
                    msg = 'Help for command \x0303{0}\x0F: "{1}"'
                    self.reply(data, msg.format(target, doc))
                    return

        msg = "Sorry, no help for \x0303{0}\x0F.".format(target)
        self.reply(data, msg)

    def do_hello(self, data):
        """Answer someone who addressed the bot without giving a command."""
        self.say(data.chan, "Yes, {0}?".format(data.nick))
@@ -0,0 +1,101 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from earwigbot import exceptions | |||||
from earwigbot.commands import Command | |||||
class Lag(Command):
    """Return the replag for a specific database on the Toolserver."""
    name = "lag"
    commands = ["lag", "replag", "maxlag"]

    def process(self, data):
        """Reply with the replag, the maxlag, or both, for the chosen site.

        "replag" and "maxlag" report one figure each; any other alias
        (i.e. "lag") reports both.
        """
        site = self.get_site(data)
        if not site:
            # get_site() has already told the user what went wrong.
            return

        if data.command == "replag":
            base = "\x0302{0}\x0F: {1}."
            msg = base.format(site.name, self.get_replag(site))
        elif data.command == "maxlag":
            base = "\x0302{0}\x0F: {1}."
            msg = base.format(site.name, self.get_maxlag(site).capitalize())
        else:
            base = "\x0302{0}\x0F: {1}; {2}."
            msg = base.format(site.name, self.get_replag(site),
                              self.get_maxlag(site))
        self.reply(data, msg)

    def get_replag(self, site):
        """Return a sentence fragment describing the site's replag."""
        return "Toolserver replag is {0}".format(self.time(site.get_replag()))

    def get_maxlag(self, site):
        """Return a sentence fragment describing the site's maxlag."""
        return "database maxlag is {0}".format(self.time(site.get_maxlag()))

    def get_site(self, data):
        """Work out which site the user is asking about.

        Accepted forms are "project" and "lang" keyword arguments, no
        arguments at all (the default site), or one argument that is either
        "lang.project", "project:lang", or a site name. Returns the site, or
        None after replying with an error message.
        """
        if data.kwargs and "project" in data.kwargs and "lang" in data.kwargs:
            project, lang = data.kwargs["project"], data.kwargs["lang"]
            return self.get_site_from_proj_and_lang(data, project, lang)

        if not data.args:
            return self.bot.wiki.get_site()

        if len(data.args) > 1:
            name = " ".join(data.args)
            self.reply(data, "Unknown site: \x0302{0}\x0F.".format(name))
            return None

        name = data.args[0]
        if "." in name:
            lang, project = name.split(".")[:2]
        elif ":" in name:
            project, lang = name.split(":")[:2]
        else:
            try:
                return self.bot.wiki.get_site(name)
            except exceptions.SiteNotFoundError:
                msg = "Unknown site: \x0302{0}\x0F.".format(name)
                self.reply(data, msg)
                return None

        return self.get_site_from_proj_and_lang(data, project, lang)

    def get_site_from_proj_and_lang(self, data, project, lang):
        """Return the site for *project* and *lang*, adding it if unknown.

        If the site is not already known, an attempt is made to add it; when
        that fails with an APIError the user is told and None is returned.
        """
        try:
            site = self.bot.wiki.get_site(project=project, lang=lang)
        except exceptions.SiteNotFoundError:
            try:
                site = self.bot.wiki.add_site(project=project, lang=lang)
            except exceptions.APIError:
                msg = "Site \x0302{0}:{1}\x0F not found."
                self.reply(data, msg.format(project, lang))
                return None
        return site

    def time(self, seconds):
        """Format a duration given in seconds, e.g. "1 hour, 5 seconds".

        Units with a zero count are omitted; a zero duration is rendered as
        "0 seconds". The input is truncated to a whole number of seconds and
        split with floor division, so a float argument (or true division on
        Python 3) cannot produce fractional units.
        """
        parts = [("year", 31536000), ("day", 86400), ("hour", 3600),
                 ("minute", 60), ("second", 1)]
        remaining = int(seconds)
        msg = []
        for name, size in parts:
            num, remaining = divmod(remaining, size)
            if num:
                chunk = "{0} {1}".format(num, name if num == 1 else name + "s")
                msg.append(chunk)
        return ", ".join(msg) if msg else "0 seconds"
@@ -0,0 +1,62 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from earwigbot.commands import Command | |||||
class Langcode(Command):
    """Convert a language code into its name and a list of WMF sites in that
    language, or a name into its code."""
    name = "langcode"
    commands = ["langcode", "lang", "language"]

    def process(self, data):
        """Look the first argument up in the site matrix and report the match.

        The argument is compared (lowercased) against each language's code,
        then against its name and local name. The first matching language is
        reported; if none matches, the user is told so.
        """
        if not data.args:
            self.reply(data, "Please specify a language code.")
            return

        code = data.args[0]
        lcase = code.lower()
        wiki = self.bot.wiki.get_site()
        matrix = wiki.api_query(action="sitematrix")["sitematrix"]
        # These two keys are not language entries, so drop them before
        # iterating over the rest.
        del matrix["count"]
        del matrix["specials"]

        for language in matrix.itervalues():
            if not language["name"]:
                continue
            name = language["name"].encode("utf8")
            localname = language["localname"].encode("utf8")

            matched_code = language["code"] == lcase
            if not matched_code:
                matched_name = lcase in (name.lower(), localname.lower())
                if not matched_name:
                    continue

            # Show the local name as well when it differs from the name.
            if name != localname:
                name = name + " ({0})".format(localname)
            sites = ", ".join(entry["url"] for entry in language["site"])

            if matched_code:
                msg = "\x0302{0}\x0F is {1} ({2})".format(code, name, sites)
            else:
                msg = "{0} is \x0302{1}\x0F ({2})"
                msg = msg.format(name, language["code"], sites)
            self.reply(data, msg)
            return

        self.reply(data, "Language \x0302{0}\x0F not found.".format(code))
@@ -0,0 +1,79 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
import re | |||||
from earwigbot.commands import Command | |||||
class Link(Command):
    """Convert a Wikipedia page name into a URL."""
    name = "link"

    # Compiled once as raw strings: check() runs for every message, and the
    # backslash escapes are meant for the regex engine, not the string
    # literal.
    _RE_HAS_LINK = re.compile(r"(\[\[(.*?)\]\])|(\{\{(.*?)\}\})")
    _RE_PARAMETER = re.compile(r"\{\{\{(.*?)\}\}\}")
    _RE_WIKILINK = re.compile(r"(\[\[(.*?)(\||\]\]))")
    _RE_TEMPLATE = re.compile(r"(\{\{(.*?)(\||\}\}))")

    def setup(self):
        """Create the per-channel store of the last message with a link."""
        self.last = {}

    def check(self, data):
        """Remember messages containing links; accept only the link command.

        Any message containing a [[link]] or {{template}} is stored as its
        channel's most recent link, whether or not it is a command.
        """
        if self._RE_HAS_LINK.search(data.msg):
            self.last[data.chan] = data.msg  # Store most recent link
        return data.is_command and data.command == self.name

    def process(self, data):
        """Reply with URLs for the links in the message, or the named page.

        If the message itself contains links, those are converted. Otherwise
        the arguments are treated as a page name; with no arguments, the
        last linking message seen in the channel is converted instead.
        """
        self.site = self.bot.wiki.get_site()

        if self._RE_HAS_LINK.search(data.msg):
            links = u" , ".join(self.parse_line(data.msg))
            self.reply(data, links.encode("utf8"))

        elif data.command == "link":
            if not data.args:
                if data.chan in self.last:
                    links = u" , ".join(self.parse_line(self.last[data.chan]))
                    self.reply(data, links.encode("utf8"))
                else:
                    self.reply(data, "What do you want me to link to?")
                return
            pagename = " ".join(data.args)
            link = self.site.get_page(pagename).url.encode("utf8")
            self.reply(data, link)

    def parse_line(self, line):
        """Return a list of links within a line of text.

        URLs of [[links]] come first, followed by those of {{templates}}
        (looked up under the "Template:" prefix). Relies on self.site, which
        process() sets before calling this.
        """
        # Destroy {{{template parameters}}}:
        line = self._RE_PARAMETER.sub("", line)

        # findall() yields one tuple per match; the second group is the
        # target name (the text before any "|" or the closing brackets).
        results = [self.site.get_page(match[1]).url
                   for match in self._RE_WIKILINK.findall(line)]
        results += [self.site.get_page("Template:" + match[1]).url
                    for match in self._RE_TEMPLATE.findall(line)]
        return results
@@ -0,0 +1,319 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from datetime import datetime | |||||
from os import path | |||||
import re | |||||
import sqlite3 as sqlite | |||||
from threading import Lock | |||||
from earwigbot.commands import Command | |||||
class Notes(Command):
    """A mini IRC-based wiki for storing notes, tips, and reminders."""
    name = "notes"
    commands = ["notes", "note", "about"]
    version = 2

    # Format in which revision timestamps are written by do_edit() and
    # parsed back by do_info().
    _TIMESTAMP_FORMAT = "%b %d, %Y %H:%M:%S"

    def setup(self):
        """Set the database path and the lock that serializes access to it."""
        self._dbfile = path.join(self.config.root_dir, "notes.db")
        self._db_access_lock = Lock()

    def _open_db(self):
        """Return a context manager yielding a connection, closed on exit.

        Used as ``with self._db_access_lock, self._open_db() as conn, conn:``
        so that, on leaving the block, the transaction is committed (or
        rolled back), the connection is closed, and only then is the lock
        released. A sqlite connection used directly as a context manager
        only manages the transaction; it does not close itself.
        """
        from contextlib import closing
        return closing(sqlite.connect(self._dbfile))

    def _timestamp_range(self, stamps):
        """Return the (earliest, latest) of a non-empty list of timestamps.

        Timestamps are stored as text starting with a month abbreviation, so
        they are parsed before comparison; plain string ordering would sort
        by month name. If any value cannot be parsed, string ordering is
        used as a fallback rather than failing.
        """
        fmt = self._TIMESTAMP_FORMAT
        try:
            ordered = sorted(stamps, key=lambda s: datetime.strptime(s, fmt))
        except (TypeError, ValueError):
            ordered = sorted(stamps)
        return ordered[0], ordered[-1]

    def process(self, data):
        """Dispatch to the subcommand named by the first argument.

        With no arguments, reply with the version and the subcommand list.
        """
        commands = {
            "help": self.do_help,
            "list": self.do_list,
            "read": self.do_read,
            "edit": self.do_edit,
            "info": self.do_info,
            "rename": self.do_rename,
            "delete": self.do_delete,
        }

        if not data.args:
            msg = "\x0302The Earwig Mini-Wiki\x0F: running v{0}. Subcommands are: {1}. You can get help on any with '!{2} help subcommand'."
            cmnds = ", ".join(commands)
            self.reply(data, msg.format(self.version, cmnds, data.command))
            return

        command = data.args[0].lower()
        if command in commands:
            commands[command](data)
        else:
            msg = "Unknown subcommand: \x0303{0}\x0F.".format(command)
            self.reply(data, msg)

    def do_help(self, data):
        """Get help on a subcommand."""
        info = {
            "help": "Get help on other subcommands.",
            "list": "List existing entries.",
            "read": "Read an existing entry ('!notes read [name]').",
            "edit": """Modify or create a new entry ('!notes edit name
                       [entry content]...'). If modifying, you must be the
                       entry author or a bot admin.""",
            "info": """Get information on an existing entry ('!notes info
                       [name]').""",
            "rename": """Rename an existing entry ('!notes rename [old_name]
                         [new_name]'). You must be the entry author or a bot
                         admin.""",
            "delete": """Delete an existing entry ('!notes delete [name]'). You
                         must be the entry author or a bot admin.""",
        }

        try:
            command = data.args[1]
        except IndexError:
            self.reply(data, "Please specify a subcommand to get help on.")
            return

        try:
            # Join the wrapped help text back into a single line.
            help_ = re.sub(r"\s\s+", " ", info[command].replace("\n", ""))
            self.reply(data, "\x0303{0}\x0F: ".format(command) + help_)
        except KeyError:
            msg = "Unknown subcommand: \x0303{0}\x0F.".format(command)
            self.reply(data, msg)

    def do_list(self, data):
        """Show a list of entries in the notes database."""
        query = "SELECT entry_title FROM entries"
        with self._db_access_lock, self._open_db() as conn, conn:
            try:
                entries = conn.execute(query).fetchall()
            except sqlite.OperationalError:
                # The tables have not been created yet.
                entries = []

        if entries:
            entries = [entry[0] for entry in entries]
            self.reply(data, "Entries: {0}".format(", ".join(entries)))
        else:
            self.reply(data, "No entries in the database.")

    def do_read(self, data):
        """Read an entry from the notes database."""
        query = """SELECT entry_title, rev_content FROM entries
                   INNER JOIN revisions ON entry_revision = rev_id
                   WHERE entry_slug = ?"""
        try:
            slug = self.slugify(data.args[1])
        except IndexError:
            self.reply(data, "Please specify an entry to read from.")
            return

        with self._db_access_lock, self._open_db() as conn, conn:
            try:
                title, content = conn.execute(query, (slug,)).fetchone()
            except (sqlite.OperationalError, TypeError):
                # Missing tables, or fetchone() returned None (no such row).
                title, content = slug, None

        if content:
            self.reply(data, "\x0302{0}\x0F: {1}".format(title, content))
        else:
            self.reply(data, "Entry \x0302{0}\x0F not found.".format(title))

    def do_edit(self, data):
        """Edit an entry in the notes database.

        Creates the entry (and, if necessary, the tables) when it does not
        exist yet. An existing entry may only be edited by the host that
        made its current revision or by a bot admin.
        """
        query1 = """SELECT entry_id, entry_title, user_host FROM entries
                    INNER JOIN revisions ON entry_revision = rev_id
                    INNER JOIN users ON rev_user = user_id
                    WHERE entry_slug = ?"""
        query2 = "INSERT INTO revisions VALUES (?, ?, ?, ?, ?)"
        query3 = "INSERT INTO entries VALUES (?, ?, ?, ?)"
        query4 = "UPDATE entries SET entry_revision = ? WHERE entry_id = ?"
        try:
            slug = self.slugify(data.args[1])
        except IndexError:
            self.reply(data, "Please specify an entry to edit.")
            return
        content = " ".join(data.args[2:]).strip()
        if not content:
            self.reply(data, "Please give some content to put in the entry.")
            return

        with self._db_access_lock, self._open_db() as conn, conn:
            create = True
            try:
                id_, title, author = conn.execute(query1, (slug,)).fetchone()
                create = False
            except sqlite.OperationalError:
                # No tables yet: this will be the very first entry.
                id_, title, author = 1, data.args[1], data.host
                self.create_db(conn)
            except TypeError:
                # fetchone() returned None: the entry does not exist yet.
                id_ = self.get_next_entry(conn)
                title, author = data.args[1], data.host
            permdb = self.config.irc["permissions"]
            if author != data.host and not permdb.is_admin(data):
                msg = "You must be an author or a bot admin to edit this entry."
                self.reply(data, msg)
                return
            revid = self.get_next_revision(conn)
            userid = self.get_user(conn, data.host)
            now = datetime.utcnow().strftime(self._TIMESTAMP_FORMAT)
            conn.execute(query2, (revid, id_, userid, now, content))
            if create:
                conn.execute(query3, (id_, slug, title, revid))
            else:
                conn.execute(query4, (revid, id_))

        self.reply(data, "Entry \x0302{0}\x0F updated.".format(title))

    def do_info(self, data):
        """Get info on an entry in the notes database.

        Reports the number of revisions, the time of the earliest one, the
        time of the latest one (when there are several), and the distinct
        author hosts.
        """
        query = """SELECT entry_title, rev_timestamp, user_host FROM entries
                   INNER JOIN revisions ON entry_id = rev_entry
                   INNER JOIN users ON rev_user = user_id
                   WHERE entry_slug = ?"""
        try:
            slug = self.slugify(data.args[1])
        except IndexError:
            self.reply(data, "Please specify an entry to get info on.")
            return

        with self._db_access_lock, self._open_db() as conn, conn:
            try:
                info = conn.execute(query, (slug,)).fetchall()
            except sqlite.OperationalError:
                info = []

        if info:
            title = info[0][0]
            times = [datum[1] for datum in info]
            # Compare as dates, not as strings (see _timestamp_range).
            earliest, latest = self._timestamp_range(times)
            msg = "\x0302{0}\x0F: {1} edits since {2}"
            msg = msg.format(title, len(info), earliest)
            if len(times) > 1:
                msg += "; last edit on {0}".format(latest)
            names = [datum[2] for datum in info]
            msg += "; authors: {0}.".format(", ".join(list(set(names))))
            self.reply(data, msg)
        else:
            title = data.args[1]
            self.reply(data, "Entry \x0302{0}\x0F not found.".format(title))

    def do_rename(self, data):
        """Rename an entry in the notes database.

        Only the host that made the entry's current revision, or a bot
        admin, may rename it.
        """
        query1 = """SELECT entry_id, user_host FROM entries
                    INNER JOIN revisions ON entry_revision = rev_id
                    INNER JOIN users ON rev_user = user_id
                    WHERE entry_slug = ?"""
        query2 = """UPDATE entries SET entry_slug = ?, entry_title = ?
                    WHERE entry_id = ?"""
        try:
            slug = self.slugify(data.args[1])
        except IndexError:
            self.reply(data, "Please specify an entry to rename.")
            return
        try:
            newtitle = data.args[2]
        except IndexError:
            self.reply(data, "Please specify a new name for the entry.")
            return
        if newtitle == data.args[1]:
            self.reply(data, "The old and new names are identical.")
            return

        with self._db_access_lock, self._open_db() as conn, conn:
            try:
                id_, author = conn.execute(query1, (slug,)).fetchone()
            except (sqlite.OperationalError, TypeError):
                msg = "Entry \x0302{0}\x0F not found.".format(data.args[1])
                self.reply(data, msg)
                return
            permdb = self.config.irc["permissions"]
            if author != data.host and not permdb.is_admin(data):
                msg = "You must be an author or a bot admin to rename this entry."
                self.reply(data, msg)
                return
            conn.execute(query2, (self.slugify(newtitle), newtitle, id_))

        msg = "Entry \x0302{0}\x0F renamed to \x0302{1}\x0F."
        self.reply(data, msg.format(data.args[1], newtitle))

    def do_delete(self, data):
        """Delete an entry from the notes database.

        Removes the entry and all of its revisions. Only the host that made
        the entry's current revision, or a bot admin, may delete it.
        """
        query1 = """SELECT entry_id, user_host FROM entries
                    INNER JOIN revisions ON entry_revision = rev_id
                    INNER JOIN users ON rev_user = user_id
                    WHERE entry_slug = ?"""
        query2 = "DELETE FROM entries WHERE entry_id = ?"
        query3 = "DELETE FROM revisions WHERE rev_entry = ?"
        try:
            slug = self.slugify(data.args[1])
        except IndexError:
            self.reply(data, "Please specify an entry to delete.")
            return

        with self._db_access_lock, self._open_db() as conn, conn:
            try:
                id_, author = conn.execute(query1, (slug,)).fetchone()
            except (sqlite.OperationalError, TypeError):
                msg = "Entry \x0302{0}\x0F not found.".format(data.args[1])
                self.reply(data, msg)
                return
            permdb = self.config.irc["permissions"]
            if author != data.host and not permdb.is_admin(data):
                msg = "You must be an author or a bot admin to delete this entry."
                self.reply(data, msg)
                return
            conn.execute(query2, (id_,))
            conn.execute(query3, (id_,))

        self.reply(data, "Entry \x0302{0}\x0F deleted.".format(data.args[1]))

    def slugify(self, name):
        """Convert *name* into an identifier for storing in the database."""
        return name.lower().replace("_", "").replace("-", "")

    def create_db(self, conn):
        """Initialize the notes database with its necessary tables."""
        script = """
            CREATE TABLE entries (entry_id, entry_slug, entry_title,
                                  entry_revision);
            CREATE TABLE users (user_id, user_host);
            CREATE TABLE revisions (rev_id, rev_entry, rev_user, rev_timestamp,
                                    rev_content);
        """
        conn.executescript(script)

    def get_next_entry(self, conn):
        """Get the next entry ID."""
        query = "SELECT MAX(entry_id) FROM entries"
        later = conn.execute(query).fetchone()[0]
        return later + 1 if later else 1

    def get_next_revision(self, conn):
        """Get the next revision ID."""
        query = "SELECT MAX(rev_id) FROM revisions"
        later = conn.execute(query).fetchone()[0]
        return later + 1 if later else 1

    def get_user(self, conn, host):
        """Get the user ID corresponding to a hostname, or make one."""
        query1 = "SELECT user_id FROM users WHERE user_host = ?"
        query2 = "SELECT MAX(user_id) FROM users"
        query3 = "INSERT INTO users VALUES (?, ?)"
        user = conn.execute(query1, (host,)).fetchone()
        if user:
            return user[0]
        last = conn.execute(query2).fetchone()[0]
        later = last + 1 if last else 1
        conn.execute(query3, (later, host))
        return later
@@ -0,0 +1,68 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from earwigbot.commands import Command | |||||
class Quit(Command):
    """Quit, restart, or reload components from the bot. Only the owners can
    run this command."""
    name = "quit"
    commands = ["quit", "restart", "reload"]

    def process(self, data):
        """Check that the sender is a bot owner, then run the chosen action."""
        permissions = self.config.irc["permissions"]
        if not permissions.is_owner(data):
            self.reply(data, "You must be a bot owner to use this command.")
            return

        # Anything other than "quit" or "restart" is treated as "reload".
        actions = {"quit": self.do_quit, "restart": self.do_restart}
        handler = actions.get(data.command, self.do_reload)
        handler(data)

    def do_quit(self, data):
        """Stop the bot, optionally with a reason.

        When the command was not triggered by addressing the bot by nick,
        the first argument must be the bot's nick as a confirmation; the
        remaining arguments form the reason.
        """
        words = data.args
        if data.trigger != data.my_nick:
            confirmed = words and words[0].lower() == data.my_nick
            if not confirmed:
                self.reply(data, "To confirm this action, the first argument must be my name.")
                return
            words = words[1:]

        reason = " ".join(words)
        if not reason:
            self.bot.stop("Stopped by {0}".format(data.nick))
            return
        self.bot.stop("Stopped by {0}: {1}".format(data.nick, reason))

    def do_restart(self, data):
        """Restart the bot, using any arguments as the reason."""
        if not data.args:
            self.bot.restart("Restarted by {0}".format(data.nick))
            return
        reason = " ".join(data.args)
        self.bot.restart("Restarted by {0}: {1}".format(data.nick, reason))

    def do_reload(self, data):
        """Reload the bot's IRC commands and tasks, logging who asked."""
        self.logger.info("{0} requested command/task reload".format(data.nick))
        for manager in (self.bot.commands, self.bot.tasks):
            manager.load()
        self.reply(data, "IRC commands and bot tasks reloaded.")
@@ -0,0 +1,72 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
import time | |||||
from earwigbot import exceptions | |||||
from earwigbot.commands import Command | |||||
class Registration(Command):
    """Return when a user registered."""
    name = "registration"
    commands = ["registration", "reg", "age"]

    def process(self, data):
        """Reply with the user's registration date and account age.

        The username is every argument joined by spaces; when no arguments
        are given, the nick of whoever issued the command is used instead.
        """
        # Imported here so this block does not depend on the module's import
        # list. timegm() is the UTC counterpart of time.mktime().
        from calendar import timegm

        if not data.args:
            name = data.nick
        else:
            name = ' '.join(data.args)

        site = self.bot.wiki.get_site()
        user = site.get_user(name)

        try:
            reg = user.registration
        except exceptions.UserNotFoundError:
            msg = "The user \x0302{0}\x0F does not exist."
            self.reply(data, msg.format(name))
            return

        date = time.strftime("%b %d, %Y at %H:%M:%S UTC", reg)
        # Both time tuples are in UTC (the date is printed as UTC above), so
        # convert them with timegm(). mktime() would read them as local time
        # and can apply different DST offsets to the two values, skewing the
        # age by an hour.
        age = self.get_diff(timegm(reg), timegm(time.gmtime()))

        if user.gender == "male":
            gender = "He's"
        elif user.gender == "female":
            gender = "She's"
        else:
            gender = "They're"  # Singular they?

        msg = "\x0302{0}\x0F registered on {1}. {2} {3} old."
        self.reply(data, msg.format(name, date, gender, age))

    def get_diff(self, t1, t2):
        """Format the time elapsed from *t1* to *t2* (both in seconds).

        The difference is truncated to whole seconds and broken down into
        years, days, hours, minutes and seconds; units with a zero count are
        omitted, and a zero difference is rendered as "0 seconds". Floor
        division is used explicitly so the result does not depend on how
        ``/`` treats integers.
        """
        parts = [("year", 31536000), ("day", 86400), ("hour", 3600),
                 ("minute", 60), ("second", 1)]
        remaining = int(t2 - t1)
        msg = []
        for name, size in parts:
            num, remaining = divmod(remaining, size)
            if num:
                chunk = "{0} {1}".format(num, name if num == 1 else name + "s")
                msg.append(chunk)
        return ", ".join(msg) if msg else "0 seconds"
@@ -0,0 +1,62 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from threading import Timer | |||||
import time | |||||
from earwigbot.commands import Command | |||||
class Remind(Command):
    """Set a message to be repeated to you in a certain amount of time."""
    name = "remind"
    commands = ["remind", "reminder"]

    def process(self, data):
        """Schedule a reminder from ``!remind <seconds> <message>``.

        Replies with a usage hint when the arguments are missing or the
        delay is not an integer; otherwise confirms the reminder and starts
        a daemon timer thread that replies with the message once the delay
        has elapsed.
        """
        arguments = data.args
        if not arguments:
            self.reply(data, "Please specify a time (in seconds) and a message in the following format: !remind <time> <msg>.")
            return

        try:
            wait = int(arguments[0])
        except ValueError:
            self.reply(data, "The time must be given as an integer, in seconds.")
            return

        message = ' '.join(arguments[1:])
        if not message:
            self.reply(data, "What message do you want me to give you when time is up?")
            return

        # Local time at which the reminder fires, rendered both with and
        # without the timezone name.
        deadline = time.localtime(time.time() + wait)
        stamp = time.strftime("%b %d %H:%M:%S", deadline)
        stamp_tz = time.strftime("%b %d %H:%M:%S %Z", deadline)

        confirmation = 'Set reminder for "{0}" in {1} seconds (ends {2}).'
        self.reply(data, confirmation.format(message, wait, stamp_tz))

        # The thread name carries the end time so that other code can
        # recognise reminder threads by the "reminder" prefix.
        timer = Timer(wait, self.reply, args=(data, message))
        timer.name = "reminder " + stamp
        timer.daemon = True
        timer.start()
@@ -0,0 +1,52 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from earwigbot import exceptions | |||||
from earwigbot.commands import Command | |||||
class Rights(Command):
    """Retrieve a list of rights for a given username."""
    name = "rights"
    commands = ["rights", "groups", "permissions", "privileges"]

    def process(self, data):
        """Reply with the groups that a user belongs to.

        The username comes from the command arguments (joined by spaces),
        falling back to the nick of whoever issued the command.
        """
        name = ' '.join(data.args) if data.args else data.nick
        user = self.bot.wiki.get_site().get_user(name)

        try:
            rights = user.groups
        except exceptions.UserNotFoundError:
            self.reply(data, "The user \x0302{0}\x0F does not exist.".format(name))
            return

        # Drop the '*' group given to everyone, if it is present.
        if "*" in rights:
            rights.remove("*")

        summary = "The rights for \x0302{0}\x0F are {1}."
        self.reply(data, summary.format(name, ', '.join(rights)))
@@ -0,0 +1,37 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
import random | |||||
from earwigbot.commands import Command | |||||
class Test(Command):
    """Test the bot!"""
    name = "test"

    def process(self, data):
        """Greet the caller in the channel with one of two random phrases."""
        # Wrap the nick in IRC bold markers.
        user = "".join(("\x02", data.nick, "\x0F"))
        greeting = "Hey {0}!" if random.randint(0, 1) else "'Sup {0}?"
        self.say(data.chan, greeting.format(user))
@@ -0,0 +1,143 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
import threading | |||||
import re | |||||
from earwigbot.commands import Command | |||||
class Threads(Command):
    """Manage wiki tasks from IRC, and check on thread status."""
    name = "threads"
    commands = ["tasks", "task", "threads", "tasklist"]

    def process(self, data):
        """Dispatch ``!tasks``/``!threads`` to the matching sub-command.

        Only bot owners may use this command. The first argument selects
        the action (``list``, ``start``, ``listall``/``all``); the
        ``!tasklist`` alias with no arguments behaves like ``list``.
        """
        # Remembered so the do_* helpers can reply without extra arguments.
        self.data = data
        if not self.config.irc["permissions"].is_owner(data):
            msg = "You must be a bot owner to use this command."
            self.reply(data, msg)
            return

        if not data.args:
            if data.command == "tasklist":
                self.do_list()
            else:
                msg = "No arguments provided. Maybe you wanted '!{0} list', '!{0} start', or '!{0} listall'?"
                self.reply(data, msg.format(data.command))
            return

        if data.args[0] == "list":
            self.do_list()
        elif data.args[0] == "start":
            self.do_start()
        elif data.args[0] in ["listall", "all"]:
            self.do_listall()
        else:  # They asked us to do something we don't know
            msg = "Unknown argument: \x0303{0}\x0F.".format(data.args[0])
            self.reply(data, msg)

    def do_list(self):
        """With !tasks list (or abbreviation !tasklist), list all running
        threads. This includes the main threads, like the irc frontend and the
        watcher, and task threads."""
        threads = threading.enumerate()

        normal_threads = []
        daemon_threads = []

        for thread in threads:
            tname = thread.name
            if tname == "MainThread":
                t = "\x0302MainThread\x0F (id {0})"
                normal_threads.append(t.format(thread.ident))
            elif tname in self.config.components:
                t = "\x0302{0}\x0F (id {1})"
                normal_threads.append(t.format(tname, thread.ident))
            elif tname.startswith("reminder"):
                tname = tname.replace("reminder ", "")
                t = "\x0302reminder\x0F (until {0})"
                normal_threads.append(t.format(tname))
            else:
                # Command/task threads are expected to be named
                # "<name> (<start time>)". Threads created elsewhere (for
                # example with a default "Thread-N" name) do not match, so
                # fall back to name and id instead of raising IndexError.
                match = re.match(r"^(.*?) \((.*?)\)$", tname)
                if match:
                    tname, start_time = match.groups()
                    t = "\x0302{0}\x0F (id {1}, since {2})"
                    daemon_threads.append(t.format(tname, thread.ident,
                                                   start_time))
                else:
                    t = "\x0302{0}\x0F (id {1})"
                    daemon_threads.append(t.format(tname, thread.ident))

        if daemon_threads:
            if len(daemon_threads) > 1:
                msg = "\x02{0}\x0F threads active: {1}, and \x02{2}\x0F command/task threads: {3}."
            else:
                msg = "\x02{0}\x0F threads active: {1}, and \x02{2}\x0F command/task thread: {3}."
            msg = msg.format(len(threads), ', '.join(normal_threads),
                             len(daemon_threads), ', '.join(daemon_threads))
        else:
            msg = "\x02{0}\x0F threads active: {1}, and \x020\x0F command/task threads."
            msg = msg.format(len(threads), ', '.join(normal_threads))

        self.reply(self.data, msg)

    def do_listall(self):
        """With !tasks listall or !tasks all, list all loaded tasks, and report
        whether they are currently running or idle."""
        threads = threading.enumerate()
        tasklist = []
        for task in sorted([task.name for task in self.bot.tasks]):
            # A task counts as active when some thread's name starts with
            # the task's name.
            threadlist = [t for t in threads if t.name.startswith(task)]
            ids = [str(t.ident) for t in threadlist]
            if not ids:
                tasklist.append("\x0302{0}\x0F (idle)".format(task))
            elif len(ids) == 1:
                t = "\x0302{0}\x0F (\x02active\x0F as id {1})"
                tasklist.append(t.format(task, ids[0]))
            else:
                t = "\x0302{0}\x0F (\x02active\x0F as ids {1})"
                tasklist.append(t.format(task, ', '.join(ids)))

        tasks = ", ".join(tasklist)

        msg = "\x02{0}\x0F tasks loaded: {1}.".format(len(tasklist), tasks)
        self.reply(self.data, msg)

    def do_start(self):
        """With !tasks start, start any loaded task by name with or without
        kwargs."""
        data = self.data

        try:
            task_name = data.args[1]
        except IndexError:  # No task name given
            self.reply(data, "What task do you want me to start?")
            return

        if task_name not in [task.name for task in self.bot.tasks]:
            # This task does not exist or hasn't been loaded:
            msg = "Task could not be found; either it doesn't exist, or it wasn't loaded correctly."
            self.reply(data, msg)
            return

        # Let the task know it was triggered from IRC rather than a schedule.
        data.kwargs["fromIRC"] = True
        self.bot.tasks.start(task_name, **data.kwargs)
        msg = "Task \x0302{0}\x0F started.".format(task_name)
        self.reply(data, msg)
@@ -0,0 +1,65 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from datetime import datetime | |||||
from math import floor | |||||
from time import time | |||||
import pytz | |||||
from earwigbot.commands import Command | |||||
class Time(Command):
    """Report the current time in any timezone (UTC default), or in beats."""
    name = "time"
    commands = ["time", "beats", "swatch"]

    def process(self, data):
        """Reply with the time in beats or in the requested timezone.

        ``!beats``/``!swatch`` (or ``!time beats``/``!time swatch``) report
        the time in beats; otherwise the first argument is treated as a
        timezone name, defaulting to UTC.
        """
        beat_words = ["beats", "swatch"]
        if data.command in beat_words:
            self.do_beats(data)
            return

        timezone = data.args[0] if data.args else "UTC"
        if timezone in beat_words:
            self.do_beats(data)
            return
        self.do_time(data, timezone)

    def do_beats(self, data):
        """Reply with the current time in beats, as ``@NNN``."""
        # A day is split into 1000 beats of 86.4 seconds each, counted from
        # midnight at UTC+1 (hence the one-hour offset).
        seconds_into_day = (time() + 3600) % 86400
        beats = int(floor(seconds_into_day / 86.4))
        self.reply(data, "@{0:0>3}".format(beats))

    def do_time(self, data, timezone):
        """Reply with the current date and time in the named *timezone*."""
        if not pytz:
            self.reply(data, "This command requires the 'pytz' module: http://pytz.sourceforge.net/")
            return

        try:
            zone = pytz.timezone(timezone)
        except pytz.exceptions.UnknownTimeZoneError:
            self.reply(data, "Unknown timezone: {0}.".format(timezone))
            return

        utc_now = pytz.utc.localize(datetime.utcnow())
        local_now = utc_now.astimezone(zone)
        self.reply(data, local_now.strftime("%Y-%m-%d %H:%M:%S %Z"))
@@ -0,0 +1,48 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from unicodedata import normalize | |||||
from earwigbot.commands import Command | |||||
class Trout(Command):
    """Slap someone with a trout, or related fish."""
    name = "trout"
    commands = ["trout", "whale"]

    def setup(self):
        """Read the optional per-target replies from this command's config."""
        try:
            settings = self.config.commands[self.name]
            self.exceptions = settings["exceptions"]
        except KeyError:
            # Nothing configured for this command: no special cases.
            self.exceptions = {}

    def process(self, data):
        """Slap the target, or send a configured reply for special targets.

        The target is the joined command arguments, falling back to the
        caller's own nick.
        """
        fish = data.command
        victim = " ".join(data.args) or data.nick
        if victim == "yourself":
            victim = "himself"

        # Targets are looked up by their lowercased, NFKD-normalized form.
        lookup = normalize("NFKD", victim.decode("utf8")).lower()
        if lookup not in self.exceptions:
            template = "slaps \x02{0}\x0F around a bit with a large {1}."
            self.action(data.chan, template.format(victim, fish))
            return
        self.reply(data, self.exceptions[lookup])
@@ -0,0 +1,347 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from collections import OrderedDict | |||||
from getpass import getpass | |||||
from hashlib import sha256 | |||||
import logging | |||||
import logging.handlers | |||||
from os import mkdir, path | |||||
import stat | |||||
from Crypto.Cipher import Blowfish | |||||
import bcrypt | |||||
import yaml | |||||
from earwigbot.config.formatter import BotFormatter | |||||
from earwigbot.config.node import ConfigNode | |||||
from earwigbot.config.ordered_yaml import OrderedLoader | |||||
from earwigbot.config.permissions import PermissionsDB | |||||
from earwigbot.config.script import ConfigScript | |||||
from earwigbot.exceptions import NoConfigError | |||||
__all__ = ["BotConfig"] | |||||
class BotConfig(object):
    """
    **EarwigBot: YAML Config File Manager**

    This handles all tasks involving reading and writing to our config file,
    including encrypting and decrypting passwords and making a new config file
    from scratch at the initial bot run.

    BotConfig has a few attributes and methods, including the following:

    - :py:attr:`root_dir`:   bot's working directory; contains
      :file:`config.yml`, :file:`logs/`
    - :py:attr:`path`:       path to the bot's config file
    - :py:attr:`components`: enabled components
    - :py:attr:`wiki`:       information about wiki-editing
    - :py:attr:`irc`:        information about IRC
    - :py:attr:`commands`:   information about IRC commands
    - :py:attr:`tasks`:      information for bot tasks
    - :py:attr:`metadata`:   miscellaneous information
    - :py:meth:`schedule`:   tasks scheduled to run at a given time

    BotConfig also has some methods used in config loading:

    - :py:meth:`load`:    loads (or reloads) and parses our config file
    - :py:meth:`decrypt`: decrypts an object in the config tree
    """

    def __init__(self, bot, root_dir, level):
        self._bot = bot
        self._root_dir = root_dir
        self._logging_level = level
        self._config_path = path.join(self.root_dir, "config.yml")
        self._log_dir = path.join(self.root_dir, "logs")
        perms_file = path.join(self.root_dir, "permissions.db")
        self._permissions = PermissionsDB(perms_file)
        self._decryption_cipher = None
        self._data = None
        # Created by _setup_logging(); stays None until the config is loaded
        # so that the logging_level setter can be used safely before then.
        self._stream_handler = None

        self._components = ConfigNode()
        self._wiki = ConfigNode()
        self._irc = ConfigNode()
        self._commands = ConfigNode()
        self._tasks = ConfigNode()
        self._metadata = ConfigNode()

        self._nodes = [self._components, self._wiki, self._irc, self._commands,
                       self._tasks, self._metadata]

        self._decryptable_nodes = [  # Default nodes to decrypt
            (self._wiki, ("password",)),
            (self._wiki, ("search", "credentials", "key")),
            (self._wiki, ("search", "credentials", "secret")),
            (self._irc, ("frontend", "nickservPassword")),
            (self._irc, ("watcher", "nickservPassword")),
        ]

    def __repr__(self):
        """Return the canonical string representation of the BotConfig."""
        res = "BotConfig(root_dir={0!r}, level={1!r})"
        return res.format(self.root_dir, self.logging_level)

    def __str__(self):
        """Return a nice string representation of the BotConfig."""
        return "<BotConfig at {0}>".format(self.root_dir)

    def _handle_missing_config(self):
        """Offer to create a config file interactively.

        Raises :py:exc:`NoConfigError` if the user declines or interrupts
        the setup script.
        """
        print("Config file missing or empty: {0}".format(self._config_path))
        msg = "Would you like to create a config file now? [Y/n] "
        choice = raw_input(msg)
        if choice.lower().startswith("n"):
            raise NoConfigError()
        try:
            ConfigScript(self).make_new()
        except KeyboardInterrupt:
            raise NoConfigError()

    def _load(self):
        """Load data from our YAML config file (config.yml) into self._data."""
        filename = self._config_path
        with open(filename, 'r') as fp:
            try:
                self._data = yaml.load(fp, OrderedLoader)
            except yaml.YAMLError:
                print("Error parsing config file {0}:".format(filename))
                raise

    def _setup_logging(self):
        """Configures the logging module so it works the way we want it to."""
        log_dir = self._log_dir
        logger = logging.getLogger("earwigbot")
        logger.handlers = []  # Remove any handlers already attached to us
        # The handler list was just cleared, so any previous stream handler
        # is no longer attached; forget it until a new one is created below.
        self._stream_handler = None
        logger.setLevel(logging.DEBUG)

        color_formatter = BotFormatter(color=True)
        formatter = BotFormatter()

        if self.metadata.get("enableLogging"):
            hand = logging.handlers.TimedRotatingFileHandler
            logfile = lambda f: path.join(log_dir, f)

            if not path.isdir(log_dir):
                if not path.exists(log_dir):
                    # Owner-only read/write/execute on the log directory.
                    mkdir(log_dir, stat.S_IWUSR | stat.S_IRUSR | stat.S_IXUSR)
                else:
                    msg = "log_dir ({0}) exists but is not a directory!"
                    print(msg.format(log_dir))
                    return

            # Arguments after the filename: rotation unit, interval, and
            # number of old log files to keep.
            main_handler = hand(logfile("bot.log"), "midnight", 1, 7)
            error_handler = hand(logfile("error.log"), "W6", 1, 4)
            debug_handler = hand(logfile("debug.log"), "H", 1, 6)

            main_handler.setLevel(logging.INFO)
            error_handler.setLevel(logging.WARNING)
            debug_handler.setLevel(logging.DEBUG)

            for h in (main_handler, error_handler, debug_handler):
                h.setFormatter(formatter)
                logger.addHandler(h)

        self._stream_handler = stream = logging.StreamHandler()
        stream.setLevel(self._logging_level)
        stream.setFormatter(color_formatter)
        logger.addHandler(stream)

    def _decrypt(self, node, nodes):
        """Try to decrypt the contents of a config node. Use self.decrypt()."""
        try:
            node._decrypt(self._decryption_cipher, nodes[:-1], nodes[-1])
        except ValueError:
            print("Error decrypting passwords:")
            raise

    @property
    def bot(self):
        """The config's Bot object."""
        return self._bot

    @property
    def root_dir(self):
        """The bot's root directory containing its config file and more."""
        return self._root_dir

    @property
    def logging_level(self):
        """The minimum logging level for messages logged via stdout."""
        return self._logging_level

    @logging_level.setter
    def logging_level(self, level):
        self._logging_level = level
        # The stream handler only exists once logging has been set up; until
        # then the stored level is applied when the handler is created.
        if self._stream_handler is not None:
            self._stream_handler.setLevel(level)

    @property
    def path(self):
        """The path to the bot's config file."""
        return self._config_path

    @property
    def log_dir(self):
        """The directory containing the bot's logs."""
        return self._log_dir

    @property
    def data(self):
        """The entire config file as a decoded YAML object."""
        return self._data

    @property
    def components(self):
        """A dict of enabled components."""
        return self._components

    @property
    def wiki(self):
        """A dict of information about wiki-editing."""
        return self._wiki

    @property
    def irc(self):
        """A dict of information about IRC."""
        return self._irc

    @property
    def commands(self):
        """A dict of information for IRC commands."""
        return self._commands

    @property
    def tasks(self):
        """A dict of information for bot tasks."""
        return self._tasks

    @property
    def metadata(self):
        """A dict of miscellaneous information."""
        return self._metadata

    def is_loaded(self):
        """Return ``True`` if our config file has been loaded, or ``False``."""
        return self._data is not None

    def is_encrypted(self):
        """Return ``True`` if passwords are encrypted, otherwise ``False``."""
        return self.metadata.get("encryptPasswords", False)

    def load(self):
        """Load, or reload, our config file.

        First, check if we have a valid config file, and if not, notify the
        user. If there is no config file at all, offer to make one, otherwise
        exit.

        Data from the config file is stored in six
        :py:class:`~earwigbot.config.ConfigNode`\\ s (:py:attr:`components`,
        :py:attr:`wiki`, :py:attr:`irc`, :py:attr:`commands`, :py:attr:`tasks`,
        :py:attr:`metadata`) for easy access (as well as the lower-level
        :py:attr:`data` attribute). If passwords are encrypted, we'll use
        :py:func:`~getpass.getpass` for the key and then decrypt them. If the
        config is being reloaded, encrypted items will be automatically
        decrypted if they were decrypted earlier.

        Raises :py:exc:`RuntimeError` if the decryption key entered by the
        user does not match the stored signature.
        """
        if not path.exists(self._config_path):
            self._handle_missing_config()
        self._load()
        if not self._data:
            self._handle_missing_config()
            self._load()

        self.components._load(self._data.get("components", OrderedDict()))
        self.wiki._load(self._data.get("wiki", OrderedDict()))
        self.irc._load(self._data.get("irc", OrderedDict()))
        self.commands._load(self._data.get("commands", OrderedDict()))
        self.tasks._load(self._data.get("tasks", OrderedDict()))
        self.metadata._load(self._data.get("metadata", OrderedDict()))

        self._setup_logging()
        if self.is_encrypted():
            if not self._decryption_cipher:
                key = getpass("Enter key to decrypt bot passwords: ")
                # Verify the key *before* caching the cipher. Otherwise a
                # wrong key would leave a bad cipher behind and the next
                # load() would skip this check entirely.
                signature = self.metadata["signature"]
                if bcrypt.hashpw(key, signature) != signature:
                    raise RuntimeError("Incorrect password.")
                self._decryption_cipher = Blowfish.new(sha256(key).digest())
            for node, nodes in self._decryptable_nodes:
                self._decrypt(node, nodes)

        if self.irc:
            self.irc["permissions"] = self._permissions
            self._permissions.load()

    def decrypt(self, node, *nodes):
        """Decrypt an object in our config tree.

        :py:attr:`_decryption_cipher` is used as our key, retrieved using
        :py:func:`~getpass.getpass` in :py:meth:`load` if it wasn't already
        specified. If this is called when passwords are not encrypted (check
        with :py:meth:`is_encrypted`), nothing will happen. We'll also keep
        track of this node if :py:meth:`load` is called again (i.e. to reload)
        and automatically decrypt it.

        Example usage::

            >>> config.decrypt(config.irc, "frontend", "nickservPassword")
            # decrypts config.irc["frontend"]["nickservPassword"]
        """
        signature = (node, nodes)
        if signature in self._decryptable_nodes:
            return  # Already decrypted
        self._decryptable_nodes.append(signature)
        if self.is_encrypted():
            self._decrypt(node, nodes)

    def schedule(self, minute, hour, month_day, month, week_day):
        """Return a list of tasks scheduled to run at the specified time.

        The schedule data comes from our config file's ``schedule`` field,
        which is stored as :py:attr:`self.data["schedule"] <data>`.
        """
        # Tasks to run this turn, each as a list of either [task_name, kwargs],
        # or just the task_name:
        tasks = []

        now = {"minute": minute, "hour": hour, "month_day": month_day,
               "month": month, "week_day": week_day}

        for event in self._data.get("schedule", []):
            # An event matches when every time field it specifies equals the
            # current value; fields it leaves out match any time.
            matches = all(event[key] == value
                          for key, value in now.items() if key in event)
            if matches:
                tasks.extend(event.get("tasks", []))

        return tasks
@@ -0,0 +1,51 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
import logging | |||||
__all__ = ["BotFormatter"] | |||||
class BotFormatter(logging.Formatter):
    """Log formatter that can optionally color the level name.

    Records are rendered as ``[<time> <LEVEL>] <logger>: <message>`` with
    the level name padded to eight characters. When *color* is true, the
    level name is wrapped in ANSI escape sequences chosen by severity.
    """

    # ANSI escape sequence used to color each standard logging level.
    _LEVEL_COLORS = {
        logging.DEBUG: "\x1b[34m",             # Blue
        logging.INFO: "\x1b[32m",              # Green
        logging.WARNING: "\x1b[33m",           # Yellow
        logging.ERROR: "\x1b[31m",             # Red
        logging.CRITICAL: "\x1b[1m\x1b[31m",   # Bold red
    }
    _RESET = "\x1b[0m"

    def __init__(self, color=False):
        self._color = color
        if color:
            # %(lvl)s is filled in by format_color() before formatting.
            fmt = "[%(asctime)s %(lvl)s] %(name)s: %(message)s"
        else:
            fmt = "[%(asctime)s %(levelname)-8s] %(name)s: %(message)s"
        datefmt = "%Y-%m-%d %H:%M:%S"
        super(BotFormatter, self).__init__(fmt=fmt, datefmt=datefmt)

    def format(self, record):
        """Format *record*, adding the colored level name first if enabled."""
        if self._color:
            record = self.format_color(record)
        return super(BotFormatter, self).format(record)

    def format_color(self, record):
        """Set ``record.lvl`` to the padded, colored level name.

        Levels other than the five standard ones get the padded name with
        no color, so that ``%(lvl)s`` is always defined and formatting never
        fails on a custom level number. Returns the same record.
        """
        padded = record.levelname.ljust(8)
        color = self._LEVEL_COLORS.get(record.levelno)
        if color is None:
            record.lvl = padded
        else:
            record.lvl = color + padded + self._RESET
        return record
@@ -0,0 +1,104 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from collections import OrderedDict | |||||
__all__ = ["ConfigNode"] | |||||
class ConfigNode(object):
    """An ordered mapping whose entries can also be used as attributes.

    All entries live in ``self._data`` (an ``OrderedDict``). ``node[key]`` and
    ``node.key`` read and write the same entry; looking up a missing entry
    raises ``KeyError`` in both spellings.
    """

    def __init__(self):
        self._data = OrderedDict()

    def __repr__(self):
        # __repr__ must return a string; returning the mapping itself makes
        # repr(node) raise TypeError.
        return repr(self._data)

    def __nonzero__(self):
        return bool(self._data)

    def __len__(self):
        return len(self._data)

    def __getitem__(self, key):
        return self._data[key]

    def __setitem__(self, key, item):
        self._data[key] = item

    def __getattr__(self, key):
        # Only called when normal attribute lookup fails. If "_data" itself
        # is missing (the instance was created without __init__), looking it
        # up below would recurse forever, so report it as a missing attribute.
        if key == "_data":
            raise AttributeError(key)
        return self._data[key]

    def __setattr__(self, key, item):
        # "_data" is the only real instance attribute; every other name is
        # stored as an entry of the mapping.
        if key == "_data":
            super(ConfigNode, self).__setattr__(key, item)
        else:
            self._data[key] = item

    def __iter__(self):
        for key in self._data:
            yield key

    def __contains__(self, item):
        return item in self._data

    def _dump(self):
        """Return a copy of the data with nested ConfigNodes dumped too."""
        data = self._data.copy()
        # Iterate over a snapshot because values are replaced inside the loop.
        for key, val in list(data.items()):
            if isinstance(val, ConfigNode):
                data[key] = val._dump()
        return data

    def _load(self, data):
        """Replace this node's data with a shallow copy of *data*."""
        self._data = data.copy()

    def _decrypt(self, cipher, intermediates, item):
        """Decrypt, in place, the entry *item* found under *intermediates*.

        *intermediates* is the sequence of keys leading from this node to the
        mapping that holds *item*. Nothing happens if one of those keys, or
        *item* itself, is missing. The stored value is expected to be
        hex-encoded ciphertext; trailing NUL bytes are stripped from the
        decrypted result.
        """
        base = self._data
        for inter in intermediates:
            try:
                base = base[inter]
            except KeyError:
                return
        if item in base:
            ciphertext = base[item].decode("hex")
            base[item] = cipher.decrypt(ciphertext).rstrip("\x00")

    def get(self, *args, **kwargs):
        return self._data.get(*args, **kwargs)

    def keys(self):
        return self._data.keys()

    def values(self):
        return self._data.values()

    def items(self):
        return self._data.items()

    def iterkeys(self):
        return self._data.iterkeys()

    def itervalues(self):
        return self._data.itervalues()

    def iteritems(self):
        return self._data.iteritems()
@@ -0,0 +1,106 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
""" | |||||
Based on: | |||||
* https://gist.github.com/844388 | |||||
* http://pyyaml.org/attachment/ticket/161/use_ordered_dict.py | |||||
with modifications. | |||||
""" | |||||
from collections import OrderedDict | |||||
import yaml | |||||
__all__ = ["OrderedLoader", "OrderedDumper"] | |||||
class OrderedLoader(yaml.Loader):
    """YAML loader that turns mapping nodes into ``OrderedDict`` objects."""

    def __init__(self, *args, **kwargs):
        super(OrderedLoader, self).__init__(*args, **kwargs)
        build_map = type(self).construct_yaml_map
        # Route both plain and ordered YAML maps through our constructor.
        for tag in (u"tag:yaml.org,2002:map", u"tag:yaml.org,2002:omap"):
            self.add_constructor(tag, build_map)

    def construct_yaml_map(self, node):
        """Yield an empty OrderedDict, then fill it from *node*."""
        result = OrderedDict()
        yield result
        result.update(self.construct_mapping(node))

    def construct_mapping(self, node, deep=False):
        """Build an OrderedDict from the key/value pairs of a mapping node.

        Raises ``yaml.constructor.ConstructorError`` if *node* is not a
        mapping node or if one of its keys is not hashable.
        """
        if not isinstance(node, yaml.MappingNode):
            raise yaml.constructor.ConstructorError(
                None, None,
                "expected a mapping node, but found {0}".format(node.id),
                node.start_mark)
        self.flatten_mapping(node)

        result = OrderedDict()
        for key_node, value_node in node.value:
            key = self.construct_object(key_node, deep=deep)
            try:
                hash(key)
            except TypeError as error:
                raise yaml.constructor.ConstructorError(
                    "while constructing a mapping", node.start_mark,
                    "found unacceptable key ({0})".format(error),
                    key_node.start_mark)
            result[key] = self.construct_object(value_node, deep=deep)
        return result
class OrderedDumper(yaml.SafeDumper):
    """YAML dumper that writes ``OrderedDict`` objects as plain mappings."""

    def __init__(self, *args, **kwargs):
        super(OrderedDumper, self).__init__(*args, **kwargs)
        self.add_representer(OrderedDict, type(self).represent_dict)

    def represent_mapping(self, tag, mapping, flow_style=None):
        """Return a MappingNode for *mapping*, keeping its iteration order."""
        pairs = []
        node = yaml.MappingNode(tag, pairs, flow_style=flow_style)
        if self.alias_key is not None:
            self.represented_objects[self.alias_key] = node
        if hasattr(mapping, "items"):
            mapping = list(mapping.items())

        def is_plain_scalar(candidate):
            # A scalar node with no explicit style.
            return (isinstance(candidate, yaml.ScalarNode) and
                    not candidate.style)

        only_plain_scalars = True
        for raw_key, raw_value in mapping:
            key_node = self.represent_data(raw_key)
            value_node = self.represent_data(raw_value)
            if not (is_plain_scalar(key_node) and
                    is_plain_scalar(value_node)):
                only_plain_scalars = False
            pairs.append((key_node, value_node))

        if flow_style is None:
            # The dumper's default wins; otherwise use flow style only when
            # every key and value is a plain scalar.
            if self.default_flow_style is None:
                node.flow_style = only_plain_scalars
            else:
                node.flow_style = self.default_flow_style
        return node
@@ -0,0 +1,176 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from fnmatch import fnmatch | |||||
import sqlite3 as sqlite | |||||
from threading import Lock | |||||
__all__ = ["PermissionsDB"] | |||||
class PermissionsDB(object):
    """
    **EarwigBot: Permissions Database Manager**

    Controls the :file:`permissions.db` file, which stores the bot's owners and
    admins for the purposes of using certain dangerous IRC commands.

    Rules are kept both in the SQLite file and in an in-memory dict that maps
    each rank to a list of rule objects; lookups only use the in-memory copy.
    """
    ADMIN = 1
    OWNER = 2

    def __init__(self, dbfile):
        self._dbfile = dbfile
        self._db_access_lock = Lock()
        self._data = {}

    def __repr__(self):
        """Return the canonical string representation of the PermissionsDB."""
        res = "PermissionsDB(dbfile={0!r})"
        return res.format(self._dbfile)

    def __str__(self):
        """Return a nice string representation of the PermissionsDB."""
        return "<PermissionsDB at {0}>".format(self._dbfile)

    def _create(self, conn):
        """Initialize the permissions database with its necessary tables."""
        query = """CREATE TABLE users (user_nick, user_ident, user_host,
                                       user_rank)"""
        conn.execute(query)

    def _is_rank(self, user, rank):
        """Return the first rule of *rank* that *user* matches, else False."""
        for rule in self._data.get(rank, []):
            if user in rule:
                return rule
        return False

    def _set_rank(self, user, rank):
        """Add a User to the database under a given rank and return it."""
        query = "INSERT INTO users VALUES (?, ?, ?, ?)"
        with self._db_access_lock:
            with sqlite.connect(self._dbfile) as conn:
                conn.execute(query, (user.nick, user.ident, user.host, rank))
            self._data.setdefault(rank, []).append(user)
            return user

    def _del_rank(self, user, rank):
        """Remove the first rule of *rank* matching *user*; return it or None.

        The row deleted from the database is the one belonging to the matched
        rule. The rule may be broader than *user* (wildcards), so deleting by
        *user*'s own fields would leave the rule's row in the database while
        it disappears from memory, and it would come back on the next load().
        """
        query = """DELETE FROM users WHERE user_nick = ? AND user_ident = ? AND
                   user_host = ? AND user_rank = ?"""
        with self._db_access_lock:
            rules = self._data.get(rank, [])
            for rule in rules:
                if user in rule:
                    with sqlite.connect(self._dbfile) as conn:
                        args = (rule.nick, rule.ident, rule.host, rank)
                        conn.execute(query, args)
                    # Safe while iterating: we return right after removing.
                    rules.remove(rule)
                    return rule
        return None

    @property
    def data(self):
        """A dict of all entries in the permissions database."""
        return self._data

    def load(self):
        """Load permissions from an existing database, or create a new one."""
        query = "SELECT user_nick, user_ident, user_host, user_rank FROM users"
        self._data = {}
        with sqlite.connect(self._dbfile) as conn, self._db_access_lock:
            try:
                for nick, ident, host, rank in conn.execute(query):
                    rule = _User(nick, ident, host)
                    self._data.setdefault(rank, []).append(rule)
            except sqlite.OperationalError:
                # The query failed, which happens when the table does not
                # exist yet: set up a fresh database.
                self._create(conn)

    def has_exact(self, rank, nick="*", ident="*", host="*"):
        """Return the rule of *rank* with exactly these fields, else False."""
        for usr in self._data.get(rank, []):
            if nick == usr.nick and ident == usr.ident and host == usr.host:
                return usr
        return False

    def is_admin(self, data):
        """Return the admin rule matched by the given user, else ``False``."""
        user = _User(data.nick, data.ident, data.host)
        return self._is_rank(user, rank=self.ADMIN)

    def is_owner(self, data):
        """Return the owner rule matched by the given user, else ``False``."""
        user = _User(data.nick, data.ident, data.host)
        return self._is_rank(user, rank=self.OWNER)

    def add_admin(self, nick="*", ident="*", host="*"):
        """Add a nick/ident/host combo to the bot admins list."""
        return self._set_rank(_User(nick, ident, host), rank=self.ADMIN)

    def add_owner(self, nick="*", ident="*", host="*"):
        """Add a nick/ident/host combo to the bot owners list."""
        return self._set_rank(_User(nick, ident, host), rank=self.OWNER)

    def remove_admin(self, nick="*", ident="*", host="*"):
        """Remove a nick/ident/host combo from the bot admins list."""
        return self._del_rank(_User(nick, ident, host), rank=self.ADMIN)

    def remove_owner(self, nick="*", ident="*", host="*"):
        """Remove a nick/ident/host combo from the bot owners list."""
        return self._del_rank(_User(nick, ident, host), rank=self.OWNER)
class _User(object):
    """An IRC user, or a rule of glob patterns that users are tested against.

    ``user in rule`` is true when each of the user's nick, ident and host
    matches the corresponding pattern of the rule.
    """

    def __init__(self, nick, ident, host):
        self.nick, self.ident, self.host = nick, ident, host

    def __repr__(self):
        """Return the canonical string representation of the User."""
        template = "_User(nick={0!r}, ident={1!r}, host={2!r})"
        return template.format(self.nick, self.ident, self.host)

    def __str__(self):
        """Return the User in ``nick!ident@host`` form."""
        return "{0}!{1}@{2}".format(self.nick, self.ident, self.host)

    def __contains__(self, user):
        return (fnmatch(user.nick, self.nick) and
                fnmatch(user.ident, self.ident) and
                fnmatch(user.host, self.host))
@@ -0,0 +1,446 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from collections import OrderedDict | |||||
from getpass import getpass | |||||
from hashlib import sha256 | |||||
from os import chmod, mkdir, path | |||||
import re | |||||
import stat | |||||
import sys | |||||
from textwrap import fill, wrap | |||||
from Crypto.Cipher import Blowfish | |||||
import bcrypt | |||||
import yaml | |||||
from earwigbot import exceptions | |||||
from earwigbot.config.ordered_yaml import OrderedDumper | |||||
__all__ = ["ConfigScript"] | |||||
RULES_TEMPLATE = """# -*- coding: utf-8 -*- | |||||
def process(bot, rc): | |||||
\"\"\"Given a Bot() object and an RC() object, return a list of channels | |||||
to report this event to. Also, start any wiki bot tasks within this | |||||
function if necessary.\"\"\" | |||||
pass | |||||
""" | |||||
class ConfigScript(object): | |||||
"""A script to guide a user through the creation of a new config file.""" | |||||
WIDTH = 79 | |||||
PROMPT = "\x1b[32m> \x1b[0m" | |||||
BCRYPT_ROUNDS = 12 | |||||
def __init__(self, config):
    """Start with an empty config skeleton for the given config object."""
    self.config = config
    # Top-level sections, in the order they are stored in self.data.
    mapping_sections = ("metadata", "components", "wiki", "irc", "commands",
                        "tasks")
    self.data = OrderedDict((name, OrderedDict()) for name in mapping_sections)
    self.data["schedule"] = []

    self._cipher = None
    self._wmf = False
    self._proj = None
    self._lang = None
def _print(self, text):
    """Print *text* with whitespace runs collapsed, wrapped to WIDTH."""
    collapsed = re.sub(r"\s\s+", " ", text)
    print(fill(collapsed, self.WIDTH))
def _print_no_nl(self, text):
    """Like _print(), but without a trailing newline; flushes stdout."""
    wrapped = fill(re.sub(r"\s\s+", " ", text), self.WIDTH)
    out = sys.stdout
    out.write(wrapped)
    out.flush()
def _pause(self):
    """Block until the user presses enter."""
    message = self.PROMPT + "Press enter to continue: "
    raw_input(message)
def _ask(self, text, default=None, require=True):
    """Prompt the user with *text* and return the answer.

    An empty answer yields *default*. If that leaves nothing and *require*
    is true, the question is asked again. When *default* is an integer (for
    example a port number), a typed answer is converted to an integer too,
    so the stored value has the same type either way; an answer that is not
    a number is asked again.
    """
    text = self.PROMPT + text
    if default:
        text += " \x1b[33m[{0}]\x1b[0m".format(default)
    lines = wrap(re.sub(r"\s\s+", " ", text), self.WIDTH)
    if len(lines) > 1:
        print("\n".join(lines[:-1]))
    # bool is a subclass of int, but a True/False default is not a number.
    wants_int = isinstance(default, int) and not isinstance(default, bool)
    while True:
        answer = raw_input(lines[-1] + " ")
        if answer and wants_int:
            try:
                return int(answer)
            except ValueError:
                continue  # Not a number: ask again
        answer = answer or default
        if answer or not require:
            return answer
def _ask_bool(self, text, default=True):
    """Ask a yes/no question; an empty answer returns *default*.

    Any answer starting with "y" or "n" (case-insensitive) is accepted;
    anything else repeats the question.
    """
    hint = " \x1b[33m[Y/n]\x1b[0m" if default else " \x1b[33m[y/N]\x1b[0m"
    lines = wrap(re.sub(r"\s\s+", " ", self.PROMPT + text + hint), self.WIDTH)
    if len(lines) > 1:
        print("\n".join(lines[:-1]))
    last_line = lines[-1] + " "
    while True:
        reply = raw_input(last_line).lower()
        if not reply:
            return default
        for prefix, verdict in (("y", True), ("n", False)):
            if reply.startswith(prefix):
                return verdict
def _ask_pass(self, text, encrypt=True):
    """Read a password without echo; pass it through _encrypt() if asked."""
    secret = getpass(self.PROMPT + text + " ")
    return self._encrypt(secret) if encrypt else secret
def _encrypt(self, password):
    """Return *password* encrypted and hex-encoded, if a cipher is set.

    Without a cipher the password is returned unchanged. Otherwise it is
    padded with NUL bytes to a multiple of eight characters before being
    encrypted.
    """
    if not self._cipher:
        return password
    remainder = len(password) % 8
    if remainder:
        password += "\x00" * (8 - remainder)
    return self._cipher.encrypt(password).encode("hex")
def _ask_list(self, text):
    """Collect lines from the user until a blank one; return them as a list."""
    print(fill(re.sub(r"\s\s+", " ", self.PROMPT + text), self.WIDTH))
    print("[one item per line; blank line to end]:")
    # iter(callable, sentinel) keeps prompting until an empty line is read.
    return list(iter(lambda: raw_input(self.PROMPT), ""))
def _set_metadata(self):
    """Fill the 'metadata' section: password encryption and logging.

    If the user opts in to encryption, an encryption key is read, its bcrypt
    hash is stored as the signature, and a Blowfish cipher keyed with the
    key's SHA-256 digest is kept for encrypting passwords later on.
    """
    metadata = OrderedDict([("version", 1)])
    self.data["metadata"] = metadata
    self._print("""I can encrypt passwords stored in your config file in
                   addition to preventing other users on your system from
                   reading the file. Encryption is recommended if the bot
                   is to run on a public computer like the Toolserver, but
                   otherwise the need to enter a key everytime you start
                   the bot may be annoying.""")
    use_encryption = self._ask_bool("Encrypt stored passwords?")
    metadata["encryptPasswords"] = use_encryption
    if use_encryption:
        key = getpass(self.PROMPT + "Enter an encryption key: ")
        progress = "Running {0} rounds of bcrypt...".format(self.BCRYPT_ROUNDS)
        self._print_no_nl(progress)
        salt = bcrypt.gensalt(self.BCRYPT_ROUNDS)
        metadata["signature"] = bcrypt.hashpw(key, salt)
        self._cipher = Blowfish.new(sha256(key).digest())
        print(" done.")

    self._print("""The bot can temporarily store its logs in the logs/
                   subdirectory. Error logs are kept for a month whereas
                   normal logs are kept for a week. If you disable this,
                   the bot will still print logs to stdout.""")
    metadata["enableLogging"] = self._ask_bool("Enable logging?")
def _set_components(self):
    """Explain the bot's three components and ask which ones to enable."""
    self._print("""The bot contains three separate components that can run
                   independently of each other.""")
    self._print("""- The IRC front-end runs on a normal IRC server, like
                   freenode, and expects users to interact with it through
                   commands.""")
    self._print("""- The IRC watcher runs on a wiki recent-changes server,
                   like irc.wikimedia.org, and listens for edits. Users
                   cannot interact with this component. It can detect
                   specific events and report them to "feed" channels on
                   the front-end or start bot tasks.""")
    self._print("""- The wiki task scheduler runs wiki-editing bot tasks in
                   separate threads at user-defined times through a
                   cron-like interface. Tasks which are not scheduled can
                   be started by the IRC watcher manually through the IRC
                   front-end.""")
    questions = (
        ("irc_frontend", "Enable the IRC front-end?"),
        ("irc_watcher", "Enable the IRC watcher?"),
        ("wiki_scheduler", "Enable the wiki task scheduler?"),
    )
    # Ask everything first, then store the answers.
    answers = [(key, self._ask_bool(question)) for key, question in questions]
    components = self.data["components"]
    for key, enabled in answers:
        components[key] = enabled
def _login(self, kwargs):
    """Try to add the site described by *kwargs*, recovering interactively.

    On an API error the user may re-enter the site information or cancel the
    setup (raising NoConfigError). On a login error the user may re-enter
    the login details, re-enter the site information, or carry on without
    logging in. On success the stored wiki password is replaced by its
    encrypted form and the site is returned. Whenever the site information
    is re-entered, the result of _set_wiki() is returned instead.
    """
    self.config.wiki._load(self.data["wiki"])
    self._print_no_nl("Trying to connect to the site...")
    try:
        site = self.config.bot.wiki.add_site(**kwargs)
    except exceptions.APIError as exc:
        print(" API error!")
        print("\x1b[31m" + exc.message + "\x1b[0m")
        retry = "Would you like to re-enter the site information?"
        if not self._ask_bool(retry):
            confirm = "This will cancel the setup process. Are you sure?"
            if self._ask_bool(confirm, default=False):
                raise exceptions.NoConfigError()
        # Either the user asked to retry, or declined to cancel.
        return self._set_wiki()
    except exceptions.LoginError as exc:
        print(" login error!")
        print("\x1b[31m" + exc.message + "\x1b[0m")
        retry_login = "Would you like to re-enter your login information?"
        if self._ask_bool(retry_login):
            self.data["wiki"]["username"] = self._ask("Bot username:")
            new_password = self._ask_pass("Bot password:", encrypt=False)
            self.data["wiki"]["password"] = new_password
            return self._login(kwargs)

        saved_password = self.data["wiki"]["password"]
        retry_site = "Would you like to re-enter the site information?"
        if self._ask_bool(retry_site):
            return self._set_wiki()
        self._print("""Moving on. You can modify the login information
                       stored in the bot's config in the future.""")
        self.data["wiki"]["password"] = None  # Clear so we don't login
        self.config.wiki._load(self.data["wiki"])
        self._print_no_nl("Trying to connect to the site...")
        site = self.config.bot.wiki.add_site(**kwargs)
        print(" success.")
        self.data["wiki"]["password"] = saved_password  # Reset original value
    else:
        print(" success.")

    # Remember to store the encrypted password:
    encrypted = self._encrypt(self.data["wiki"]["password"])
    self.data["wiki"]["password"] = encrypted
    return site
def _set_wiki(self):
    """Fill the 'wiki' section: site, login details and related settings.

    The site is added through _login(), which may restart this whole step by
    calling _set_wiki() again when the user chooses to re-enter the site
    information. In that case the nested run has already completed the
    section and _login() returns None, so this run stops there.
    """
    self._wmf = self._ask_bool("""Will this bot run on Wikimedia Foundation
                                  wikis, like Wikipedia?""")
    if self._wmf:
        msg = "Site project (e.g. 'wikipedia', 'wiktionary', 'wikimedia'):"
        self._proj = project = self._ask(msg, "wikipedia").lower()
        msg = "Site language code (e.g. 'en', 'fr', 'commons'):"
        self._lang = lang = self._ask(msg, "en").lower()
        kwargs = {"project": project, "lang": lang}
    else:
        msg = "Site base URL, without the script path and trailing slash;"
        msg += " can be protocol-insensitive (e.g. '//en.wikipedia.org'):"
        url = self._ask(msg)
        script = self._ask("Site script path:", "/w")
        kwargs = {"base_url": url, "script_path": script}

    wiki = self.data["wiki"]
    wiki["username"] = self._ask("Bot username:")
    # Kept in plain text for now: _login() needs it to log in and stores the
    # encrypted form itself once the login has worked.
    wiki["password"] = self._ask_pass("Bot password:", encrypt=False)
    wiki["userAgent"] = "EarwigBot/$1 (Python/$2; https://github.com/earwig/earwigbot)"
    wiki["summary"] = "([[WP:BOT|Bot]]): $2"
    wiki["useHTTPS"] = True
    wiki["assert"] = "user"
    wiki["maxlag"] = 10
    wiki["waitTime"] = 2

    site = self._login(kwargs)
    if site is None:
        # _login() went through a nested _set_wiki() run, which filled in
        # everything below using the re-entered answers. Using site.name
        # here would fail, and continuing would ask every question again.
        return
    wiki["defaultSite"] = site.name

    wiki["sql"] = {}
    if self._wmf:
        msg = "Will this bot run from the Wikimedia Toolserver?"
        toolserver = self._ask_bool(msg, default=False)
        if toolserver:
            args = [("host", "$1-p.rrdb.toolserver.org"), ("db", "$1_p")]
            wiki["sql"] = OrderedDict(args)

    wiki["shutoff"] = {}
    msg = "Would you like to enable an automatic shutoff page for the bot?"
    if self._ask_bool(msg):
        self._print("""The page title can contain two wildcards: $1 will be
                       substituted with the bot's username, and $2 with the
                       current task number. This can be used to implement a
                       separate shutoff page for each task.""")
        page = self._ask("Page title:", "User:$1/Shutoff")
        msg = "Page content to indicate the bot is *not* shut off:"
        disabled = self._ask(msg, "run")
        args = [("page", page), ("disabled", disabled)]
        wiki["shutoff"] = OrderedDict(args)

    wiki["search"] = {}
def _set_irc(self):
    """Fill the 'irc' section for whichever IRC components are enabled.

    For the front-end this also offers to register the user's hostname as
    owner and admin in the permissions database. For the watcher it also
    writes a blank 'rules.py' into the config's root directory.
    """
    components = self.data["components"]
    irc = self.data["irc"]

    if components["irc_frontend"]:
        frontend = irc["frontend"] = OrderedDict()
        host_question = "Hostname of the frontend's IRC server, without 'irc://':"
        frontend["host"] = self._ask(host_question, "irc.freenode.net")
        frontend["port"] = self._ask("Frontend port:", 6667)
        frontend["nick"] = self._ask("Frontend bot's nickname:")
        frontend["ident"] = self._ask("Frontend bot's ident:",
                                      frontend["nick"].lower())
        frontend["realname"] = self._ask("Frontend bot's real name (gecos):",
                                         "EarwigBot")
        if self._ask_bool("Should the bot identify to NickServ?"):
            nickserv_user = self._ask("NickServ username:", frontend["nick"])
            nickserv_pass = self._ask_pass("Nickserv password:")
            frontend["nickservUsername"] = nickserv_user
            frontend["nickservPassword"] = nickserv_pass
        frontend["channels"] = self._ask_list(
            "Frontend channels to join by default:")
        self._print("""The bot keeps a database of its admins (users who
                       can use certain sensitive commands) and owners
                       (users who can quit the bot and modify its access
                       list), identified by nick, ident, and/or hostname.
                       Hostname is the most secure option since it cannot
                       be easily spoofed. If you have a cloak, this will
                       probably look like 'wikipedia/Username' or
                       'unaffiliated/nickname'.""")
        owner_host = self._ask("Your hostname on the frontend:", require=False)
        if owner_host:
            permissions = self.config._permissions
            permissions.load()
            permissions.add_owner(host=owner_host)
            permissions.add_admin(host=owner_host)
    else:
        # No front-end answers to borrow defaults from.
        frontend = {}

    if components["irc_watcher"]:
        watcher = irc["watcher"] = OrderedDict()
        if self._wmf:
            watcher["host"] = "irc.wikimedia.org"
            watcher["port"] = 6667
        else:
            host_question = "Hostname of the watcher's IRC server, without 'irc://':"
            watcher["host"] = self._ask(host_question)
            watcher["port"] = self._ask("Watcher port:", 6667)
        # The front-end's answers, when present, serve as defaults.
        nick = self._ask("Watcher bot's nickname:", frontend.get("nick"))
        ident = self._ask("Watcher bot's ident:", nick.lower())
        watcher["nick"] = nick
        watcher["ident"] = ident
        watcher["realname"] = self._ask("Watcher bot's real name (gecos):",
                                        frontend.get("realname", "EarwigBot"))
        if not self._wmf:
            if self._ask_bool("Should the bot identify to NickServ?"):
                nickserv_user = self._ask("NickServ username:", watcher["nick"])
                nickserv_pass = self._ask_pass("Nickserv password:")
                watcher["nickservUsername"] = nickserv_user
                watcher["nickservPassword"] = nickserv_pass
        if self._wmf:
            watcher["channels"] = ["#{0}.{1}".format(self._lang, self._proj)]
        else:
            watcher["channels"] = self._ask_list(
                "Watcher channels to join by default:")
        self._print("""I am now creating a blank 'rules.py' file, which
                       will determine how the bot handles messages received
                       from the IRC watcher. It contains a process()
                       function that takes a Bot object (allowing you to
                       start tasks) and an RC object (storing the message
                       from the watcher). See the documentation for
                       details.""")
        rules_path = path.join(self.config.root_dir, "rules.py")
        with open(rules_path, "w") as rules_file:
            rules_file.write(RULES_TEMPLATE)
        self._pause()

    irc["version"] = "EarwigBot - $1 - Python/$2 https://github.com/earwig/earwigbot"
def _set_commands(self):
    """Fill the 'commands' section and create the 'commands/' directory.

    The default IRC commands are disabled when the IRC front-end is off (in
    which case the question is not asked) or when the user asks for it. An
    already existing 'commands/' directory is left as it is, so that running
    the setup again in the same place does not fail at this step.
    """
    msg = """Would you like to disable the default IRC commands? You can
             fine-tune which commands are disabled later on."""
    if (not self.data["components"]["irc_frontend"] or
            self._ask_bool(msg, default=False)):
        self.data["commands"]["disable"] = True
    self._print("""I am now creating the 'commands/' directory, where you
                   can place custom IRC commands and plugins. Creating your
                   own commands is described in the documentation.""")
    commands_dir = path.join(self.config.root_dir, "commands")
    if not path.isdir(commands_dir):
        mkdir(commands_dir)
    self._pause()
def _set_tasks(self):
    """Create the 'tasks/' directory for custom bot tasks.

    The directory is made inside ``self.config.root_dir`` after telling the
    user what it is for; no config values are changed here.
    """
    announcement = """I am now creating the 'tasks/' directory, where you can
                      place custom bot tasks and plugins. Creating your own
                      tasks is described in the documentation."""
    self._print(announcement)
    tasks_dir = path.join(self.config.root_dir, "tasks")
    mkdir(tasks_dir)
    self._pause()
def _set_schedule(self):
    """Explain the 'schedule' config section and print sample entries.

    This step is purely informational: it writes nothing to ``self.data``.
    The samples are printed directly (not through ``self._print``) so their
    indentation survives, and they are nested the way a real schedule entry
    must be: each list item is a mapping whose ``tasks`` key holds a list.
    """
    self._print("""The final section of your config file, 'schedule', is a
                   list of bot tasks to be started by the wiki scheduler.
                   Each entry contains cron-like time quantifiers and a
                   list of tasks. For example, the following starts the
                   'foobot' task every hour on the half-hour:""")
    # "\x1b[33m" switches the terminal to yellow text; "\x1b[0m" resets it.
    # Single-argument print(...) behaves the same as the print statement.
    print("\x1b[33mschedule:")
    print("    - minute: 30")
    print("      tasks:")
    print("          - foobot\x1b[0m")

    self._print("""The following starts the 'barbot' task with the keyword
                   arguments 'action="baz"' every Monday at 05:00 UTC:""")
    # This second entry continues the same 'schedule' list shown above.
    print("\x1b[33m    - week_day: 1")
    print("      hour: 5")
    print("      tasks:")
    print('          - ["barbot", {"action": "baz"}]\x1b[0m')

    self._print("""The full list of quantifiers is minute, hour, month_day,
                   month, and week_day. See the documentation for more
                   information.""")
    self._pause()
def _save(self):
    """Write ``self.data`` to the config file as block-style YAML.

    The file at ``self.config.path`` is overwritten. Output uses a 4-space
    indent, keeps unicode characters unescaped, and avoids inline flow style.
    """
    dump_options = {
        "indent": 4,
        "allow_unicode": True,
        "default_flow_style": False,
    }
    with open(self.config.path, "w") as config_file:
        yaml.dump(self.data, config_file, OrderedDumper, **dump_options)
def make_new(self):
    """Make a new config file based on the user's input.

    The config file is first created empty and restricted to owner
    read/write, then each wizard section runs in order. IRC-related sections
    are skipped unless an IRC component is enabled, and the schedule section
    is skipped unless the wiki scheduler is enabled. Finally the collected
    settings are saved, and the process exits if the user does not want to
    start the bot right away.

    Raises:
        IOError: re-raised (after a short message) if the config file cannot
            be opened for writing.
    """
    config_path = self.config.path
    try:
        # Create or truncate the file before tightening its permissions.
        with open(config_path, "w"):
            pass
        chmod(config_path, stat.S_IRUSR | stat.S_IWUSR)
    except IOError:
        # Single-argument print(...) matches the print statement's output.
        print("I can't seem to write to the config file:")
        raise

    self._set_metadata()
    self._set_components()
    self._set_wiki()

    # Read only after _set_components() has run.
    components = self.data["components"]
    uses_irc = components["irc_frontend"] or components["irc_watcher"]
    if uses_irc:
        self._set_irc()
    self._set_commands()
    self._set_tasks()
    if components["wiki_scheduler"]:
        self._set_schedule()

    self._print("""I am now saving config.yml with your settings. YAML is a
                   relatively straightforward format and you should be able
                   to update these settings in the future when necessary.
                   I will start the bot at your signal. Feel free to
                   contact me at wikipedia.earwig@gmail.com if you have any
                   questions.""")
    self._save()

    start_now = self._ask_bool("Start the bot now?")
    if not start_now:
        exit()
@@ -0,0 +1,256 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
"""
**EarwigBot: Exceptions**

This module contains all exceptions used by EarwigBot::

    EarwigBotError
     +-- NoConfigError
     +-- IRCError
     |    +-- BrokenSocketError
     +-- WikiToolsetError
          +-- SiteNotFoundError
          +-- ServiceError
          |    +-- APIError
          |    +-- SQLError
          +-- NoServiceError
          +-- LoginError
          +-- NamespaceNotFoundError
          +-- PageNotFoundError
          +-- InvalidPageError
          +-- RedirectError
          +-- UserNotFoundError
          +-- EditError
          |    +-- PermissionsError
          |    +-- EditConflictError
          |    +-- NoContentError
          |    +-- ContentTooBigError
          |    +-- SpamDetectedError
          |    +-- FilteredError
          +-- CopyvioCheckError
               +-- UnknownSearchEngineError
               +-- UnsupportedSearchEngineError
               +-- SearchQueryError
"""
class EarwigBotError(Exception):
    """Root of the EarwigBot exception hierarchy."""


class NoConfigError(EarwigBotError):
    """There is no config file, so the bot cannot run.

    Raised when no config file exists and the user declined to have one
    created.
    """


class IRCError(EarwigBotError):
    """Base class for errors in the IRC-related parts of the bot."""


class BrokenSocketError(IRCError):
    """A socket is broken: it has stopped sending data.

    Raised by :py:meth:`IRCConnection._get
    <earwigbot.irc.connection.IRCConnection._get>`.
    """


class WikiToolsetError(EarwigBotError):
    """Base class for errors in the Wiki Toolset."""


class SiteNotFoundError(WikiToolsetError):
    """The requested site is not in the sites database.

    Raised by :py:class:`~earwigbot.wiki.sitesdb.SitesDB`.
    """


class ServiceError(WikiToolsetError):
    """Base class for a failure inside a service (the API or SQL).

    :py:meth:`Site.delegate <earwigbot.wiki.site.Site.delegate>` catches this
    as a sign that a service is non-functional, so that another,
    less-preferred service can be tried instead.
    """


class APIError(ServiceError):
    """A site's API could not be reached.

    The server may not exist, our URL may be wrong or incomplete, or the
    remote end may be having temporary problems.

    Raised by :py:meth:`Site.api_query <earwigbot.wiki.site.Site.api_query>`.
    """


class SQLError(ServiceError):
    """Something went wrong while running an SQL query.

    Raised by :py:meth:`Site.sql_query <earwigbot.wiki.site.Site.sql_query>`.
    """


class NoServiceError(WikiToolsetError):
    """No working service is available to handle a specific task.

    Raised by :py:meth:`Site.delegate <earwigbot.wiki.site.Site.delegate>`.
    """


class LoginError(WikiToolsetError):
    """Logging in failed.

    The username or password may be incorrect.

    Raised by :py:meth:`Site._login <earwigbot.wiki.site.Site._login>`.
    """


class NamespaceNotFoundError(WikiToolsetError):
    """The requested namespace name or namespace ID does not exist.

    Raised by :py:meth:`Site.namespace_id_to_name
    <earwigbot.wiki.site.Site.namespace_id_to_name>` and
    :py:meth:`Site.namespace_name_to_id
    <earwigbot.wiki.site.Site.namespace_name_to_id>`.
    """


class PageNotFoundError(WikiToolsetError):
    """Information was requested about a page that does not exist.

    Raised by :py:class:`~earwigbot.wiki.page.Page`.
    """


class InvalidPageError(WikiToolsetError):
    """Information was requested about a page whose title is invalid.

    Raised by :py:class:`~earwigbot.wiki.page.Page`.
    """


class RedirectError(WikiToolsetError):
    """A redirect-only method was used on a malformed or non-redirect page.

    Raised by :py:meth:`Page.get_redirect_target
    <earwigbot.wiki.page.Page.get_redirect_target>`.
    """


class UserNotFoundError(WikiToolsetError):
    """Certain information was requested about a user that does not exist.

    Raised by :py:class:`~earwigbot.wiki.user.User`.
    """


class EditError(WikiToolsetError):
    """An error occurred while editing.

    This is the base class for every editing error. It is raised directly
    only for a generic failure that we do not otherwise know about.

    Raised by :py:meth:`Page.edit <earwigbot.wiki.page.Page.edit>` and
    :py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
    """


class PermissionsError(EditError):
    """A permissions error occurred while editing.

    We attempted something we are not allowed to do, such as deleting a page
    as a non-admin, or editing a page without login information while
    AssertEdit is enabled.

    Raised by :py:meth:`Page.edit <earwigbot.wiki.page.Page.edit>` and
    :py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
    """


class EditConflictError(EditError):
    """We hit an edit conflict or a (rarer) delete/recreate conflict.

    Raised by :py:meth:`Page.edit <earwigbot.wiki.page.Page.edit>` and
    :py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
    """


class NoContentError(EditError):
    """A page or new section was about to be created with no content.

    Raised by :py:meth:`Page.edit <earwigbot.wiki.page.Page.edit>` and
    :py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
    """


class ContentTooBigError(EditError):
    """The edit we tried to push exceeded the article size limit.

    Raised by :py:meth:`Page.edit <earwigbot.wiki.page.Page.edit>` and
    :py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
    """


class SpamDetectedError(EditError):
    """The spam filter rejected our edit.

    Raised by :py:meth:`Page.edit <earwigbot.wiki.page.Page.edit>` and
    :py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
    """


class FilteredError(EditError):
    """The edit filter rejected our edit.

    Raised by :py:meth:`Page.edit <earwigbot.wiki.page.Page.edit>` and
    :py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
    """


class CopyvioCheckError(WikiToolsetError):
    """An error occurred while checking a page for copyright violations.

    This is a base class for several exceptions; usually one of those is
    raised rather than this one.

    Raised by :py:meth:`Page.copyvio_check
    <earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check>` and
    :py:meth:`Page.copyvio_compare
    <earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_compare>`.
    """


class UnknownSearchEngineError(CopyvioCheckError):
    """A copyvio check was attempted with an unknown search engine.

    Search engines are specified in :file:`config.yml` as
    :py:attr:`config.wiki["search"]["engine"]`.

    Raised by :py:meth:`Page.copyvio_check
    <earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check>` and
    :py:meth:`Page.copyvio_compare
    <earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_compare>`.
    """


class UnsupportedSearchEngineError(CopyvioCheckError):
    """A copyvio check was attempted using an unavailable engine.

    This can happen if, for example, an engine requires oauth2 but that
    package could not be imported.

    Raised by :py:meth:`Page.copyvio_check
    <earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check>` and
    :py:meth:`Page.copyvio_compare
    <earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_compare>`.
    """


class SearchQueryError(CopyvioCheckError):
    """Some error occurred while doing a search query.

    Raised by :py:meth:`Page.copyvio_check
    <earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check>` and
    :py:meth:`Page.copyvio_compare
    <earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_compare>`.
    """
@@ -0,0 +1,27 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from earwigbot.irc.connection import * | |||||
from earwigbot.irc.data import * | |||||
from earwigbot.irc.frontend import * | |||||
from earwigbot.irc.rc import * | |||||
from earwigbot.irc.watcher import * |
@@ -0,0 +1,259 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
import socket | |||||
from threading import Lock | |||||
from time import sleep, time | |||||
from earwigbot.exceptions import BrokenSocketError | |||||
__all__ = ["IRCConnection"] | |||||
class IRCConnection(object):
    """Interface with an IRC server.

    This base class owns the socket, rate-limits outgoing messages, answers
    server PINGs, and provides helpers for common IRC commands. Subclasses
    must override :py:meth:`_process_message` to act on received lines.
    """

    def __init__(self, host, port, nick, ident, realname, logger):
        """Store connection settings; no network activity happens here."""
        self._host = host
        self._port = port
        self._nick = nick
        self._ident = ident
        self._realname = realname
        self.logger = logger
        self._is_running = False
        self._send_lock = Lock()  # Serializes (and rate-limits) all sends

        # Timestamps consulted by _send() and keep_alive():
        self._last_recv = time()
        self._last_send = 0
        self._last_ping = 0

    def __repr__(self):
        """Return the canonical string representation of the IRCConnection."""
        res = "IRCConnection(host={0!r}, port={1!r}, nick={2!r}, ident={3!r}, realname={4!r})"
        return res.format(self.host, self.port, self.nick, self.ident,
                          self.realname)

    def __str__(self):
        """Return a nice string representation of the IRCConnection."""
        res = "<IRCConnection {0}!{1} at {2}:{3}>"
        return res.format(self.nick, self.ident, self.host, self.port)

    def _connect(self):
        """Connect to our IRC server, retrying every 8 seconds until it works.

        Retries are done in a loop rather than by recursion, so the NICK and
        USER registration lines are sent exactly once after the connection
        succeeds, and each failed socket is closed before the next attempt.
        """
        while True:
            self._sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                self._sock.connect((self.host, self.port))
            except socket.error:
                self.logger.exception("Couldn't connect to IRC server; retrying")
                self._sock.close()  # Don't leak the failed socket
                sleep(8)
            else:
                break
        self._send("NICK {0}".format(self.nick))
        self._send("USER {0} {1} * :{2}".format(self.ident, self.host,
                                                self.realname))

    def _close(self):
        """Completely close our connection with the IRC server."""
        try:
            self._sock.shutdown(socket.SHUT_RDWR)  # Shut down connection first
        except socket.error:
            pass  # Ignore if the socket is already down
        self._sock.close()

    def _get(self, size=4096):
        """Receive (i.e. get) up to *size* bytes of data from the server.

        Raises :py:exc:`BrokenSocketError` if the socket returns no data.
        """
        data = self._sock.recv(size)
        if not data:
            # Socket isn't giving us any data, so it is dead or broken:
            raise BrokenSocketError()
        return data

    def _send(self, msg, hidelog=False):
        """Send a single line to the server, at most once every 0.75 seconds.

        *msg* is sent with a trailing CRLF. Unless *hidelog* is true, it is
        also logged at debug level. A socket error marks the connection as
        stopped instead of propagating.
        """
        with self._send_lock:
            time_since_last = time() - self._last_send
            if time_since_last < 0.75:
                sleep(0.75 - time_since_last)
            try:
                self._sock.sendall(msg + "\r\n")
            except socket.error:
                self._is_running = False
            else:
                if not hidelog:
                    self.logger.debug(msg)
                self._last_send = time()

    def _split(self, msgs, maxlen, maxsplits=3):
        """Split a large message into multiple messages smaller than maxlen.

        Splitting happens on single spaces; a word longer than *maxlen* is
        cut into *maxlen*-sized pieces. At most *maxsplits* chunks are
        yielded; any remaining text is dropped.
        """
        words = msgs.split(" ")
        splits = 0
        while words and splits < maxsplits:
            splits += 1
            if len(words[0]) > maxlen:
                # Oversized word: emit its head and requeue the remainder.
                word = words.pop(0)
                yield word[:maxlen]
                words.insert(0, word[maxlen:])
            else:
                msg = []
                while words and len(" ".join(msg + [words[0]])) <= maxlen:
                    msg.append(words.pop(0))
                yield " ".join(msg)

    def _quit(self, msg=None):
        """Issue a quit message to the server. Doesn't close the connection."""
        if msg:
            self._send("QUIT :{0}".format(msg))
        else:
            self._send("QUIT")

    def _process_defaults(self, line):
        """Default process hooks for lines received on IRC.

        *line* is a received line already split on whitespace. Records the
        time of receipt and answers server PINGs. Lines too short to be a
        PING with a token (including empty ones) are ignored here rather than
        raising :py:exc:`IndexError`.
        """
        self._last_recv = time()
        if len(line) > 1 and line[0] == "PING":  # If we are pinged, pong back
            self.pong(line[1][1:])  # Strip the token's leading character

    def _process_message(self, line):
        """To be overridden in subclasses."""
        raise NotImplementedError()

    @property
    def host(self):
        """The hostname of the IRC server, like ``"irc.freenode.net"``."""
        return self._host

    @property
    def port(self):
        """The port of the IRC server, like ``6667``."""
        return self._port

    @property
    def nick(self):
        """Our nickname on the server, like ``"EarwigBot"``."""
        return self._nick

    @property
    def ident(self):
        """Our ident on the server, like ``"earwig"``.

        See http://en.wikipedia.org/wiki/Ident.
        """
        return self._ident

    @property
    def realname(self):
        """Our realname (gecos field) on the server."""
        return self._realname

    def say(self, target, msg, hidelog=False):
        """Send a private message to a target on the server.

        Long messages are split into chunks of at most 400 characters.
        """
        for chunk in self._split(msg, 400):
            self._send("PRIVMSG {0} :{1}".format(target, chunk), hidelog)

    def reply(self, data, msg, hidelog=False):
        """Send a private message as a reply to a user on the server.

        In a channel the reply is prefixed with the user's nick in bold; in
        a private conversation it is sent as-is.
        """
        if not data.is_private:
            msg = "\x02{0}\x0F: {1}".format(data.nick, msg)
        self.say(data.chan, msg, hidelog)

    def action(self, target, msg, hidelog=False):
        """Send a private message to a target on the server as an action."""
        msg = "\x01ACTION {0}\x01".format(msg)
        self.say(target, msg, hidelog)

    def notice(self, target, msg, hidelog=False):
        """Send a notice to a target on the server.

        Long messages are split into chunks of at most 400 characters.
        """
        for chunk in self._split(msg, 400):
            self._send("NOTICE {0} :{1}".format(target, chunk), hidelog)

    def join(self, chan, hidelog=False):
        """Join a channel on the server."""
        msg = "JOIN {0}".format(chan)
        self._send(msg, hidelog)

    def part(self, chan, msg=None, hidelog=False):
        """Part from a channel on the server, optionally using a message."""
        if msg:
            self._send("PART {0} :{1}".format(chan, msg), hidelog)
        else:
            self._send("PART {0}".format(chan), hidelog)

    def mode(self, target, level, msg, hidelog=False):
        """Send a mode message to the server."""
        msg = "MODE {0} {1} {2}".format(target, level, msg)
        self._send(msg, hidelog)

    def ping(self, target, hidelog=False):
        """Ping another entity on the server."""
        msg = "PING {0}".format(target)
        self._send(msg, hidelog)

    def pong(self, target, hidelog=False):
        """Pong another entity on the server."""
        msg = "PONG {0}".format(target)
        self._send(msg, hidelog)

    def loop(self):
        """Main loop for the IRC connection.

        Reads from the socket until it breaks or the connection is stopped,
        handing each complete line (split on whitespace) to
        :py:meth:`_process_defaults` and :py:meth:`_process_message`. Blank
        lines are skipped so the handlers never see an empty list. The socket
        is closed when the loop ends.
        """
        self._is_running = True
        read_buffer = ""
        while True:
            try:
                read_buffer += self._get()
            except BrokenSocketError:
                self._is_running = False
                break

            lines = read_buffer.split("\n")
            read_buffer = lines.pop()  # Keep the trailing partial line
            for line in lines:
                line = line.strip().split()
                if not line:
                    continue  # Nothing to process on an empty line
                self._process_defaults(line)
                self._process_message(line)
            if self.is_stopped():
                break

        self._close()

    def keep_alive(self):
        """Ensure that we stay connected, stopping if the connection breaks.

        After 120 seconds without receiving anything, the server is pinged
        once; if another 60 seconds pass with still nothing received, the
        connection is stopped.
        """
        now = time()
        if now - self._last_recv > 120:
            if self._last_ping < self._last_recv:
                log = "Last message was received over 120 seconds ago. Pinging."
                self.logger.debug(log)
                self.ping(self.host)
                self._last_ping = now
            elif now - self._last_ping > 60:
                self.logger.debug("No ping response in 60 seconds. Stopping.")
                self.stop()

    def stop(self, msg=None):
        """Request the IRC connection to close at earliest convenience."""
        if self._is_running:
            self._quit(msg)
            self._is_running = False

    def is_stopped(self):
        """Return whether the IRC connection has been (or is to be) closed."""
        return not self._is_running
@@ -0,0 +1,212 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
import re | |||||
__all__ = ["Data"] | |||||
class Data(object):
    """Store data from an individual line received on IRC."""

    def __init__(self, bot, my_nick, line, msgtype):
        """Parse *line* (a list of whitespace-split tokens) immediately.

        *my_nick* is lowercased before being stored; *msgtype* selects how
        much of the line is parsed (see :py:meth:`_parse`).
        """
        self._bot = bot
        self._my_nick = my_nick.lower()
        self._line = line

        # Defaults for everything that is only filled in for PRIVMSGs:
        self._is_private = False
        self._is_command = False
        self._msg = None
        self._command = None
        self._trigger = None
        self._args = []
        self._kwargs = {}

        self._parse(msgtype)

    def __repr__(self):
        """Return the canonical string representation of the Data."""
        res = "Data(bot={0!r}, my_nick={1!r}, line={2!r})"
        return res.format(self._bot, self.my_nick, self.line)

    def __str__(self):
        """Return a nice string representation of the Data."""
        joined = " ".join(self.line)
        return "<Data of {0!r}>".format(joined)

    def _parse(self, msgtype):
        """Parse a line from IRC into its components as instance attributes.

        The sender (``:nick!ident@host``) and the channel are always read.
        The message text, command and arguments are only parsed when
        *msgtype* is ``"PRIVMSG"``.
        """
        prefix = self.line[0]
        sender = re.findall(r":(.*?)!(.*?)@(.*?)\Z", prefix)[0]
        self._nick, self._ident, self._host = sender
        self._chan = self.line[2]

        if msgtype != "PRIVMSG":
            return

        if self.chan.lower() == self.my_nick:
            # The "channel" is ourselves, so this is a private message;
            # treat the sender's nick as the place to answer to:
            self._chan = self._nick
            self._is_private = True
        text = " ".join(self.line[3:])
        self._msg = text[1:]  # Drop the first character of the text
        self._parse_args()
        self._parse_kwargs()

    def _parse_args(self):
        """Parse command arguments from the message.

        The first word of self.msg becomes self.command (lowercased) and the
        remaining words become self.args. If that first word starts with a
        "trigger" ("!" or "."), or is the bot's name followed by optional
        punctuation, self.is_command is set to True and self.trigger stores
        the trigger string. Otherwise is_command stays False, and a single
        trailing period is stripped from the last word of the message.
        """
        words = self.msg.strip().split()
        self._args = words
        if not words:
            return
        first = words.pop(0).lower()
        self._command = first

        if first.startswith(("!", ".")):
            # e.g. "!command arg1 arg2"
            self._is_command = True
            self._trigger = first[0]
            self._command = first[1:]  # Strip the "!" or "."
            return

        nick_pattern = r"{0}\W*?$".format(re.escape(self.my_nick))
        if re.match(nick_pattern, first, re.U):
            # e.g. "EarwigBot, command arg1 arg2"
            self._is_command = True
            self._trigger = self.my_nick
            self._command = words.pop(0).lower() if words else ""
            return

        # Not a command: drop one trailing "." (but leave "..", "..." alone).
        text = self.msg
        if len(text) >= 2 and text[-1] == "." and text[-2] != ".":
            if words:
                words[-1] = words[-1][:-1]
            else:
                self._command = first[:-1]

    def _parse_kwargs(self):
        """Parse keyword arguments embedded in self.args.

        Parse a command given as "!command key1=value1 key2=value2..." into a
        dict, self.kwargs, like {'key1': 'value1', 'key2': 'value2'...}.
        Arguments without "=", or with an empty key or value, are skipped.
        """
        for arg in self.args:
            key, equals, value = arg.partition("=")
            if equals and key and value:
                self.kwargs[key] = value

    @property
    def my_nick(self):
        """Our own nickname (lowercased), *not* the nickname of the sender."""
        return self._my_nick

    @property
    def line(self):
        """The full message received on IRC, including escape characters."""
        return self._line

    @property
    def chan(self):
        """Channel the message was sent from.

        For a private message this is equal to :py:attr:`nick`.
        """
        return self._chan

    @property
    def nick(self):
        """Nickname of the sender."""
        return self._nick

    @property
    def ident(self):
        """`Ident <http://en.wikipedia.org/wiki/Ident>`_ of the sender."""
        return self._ident

    @property
    def host(self):
        """Hostname of the sender."""
        return self._host

    @property
    def msg(self):
        """Text of the sent message, if it is a message, else ``None``."""
        return self._msg

    @property
    def is_private(self):
        """``True`` if this message was sent to us *only*, else ``False``."""
        return self._is_private

    @property
    def is_command(self):
        """Boolean telling whether or not this message is a bot command.

        A message is considered a command if and only if it begins with the
        character ``"!"``, ``"."``, or the bot's name followed by optional
        punctuation and a space (so ``EarwigBot: do something``, ``EarwigBot,
        do something``, and ``EarwigBot do something`` are all valid).
        """
        return self._is_command

    @property
    def command(self):
        """The first word of the message, lowercased, minus any trigger.

        For a command (see :py:attr:`is_command`) this is the name of the
        command used. It is ``None`` when no message text was parsed.
        """
        return self._command

    @property
    def trigger(self):
        """If this message is a command, this is what triggered it.

        It can be either "!" (``"!help"``), "." (``".help"``), or the bot's
        name (``"EarwigBot: help"``). Otherwise, it will be ``None``.
        """
        return self._trigger

    @property
    def args(self):
        """List of all arguments given to this command.

        For example, the message ``"!command arg1 arg2 arg3=val3"`` will
        produce the args ``["arg1", "arg2", "arg3=val3"]``. This is empty if
        no message text was parsed or if there are no words after the first.
        """
        return self._args

    @property
    def kwargs(self):
        """Dictionary of keyword arguments given to this command.

        For example, the message ``"!command arg1=val1 arg2=val2"`` will
        produce the kwargs ``{"arg1": "val1", "arg2": "val2"}``. This is empty
        if none of the arguments has the form ``key=value``.
        """
        return self._kwargs
@@ -0,0 +1,86 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from earwigbot.irc import IRCConnection, Data | |||||
__all__ = ["Frontend"] | |||||
class Frontend(IRCConnection):
    """
    **EarwigBot: IRC Frontend Component**

    The IRC frontend runs on a normal IRC server and expects users to interact
    with it and give it commands. Commands are stored as "command classes",
    subclasses of :py:class:`~earwigbot.commands.Command`. All command classes
    are automatically imported by :py:meth:`commands.load()
    <earwigbot.managers._ResourceManager.load>` if they are in
    :py:mod:`earwigbot.commands` or the bot's custom command directory
    (explained in the :doc:`documentation </customizing>`).
    """

    def __init__(self, bot):
        """Read the frontend connection settings from *bot* and connect."""
        self.bot = bot
        settings = bot.config.irc["frontend"]
        keys = ("host", "port", "nick", "ident", "realname")
        init_args = [settings[key] for key in keys]
        init_args.append(bot.logger.getChild("frontend"))
        super(Frontend, self).__init__(*init_args)
        self._connect()

    def __repr__(self):
        """Return the canonical string representation of the Frontend."""
        template = "Frontend(host={0!r}, port={1!r}, nick={2!r}, ident={3!r}, realname={4!r}, bot={5!r})"
        fields = (self.host, self.port, self.nick, self.ident, self.realname,
                  self.bot)
        return template.format(*fields)

    def __str__(self):
        """Return a nice string representation of the Frontend."""
        fields = (self.nick, self.ident, self.host, self.port)
        return "<Frontend {0}!{1} at {2}:{3}>".format(*fields)

    def _process_message(self, line):
        """Dispatch one already-split IRC line to the matching command hooks.

        ``JOIN`` lines fire the ``"join"`` hook. ``PRIVMSG`` lines fire
        ``"msg_private"`` or ``"msg_public"`` followed by ``"msg"``. Numeric
        ``376`` triggers the optional NickServ login and joins the
        configured channels.
        """
        kind = line[1]

        if kind == "JOIN":
            data = Data(self.bot, self.nick, line, msgtype="JOIN")
            self.bot.commands.call("join", data)
            return

        if kind == "PRIVMSG":
            data = Data(self.bot, self.nick, line, msgtype="PRIVMSG")
            hook = "msg_private" if data.is_private else "msg_public"
            self.bot.commands.call(hook, data)
            self.bot.commands.call("msg", data)
            return

        if kind != "376":
            return

        # 376 is handled here once the server connection has succeeded.
        # NickServ credentials are optional; skip the login if either is
        # missing from the config:
        try:
            username = self.bot.config.irc["frontend"]["nickservUsername"]
            password = self.bot.config.irc["frontend"]["nickservPassword"]
        except KeyError:
            pass
        else:
            identify = "IDENTIFY {0} {1}".format(username, password)
            self.say("NickServ", identify, hidelog=True)

        # Join every startup channel:
        for channel in self.bot.config.irc["frontend"]["channels"]:
            self.join(channel)
@@ -0,0 +1,96 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
import re | |||||
__all__ = ["RC"] | |||||
class RC(object):
    """Store data from an event received from our IRC watcher.

    Create the object with the channel and raw message, call :py:meth:`parse`
    to fill in ``page``, ``flags``, ``url``, ``user``, ``comment`` and
    ``is_edit``, then call :py:meth:`prettify` to get a colored one-line
    summary. :py:meth:`prettify` relies on attributes set by
    :py:meth:`parse`.
    """
    # Matches an IRC color code (\x03 plus optional "fg[,bg]" digits):
    re_color = re.compile("\x03([0-9]{1,2}(,[0-9]{1,2})?)?")
    # "[[page]] flags url * user * comment"; raw strings keep the regex
    # escapes intact, and both http:// and https:// diff URLs are accepted:
    re_edit = re.compile(r"\A\[\[(.*?)\]\]\s(.*?)\s(https?://.*?)\s\*\s(.*?)\s\*\s(.*?)\Z")
    # "[[page]] flags  * user * comment"; log entries carry no URL:
    re_log = re.compile(r"\A\[\[(.*?)\]\]\s(.*?)\s\s\*\s(.*?)\s\*\s(.*?)\Z")
    pretty_edit = "\x02New {0}\x0F: \x0314[[\x0307{1}\x0314]]\x0306 * \x0303{2}\x0306 * \x0302{3}\x0306 * \x0310{4}"
    pretty_log = "\x02New {0}\x0F: \x0303{1}\x0306 * \x0302{2}\x0306 * \x0310{3}"

    def __init__(self, chan, msg):
        self.chan = chan
        self.msg = msg

    def __repr__(self):
        """Return the canonical string representation of the RC."""
        return "RC(chan={0!r}, msg={1!r})".format(self.chan, self.msg)

    def __str__(self):
        """Return a nice string representation of the RC."""
        return "<RC of {0!r} on {1}>".format(self.msg, self.chan)

    def parse(self):
        """Parse a recent change event into some variables.

        Strips color codes from ``self.msg`` (in place) and sets ``page``,
        ``flags``, ``url``, ``user``, ``comment`` and ``is_edit``.

        Raises :py:exc:`IndexError` if the message matches neither the edit
        nor the log entry format.
        """
        # Strip IRC color codes; we don't want or need 'em:
        self.msg = self.re_color.sub("", self.msg).strip()
        msg = self.msg
        self.is_edit = True

        # Flags: 'M' for minor edit, 'B' for bot edit, 'create' for a user
        # creation log entry, etc:
        try:
            page, self.flags, url, user, comment = self.re_edit.findall(msg)[0]
        except IndexError:
            # We're probably missing the URL part, because it's a log entry,
            # which lacks a URL:
            page, flags, user, comment = self.re_log.findall(msg)[0]
            url = "http://{0}.org/wiki/{1}".format(self.chan[1:], page)

            self.is_edit = False  # This is a log entry, not edit

            # Flags tends to have extra whitespace at the end when they're
            # log entries:
            self.flags = flags.strip()

        self.page, self.url, self.user, self.comment = page, url, user, comment

    def prettify(self):
        """Make a nice, colorful message to send back to the IRC front-end."""
        flags = self.flags
        if self.is_edit:
            if "N" in flags:
                event = "page"  # "New page:"
            else:
                event = "edit"  # "New edit:"
                if "B" in flags:
                    event = "bot edit"  # "New bot edit:"
                if "M" in flags:
                    event = "minor " + event  # "New minor (bot)? edit:"
            return self.pretty_edit.format(event, self.page, self.user,
                                           self.url, self.comment)

        # Log entries: a few flags get friendlier names, anything else
        # ("move", "block", etc) is used as-is:
        names = {"delete": "deletion", "protect": "protection",
                 "create": "user"}
        event = names.get(flags, flags)
        return self.pretty_log.format(event, self.user, self.url, self.comment)
@@ -0,0 +1,129 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
import imp | |||||
import os | |||||
from earwigbot.irc import IRCConnection, RC | |||||
__all__ = ["Watcher"] | |||||
class Watcher(IRCConnection):
    """
    **EarwigBot: IRC Watcher Component**

    The IRC watcher runs on a wiki recent-changes server and listens for
    edits. Users cannot interact with this part of the bot. When an event
    occurs, we run it through some rules stored in our working directory under
    :file:`rules.py`, which can result in wiki bot tasks being started or
    messages being sent to channels on the IRC frontend.
    """

    def __init__(self, bot):
        self.bot = bot
        cf = bot.config.irc["watcher"]
        base = super(Watcher, self)
        base.__init__(cf["host"], cf["port"], cf["nick"], cf["ident"],
                      cf["realname"], bot.logger.getChild("watcher"))
        self._prepare_process_hook()
        self._connect()

    def __repr__(self):
        """Return the canonical string representation of the Watcher."""
        res = "Watcher(host={0!r}, port={1!r}, nick={2!r}, ident={3!r}, realname={4!r}, bot={5!r})"
        return res.format(self.host, self.port, self.nick, self.ident,
                          self.realname, self.bot)

    def __str__(self):
        """Return a nice string representation of the Watcher."""
        res = "<Watcher {0}!{1} at {2}:{3}>"
        return res.format(self.nick, self.ident, self.host, self.port)

    def _process_message(self, line):
        """Process a single message from IRC."""
        if line[1] == "PRIVMSG":
            chan = line[2]

            # Ignore messages originating from channels not in our list, to
            # prevent someone PMing us false data:
            if chan not in self.bot.config.irc["watcher"]["channels"]:
                return

            # Rejoin the message text and drop its first character:
            msg = " ".join(line[3:])[1:]
            rc = RC(chan, msg)  # New RC object to store this event's data
            rc.parse()  # Parse a message into pagenames, usernames, etc.
            self._process_rc_event(rc)

        # When we've finished starting up, join all watcher channels:
        elif line[1] == "376":
            for chan in self.bot.config.irc["watcher"]["channels"]:
                self.join(chan)

    def _prepare_process_hook(self):
        """Create our RC event process hook from information in rules.py.

        This will get put in the function self._process_hook, which takes the
        Bot object and an RC object and returns a list of frontend channels to
        report this event to. If rules.py is missing, fails to import, or
        lacks a ``process`` function, the do-nothing default hook stays in
        place.
        """
        # Set a default RC process hook that does nothing:
        self._process_hook = lambda bot, rc: ()

        path = self.bot.config.root_dir
        try:
            f, path, desc = imp.find_module("rules", [path])
        except ImportError:
            return  # No rules module at all; that's fine
        try:
            module = imp.load_module("rules", f, path, desc)
        except Exception:
            # A broken rules.py shouldn't stop the watcher, but it also
            # shouldn't fail silently:
            e = "Couldn't load RC event rules (from {0})"
            self.logger.exception(e.format(path))
            return
        finally:
            # find_module() returns no file object for packages:
            if f is not None:
                f.close()

        self._process_hook_module = module
        try:
            self._process_hook = module.process
        except AttributeError:
            e = "RC event rules imported correctly, but no process(bot, rc) function was found"
            self.logger.error(e)
            return

    def _process_rc_event(self, rc):
        """Process a recent change event from IRC (or, an RC object).

        The actual processing is configurable, so we don't have that hard-coded
        here. We simply call our process hook (self._process_hook), created by
        self._prepare_process_hook() from the rules.py file in our working
        directory. Any channels it returns get a prettified version of the
        event, truncated to 400 characters, via the IRC frontend.
        """
        chans = self._process_hook(self.bot, rc)
        with self.bot.component_lock:
            frontend = self.bot.frontend
            if chans and frontend and not frontend.is_stopped():
                pretty = rc.prettify()
                if len(pretty) > 400:
                    msg = pretty[:397] + "..."
                else:
                    msg = pretty
                for chan in chans:
                    frontend.say(chan, msg)
@@ -0,0 +1,81 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
""" | |||||
Implements a hierarchy of importing classes as defined in PEP 302 to load | |||||
modules in a safe yet lazy manner. | |||||
""" | |||||
from imp import acquire_lock, release_lock | |||||
import sys | |||||
from types import ModuleType | |||||
__all__ = ["LazyImporter"] | |||||
def _getattribute(self, attr):
    """Load the module via ``_load()``, then return its attribute *attr*.

    Used as the ``__getattribute__`` hook of the class that ``_LazyModule``
    builds.
    """
    _load(self)
    return self.__getattribute__(attr)
def _setattr(self, attr, value):
    """Load the module via ``_load()``, then set its attribute *attr*.

    Used as the ``__setattr__`` hook of the class that ``_LazyModule``
    builds.
    """
    _load(self)
    self.__setattr__(attr, value)
def _load(self):
    """Put ModuleType's attribute hooks back on *self*'s class and reload it."""
    # Replace the hooks on the class first, so the attribute access done
    # below and afterwards goes through ModuleType's own implementations:
    type(self).__getattribute__ = ModuleType.__getattribute__
    type(self).__setattr__ = ModuleType.__setattr__
    # NOTE(review): reload() is the Python 2 builtin; presumably this is what
    # performs the deferred import of the real module -- confirm.
    reload(self)
class _LazyModule(type):
    """Metaclass-based factory: ``_LazyModule(name)`` returns a module object.

    If *name* is not yet in ``sys.modules``, a one-off ``ModuleType``
    subclass is created whose attribute hooks are ``_getattribute`` and
    ``_setattr``, and an instance of it is stored in ``sys.modules``. The
    entry in ``sys.modules`` is returned either way.
    """
    def __new__(cls, name):
        # Hold the interpreter's import lock while inspecting and modifying
        # sys.modules:
        acquire_lock()
        try:
            if name not in sys.modules:
                attributes = {
                    "__name__": name,
                    "__getattribute__": _getattribute,
                    "__setattr__": _setattr
                }
                parents = (ModuleType,)
                # A fresh class per module, so _load() can later replace the
                # hooks on this class alone:
                klass = type.__new__(cls, "module", parents, attributes)
                sys.modules[name] = klass(name)
            return sys.modules[name]
        finally:
            release_lock()
class LazyImporter(object):
    """PEP 302 meta-path importer that hands out lazily-loaded modules.

    Creating an instance appends it to ``sys.meta_path``. Modules created
    with :py:meth:`new` are remembered and served by :py:meth:`find_module`
    and :py:meth:`load_module` when they are requested while absent from
    ``sys.modules``.
    """
    def __init__(self):
        self._modules = {}  # Maps module name -> lazy module object
        sys.meta_path.append(self)

    def new(self, name):
        """Create (or fetch) the lazy module *name* and remember it."""
        module = _LazyModule(name)
        self._modules[name] = module
        return module

    def find_module(self, fullname, path=None):
        """Return ourselves as the loader for a known, unregistered module.

        Returns ``None`` for any other module so the rest of the import
        machinery handles it.
        """
        if fullname in self._modules and fullname not in sys.modules:
            return self
        return None

    def load_module(self, fullname):
        """Return the remembered module *fullname*, registering it.

        PEP 302 requires loaders to place the module in ``sys.modules`` and
        to reuse an entry that is already there, so we do both. Raises
        :py:exc:`KeyError` if *fullname* was never created with
        :py:meth:`new` (or has already been loaded).
        """
        module = self._modules.pop(fullname)
        return sys.modules.setdefault(fullname, module)
@@ -0,0 +1,269 @@ | |||||
#! /usr/bin/env python | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
import imp | |||||
from os import listdir, path | |||||
from re import sub | |||||
from threading import RLock, Thread | |||||
from time import gmtime, strftime | |||||
from earwigbot.commands import Command | |||||
from earwigbot.tasks import Task | |||||
__all__ = ["CommandManager", "TaskManager"] | |||||
class _ResourceManager(object):
    """
    **EarwigBot: Resource Manager**

    Resources are essentially objects dynamically loaded by the bot, both
    packaged with it (built-in resources) and created by users (plugins, aka
    custom resources). Currently, the only two types of resources are IRC
    commands and bot tasks. These are both loaded from two locations: the
    :py:mod:`earwigbot.commands` and :py:mod:`earwigbot.tasks` packages, and
    the :file:`commands/` and :file:`tasks/` directories within the bot's
    working directory.

    This class handles the low-level tasks of (re)loading resources via
    :py:meth:`load`, retrieving specific resources via :py:meth:`get`, and
    iterating over all resources via :py:meth:`__iter__`.
    """
    def __init__(self, bot, name, base):
        self.bot = bot
        self.logger = bot.logger.getChild(name)

        self._resources = {}
        self._resource_name = name  # e.g. "commands" or "tasks"
        self._resource_base = base  # e.g. Command or Task
        self._resource_access_lock = RLock()

    def __repr__(self):
        """Return the canonical string representation of the manager."""
        res = "{0}(bot={1!r}, name={2!r}, base={3!r})"
        return res.format(self.__class__.__name__, self.bot,
                          self._resource_name, self._resource_base)

    def __str__(self):
        """Return a nice string representation of the manager."""
        return "<{0} of {1}>".format(self.__class__.__name__, self.bot)

    def __iter__(self):
        """Iterate over a snapshot of the currently loaded resources.

        The snapshot is taken under the lock, and the lock is released
        before anything is yielded, so a consumer that stops early can
        never leave the lock held.
        """
        with self.lock:
            snapshot = list(self._resources.values())
        for resource in snapshot:
            yield resource

    def _disabled_names(self):
        """Return the resource/module names listed under "disable".

        The config value may also be ``True`` (handled by :py:meth:`load` to
        skip all built-ins) or missing; both mean that nothing is disabled
        by name.
        """
        res_config = getattr(self.bot.config, self._resource_name)
        disabled = res_config.get("disable", [])
        if disabled is True or not disabled:
            return []
        return disabled

    def _load_resource(self, name, path, klass):
        """Instantiate a resource class and add it to the dictionary.

        *name* and *path* identify the module the class came from and are
        only used for logging. Classes whose ``name`` is disabled in the
        config are skipped; errors while instantiating are logged.
        """
        res_type = self._resource_name[:-1]  # e.g. "command" or "task"
        if hasattr(klass, "name"):
            if klass.name in self._disabled_names():
                log = "Skipping disabled {0} {1}"
                self.logger.debug(log.format(res_type, klass.name))
                return
        try:
            resource = klass(self.bot)  # Create instance of resource
        except Exception:
            e = "Error instantiating {0} class in '{1}' (from {2})"
            self.logger.exception(e.format(res_type, name, path))
        else:
            self._resources[resource.name] = resource
            self.logger.debug("Loaded {0} {1}".format(res_type, resource.name))

    def _load_module(self, name, path):
        """Load a specific resource from a module, identified by name and path.

        We'll first try to import it using imp magic, and if that works, make
        instances of any classes inside that are subclasses of the base
        (:py:attr:`self._resource_base <_resource_base>`), add them to the
        resources dictionary with :py:meth:`self._load_resource()
        <_load_resource>`, and finally log the addition. Errors raised while
        importing the module are logged and the module is skipped.
        """
        f, path, desc = imp.find_module(name, [path])
        try:
            module = imp.load_module(name, f, path, desc)
        except Exception:
            e = "Couldn't load module '{0}' (from {1})"
            self.logger.exception(e.format(name, path))
            return
        finally:
            # find_module() returns no file object for packages:
            if f is not None:
                f.close()

        for obj in vars(module).values():
            if not isinstance(obj, type):
                continue
            # The base class itself is usually imported by the module;
            # don't treat it as a resource:
            if obj is self._resource_base:
                continue
            if issubclass(obj, self._resource_base):
                self._load_resource(name, path, obj)

    def _load_directory(self, dir):
        """Load all valid resources in a given directory."""
        self.logger.debug("Loading directory {0}".format(dir))
        disabled = self._disabled_names()
        processed = set()
        for name in listdir(dir):
            if not name.endswith((".py", ".pyc")):
                continue
            if name.startswith(("_", ".")):
                continue
            modname = sub(r"\.pyc?$", "", name)  # Remove extension
            if modname in disabled:
                log = "Skipping disabled module {0}".format(modname)
                self.logger.debug(log)
                continue
            # A module may show up twice, as both foo.py and foo.pyc:
            if modname not in processed:
                self._load_module(modname, dir)
                processed.add(modname)

    @property
    def lock(self):
        """The resource access/modify lock."""
        return self._resource_access_lock

    def load(self):
        """Load (or reload) all valid resources into :py:attr:`_resources`."""
        name = self._resource_name  # e.g. "commands" or "tasks"
        with self.lock:
            self._resources.clear()
            builtin_dir = path.join(path.dirname(__file__), name)
            plugins_dir = path.join(self.bot.config.root_dir, name)
            if getattr(self.bot.config, name).get("disable") is True:
                log = "Skipping disabled builtins directory: {0}"
                self.logger.debug(log.format(builtin_dir))
            else:
                self._load_directory(builtin_dir)  # Built-in resources
            if path.exists(plugins_dir) and path.isdir(plugins_dir):
                self._load_directory(plugins_dir)  # Custom resources, plugins
            else:
                log = "Skipping nonexistent plugins directory: {0}"
                self.logger.debug(log.format(plugins_dir))

        if self._resources:
            msg = "Loaded {0} {1}: {2}"
            resources = ", ".join(self._resources.keys())
            self.logger.info(msg.format(len(self._resources), name, resources))
        else:
            self.logger.info("Loaded 0 {0}".format(name))

    def get(self, key):
        """Return the class instance associated with a certain resource.

        Will raise :py:exc:`KeyError` if the resource (a command or task) is
        not found.
        """
        with self.lock:
            return self._resources[key]
class CommandManager(_ResourceManager):
    """
    Manages (i.e., loads, reloads, and calls) IRC commands.
    """
    def __init__(self, bot):
        super(CommandManager, self).__init__(bot, "commands", Command)

    def _wrap_check(self, command, data):
        """Ask *command* whether it wants *data*; log errors, return None."""
        try:
            wanted = command.check(data)
        except Exception:
            template = "Error checking command '{0}' with data: {1}:"
            self.logger.exception(template.format(command.name, data))
            return None
        return wanted

    def _wrap_process(self, command, data):
        """Run *command* on *data*, logging anything it raises."""
        try:
            command.process(data)
        except Exception:
            template = "Error executing command '{0}':"
            self.logger.exception(template.format(command.name))

    def call(self, hook, data):
        """Respond to a hook type and a :py:class:`Data` object.

        The first command that listens on *hook* and whose check accepts
        *data* is started in a daemon thread; later commands are not tried.
        """
        for command in self:
            if hook not in command.hooks:
                continue
            if not self._wrap_check(command, data):
                continue
            worker = Thread(target=self._wrap_process, args=(command, data))
            started = strftime("%b %d %H:%M:%S")
            worker.name = "irc:{0} ({1})".format(command.name, started)
            worker.daemon = True
            worker.start()
            return
class TaskManager(_ResourceManager):
    """
    Manages (i.e., loads, reloads, schedules, and runs) wiki bot tasks.
    """
    def __init__(self, bot):
        super(TaskManager, self).__init__(bot, "tasks", Task)

    def _wrapper(self, task, **kwargs):
        """Thread target: run *task* and log whether it failed or finished."""
        try:
            task.run(**kwargs)
        except Exception:
            failure = "Task '{0}' raised an exception and had to stop:"
            self.logger.exception(failure.format(task.name))
            return
        success = "Task '{0}' finished successfully"
        self.logger.info(success.format(task.name))

    def start(self, task_name, **kwargs):
        """Start a given task in a new daemon thread, and return the thread.

        kwargs are passed to :py:meth:`task.run() <earwigbot.tasks.Task.run>`.
        If the task is not found, ``None`` will be returned and an error will
        be logged.
        """
        self.logger.info(
            "Starting task '{0}' in a new thread".format(task_name))

        try:
            task = self.get(task_name)
        except KeyError:
            self.logger.error("Couldn't find task '{0}'".format(task_name))
            return

        worker = Thread(target=self._wrapper, args=(task,), kwargs=kwargs)
        started = strftime("%b %d %H:%M:%S")
        worker.name = "{0} ({1})".format(task_name, started)
        worker.daemon = True
        worker.start()
        return worker

    def schedule(self, now=None):
        """Start all tasks that are supposed to be run at a given time.

        *now* is a ``time.struct_time``; the current UTC time is used when
        it is not given.
        """
        when = now if now else gmtime()
        # Ask the config which tasks are due this turn:
        due = self.bot.config.schedule(when.tm_min, when.tm_hour,
                                       when.tm_mday, when.tm_mon,
                                       when.tm_wday)

        for entry in due:
            if isinstance(entry, list):
                # A [name, kwargs] pair: forward the kwargs to start()
                task_name, task_kwargs = entry[0], entry[1]
                self.start(task_name, **task_kwargs)
            else:
                # A bare task name
                self.start(entry)
@@ -0,0 +1,143 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from earwigbot import exceptions | |||||
from earwigbot import wiki | |||||
__all__ = ["Task"] | |||||
class Task(object):
    """
    **EarwigBot: Base Bot Task**

    This package provides built-in wiki bot "tasks" EarwigBot runs. Additional
    tasks can be installed as plugins in the bot's working directory.

    This class (import with ``from earwigbot.tasks import Task``) can be
    subclassed to create custom bot tasks.

    To run a task, use :py:meth:`bot.tasks.start(name, **kwargs)
    <earwigbot.managers.TaskManager.start>`. ``**kwargs`` get passed to the
    Task's :meth:`run` method.
    """
    name = None
    number = 0

    def __init__(self, bot):
        """Constructor for new tasks.

        This is called once immediately after the task class is loaded by
        the task manager (in :py:meth:`tasks.load()
        <earwigbot.managers._ResourceManager.load>`). Don't override this
        directly; if you do, remember to place
        ``super(Task, self).__init__()`` first. Use :py:meth:`setup` for
        typical task-init/setup needs.
        """
        self.bot = bot
        self.config = bot.config
        self.logger = bot.tasks.logger.getChild(self.name)
        self.setup()

    def __repr__(self):
        """Return the canonical string representation of the Task."""
        res = "Task(name={0!r}, number={1!r}, bot={2!r})"
        return res.format(self.name, self.number, self.bot)

    def __str__(self):
        """Return a nice string representation of the Task."""
        res = "<Task {0} ({1}) of {2}>"
        return res.format(self.name, self.number, self.bot)

    def setup(self):
        """Hook called immediately after the task is loaded.

        Does nothing by default; feel free to override.
        """
        pass

    def run(self, **kwargs):
        """Main entry point to run a given task.

        This is called directly by :py:meth:`tasks.start()
        <earwigbot.managers.TaskManager.start>` and is the main way to make a
        task do stuff. *kwargs* will be any keyword arguments passed to
        :py:meth:`~earwigbot.managers.TaskManager.start`, which are entirely
        optional.
        """
        pass

    def make_summary(self, comment):
        """Make an edit summary by filling in variables in a config value.

        :py:attr:`config.wiki["summary"] <earwigbot.config.BotConfig.wiki>` is
        used, where ``$2`` is replaced by the main summary body, given by the
        *comment* argument, and ``$1`` is replaced by the task number.

        If the config value is not found, we'll just return *comment* as-is.
        """
        try:
            summary = self.bot.config.wiki["summary"]
        except KeyError:
            return comment
        # $1 goes first so that a literal "$1" inside *comment* survives:
        return summary.replace("$1", str(self.number)).replace("$2", comment)

    def shutoff_enabled(self, site=None):
        """Return whether on-wiki shutoff is enabled for this task.

        We check a certain page for certain content. This is determined by
        our config file: :py:attr:`config.wiki["shutoff"]["page"]
        <earwigbot.config.BotConfig.wiki>` is used as the title, with any
        embedded ``$1`` replaced by our username and ``$2`` replaced by the
        task number; and :py:attr:`config.wiki["shutoff"]["disabled"]
        <earwigbot.config.BotConfig.wiki>` is used as the content.

        If the page has that exact content or the page does not exist, then
        shutoff is "disabled", meaning the bot is supposed to run normally, and
        we return ``False``. If the page's content is something other than
        what we expect, shutoff is enabled, and we return ``True``.

        If a site is not provided, we'll try to use :py:attr:`self.site <site>`
        if it's set. Otherwise, we'll use our default site.
        """
        if not site:
            # An unset (or empty) self.site falls through to the default:
            site = getattr(self, "site", None) or self.bot.wiki.get_site()

        try:
            cfg = self.config.wiki["shutoff"]
        except KeyError:
            return False
        title = cfg.get("page", "User:$1/Shutoff/Task $2")
        username = site.get_user().name
        # Fill in the task number before the username, so that a username
        # containing "$2" is not substituted a second time:
        title = title.replace("$2", str(self.number)).replace("$1", username)
        page = site.get_page(title)

        try:
            content = page.get()
        except exceptions.PageNotFoundError:
            return False
        if content == cfg.get("disabled", "run"):
            return False

        self.logger.warning("Emergency task shutoff has been enabled!")
        return True
@@ -0,0 +1,329 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
import re | |||||
from earwigbot import exceptions | |||||
from earwigbot.tasks import Task | |||||
from earwigbot.wiki import constants | |||||
class WikiProjectTagger(Task):
    """A task to tag talk pages with WikiProject banners.

    Usage: :command:`earwigbot -t wikiproject_tagger PATH
    --banner BANNER (--category CAT | --file FILE) [--summary SUM]
    [--append TEXT] [--autoassess] [--nocreate] [--recursive NUM]
    [--site SITE]`

    .. glossary::

    ``--banner BANNER``
        the page name of the banner to add, without a namespace (unless the
        namespace is something other than ``Template``) so
        ``--banner WikiProject Biography`` for ``{{WikiProject Biography}}``
    ``--category CAT`` or ``--file FILE``
        determines which pages to tag; either all pages in a category (to
        include subcategories as well, see ``--recursive``) or all
        pages/categories in a file (utf-8 encoded and path relative to the
        current directory)
    ``--summary SUM``
        an optional edit summary to use; defaults to
        ``"Adding WikiProject banner {{BANNER}}."``
    ``--append TEXT``
        optional text to append to the banner (after an autoassessment, if
        any), like ``|importance=low``
    ``--autoassess``
        try to assess each article's class automatically based on the class of
        other banners on the same page
    ``--nocreate``
        don't create new talk pages with just a banner if the page doesn't
        already exist
    ``--recursive NUM``
        recursively go through subcategories up to a maximum depth of ``NUM``,
        or if ``NUM`` isn't provided, go infinitely (this can be dangerous)
    ``--site SITE``
        the ID of the site to tag pages on, defaulting to the default site
    """
    name = "wikiproject_tagger"

    # Regexes for template names that should always go above the banner, based
    # on [[Wikipedia:Talk page layout]]:
    TOP_TEMPS = [
        r"skip ?to ?(toc|talk|toctalk)$",
        r"ga ?nominee$",

        r"(user ?)?talk ?(header|page|page ?header)$",

        r"community ?article ?probation$",
        r"censor(-nudity)?$",
        r"blp(o| ?others?)?$",
        r"controvers(ial2?|y)$",

        r"(not ?(a ?)?)?forum$",
        r"tv(episode|series)talk$",
        r"recurring ?themes$",
        r"faq$",
        r"(round ?in ?)?circ(les|ular)$",

        r"ar(ti|it)cle ?(history|milestones)$",
        r"failed ?ga$",
        r"old ?prod( ?full)?$",
        r"(old|previous) ?afd$",

        r"((wikiproject|wp) ?)?bio(graph(y|ies))?$",
    ]

    def _upperfirst(self, text):
        """Try to uppercase the first letter of a string."""
        # Slicing (rather than indexing) is a no-op on the empty string.
        return text[:1].upper() + text[1:]

    def run(self, **kwargs):
        """Main entry point for the bot task.

        Recognized keyword arguments are ``banner`` (required), one or both
        of ``category`` and ``file`` (at least one required), and the
        optional ``summary``, ``append``, ``autoassess``, ``nocreate``,
        ``recursive`` and ``site``. Problems with the arguments are logged
        as errors and end the task without raising.
        """
        if "file" not in kwargs and "category" not in kwargs:
            log = "No pages to tag; I need either a 'category' or a 'file' passed as kwargs"
            self.logger.error(log)
            return
        if "banner" not in kwargs:
            log = "Needs a banner to add passed as the 'banner' kwarg"
            self.logger.error(log)
            return

        site = self.bot.wiki.get_site(name=kwargs.get("site"))
        banner = kwargs["banner"]
        summary = kwargs.get("summary", "Adding WikiProject banner $3.")
        # Always a string, so it can be concatenated onto the banner text:
        append = kwargs.get("append") or ""
        autoassess = kwargs.get("autoassess", False)
        nocreate = kwargs.get("nocreate", False)

        # ``True`` means unlimited depth. Anything else is a depth, which is
        # a string when it was parsed from the command line.
        recursive = kwargs.get("recursive", 0)
        if recursive is not True:
            try:
                recursive = int(recursive or 0)
            except (TypeError, ValueError):
                log = u"Invalid 'recursive' depth: {0!r}".format(recursive)
                self.logger.error(log)
                return

        banner, names = self.get_names(site, banner)
        if not names:
            return
        job = _Job(banner, names, summary, append, autoassess, nocreate)

        try:
            self.run_job(kwargs, site, job, recursive)
        except _ShutoffEnabled:
            return

    def run_job(self, kwargs, site, job, recursive):
        """Run a tagging *job* on a given *site*.

        Pages come from ``kwargs["category"]`` and/or the lines of
        ``kwargs["file"]``; *recursive* is passed on to
        :py:meth:`process_category`.
        """
        if "category" in kwargs:
            title = kwargs["category"]
            title = self.guess_namespace(site, title, constants.NS_CATEGORY)
            self.process_category(site.get_page(title), job, recursive)

        if "file" in kwargs:
            with open(kwargs["file"], "r") as fileobj:
                for line in fileobj:
                    # Strip before testing for [[...]]: the trailing newline
                    # would otherwise defeat endswith() and leak into the
                    # page title.
                    line = line.decode("utf8").strip()
                    if not line:
                        continue
                    if line.startswith("[[") and line.endswith("]]"):
                        line = line[2:-2]
                    page = site.get_page(line)
                    if page.namespace == constants.NS_CATEGORY:
                        self.process_category(page, job, recursive)
                    else:
                        self.process_page(page, job)

    def guess_namespace(self, site, title, assumed):
        """If the given *title* does not have an explicit namespace, guess it.

        For example, when transcluding templates, the namespace is guessed to
        be ``NS_TEMPLATE`` unless one is explicitly declared (so ``{{foo}}`` ->
        ``[[Template:Foo]]``, but ``{{:foo}}`` -> ``[[Foo]]``).
        """
        prefix = title.split(":", 1)[0]
        if prefix == title:  # No colon at all, so no namespace
            return u":".join((site.namespace_id_to_name(assumed), title))
        try:
            site.namespace_name_to_id(prefix)
        except exceptions.NamespaceNotFoundError:
            # The text before the colon is part of the page name itself.
            return u":".join((site.namespace_id_to_name(assumed), title))
        return title

    def get_names(self, site, banner):
        """Return all possible aliases for a given *banner* template.

        The result is a ``(banner, names)`` tuple, where *banner* is the name
        to transclude and *names* lists the banner's title plus its redirects
        (with and without a ``Template:`` prefix). *names* is ``None`` if the
        banner page does not exist.
        """
        title = self.guess_namespace(site, banner, constants.NS_TEMPLATE)
        if title == banner:
            # An explicit namespace was given; drop it for transclusion.
            banner = banner.split(":", 1)[1]
        page = site.get_page(title)
        if page.exists != page.PAGE_EXISTS:
            self.logger.error(u"Banner [[{0}]] does not exist".format(title))
            return banner, None

        if banner == title:
            names = [self._upperfirst(banner)]
        else:
            names = [self._upperfirst(banner), self._upperfirst(title)]
        result = site.api_query(action="query", list="backlinks", bllimit=500,
                                blfilterredir="redirects", bltitle=title)
        for backlink in result["query"]["backlinks"]:
            names.append(backlink["title"])
            if backlink["ns"] == constants.NS_TEMPLATE:
                names.append(backlink["title"].split(":", 1)[1])

        log = u"Found {0} aliases for banner [[{1}]]".format(len(names), title)
        self.logger.debug(log)
        return banner, names

    def process_category(self, page, job, recursive):
        """Try to tag all pages in the given category.

        *recursive* is ``True`` to descend into subcategories without limit,
        or the number of further subcategory levels to descend into.
        """
        self.logger.info(u"Processing category: [[{0}]]".format(page.title))
        for member in page.get_members():
            if member.namespace == constants.NS_CATEGORY:
                if recursive is True:
                    self.process_category(member, job, True)
                elif recursive > 0:
                    self.process_category(member, job, recursive - 1)
            else:
                self.process_page(member, job)

    def process_page(self, page, job):
        """Try to tag a specific *page* using the *job* description.

        Raises :py:exc:`_ShutoffEnabled` if the periodic shutoff check finds
        that shutoff has been enabled.
        """
        if job.counter % 10 == 0:  # Do a shutoff check every ten pages
            if self.shutoff_enabled(page.site):
                raise _ShutoffEnabled()
        job.counter += 1

        if not page.is_talkpage:
            page = page.toggle_talk()
        try:
            code = page.parse()
        except exceptions.PageNotFoundError:
            if job.nocreate:
                log = u"Skipping nonexistent page: [[{0}]]".format(page.title)
                self.logger.info(log)
            else:
                log = u"Tagging new page: [[{0}]]".format(page.title)
                self.logger.info(log)
                banner = "{{" + job.banner + (job.append or "") + "}}"
                summary = job.summary.replace("$3", banner)
                page.edit(banner, self.make_summary(summary))
            return
        except exceptions.InvalidPageError:
            log = u"Skipping invalid page: [[{0}]]".format(page.title)
            self.logger.error(log)
            return

        for template in code.ifilter_templates(recursive=True):
            name = self._upperfirst(template.name.strip())
            if name in job.names:
                log = u"Skipping page: [[{0}]]; already tagged with '{1}'"
                self.logger.info(log.format(page.title, name))
                return

        banner = self.make_banner(job, code)
        shell = self.get_banner_shell(code)
        if shell:
            if shell.has_param(1):
                shell.get(1).value.insert(0, banner + "\n")
            else:
                shell.add(1, banner)
        else:
            self.add_banner(code, banner)
        self.apply_genfixes(code)

        self.logger.info(u"Tagging page: [[{0}]]".format(page.title))
        summary = job.summary.replace("$3", banner)
        page.edit(unicode(code), self.make_summary(summary))

    def make_banner(self, job, code):
        """Return banner text to add based on a *job* and a page's *code*.

        With autoassessment on, a ``|class=`` parameter is added when more
        than 75% of the recognised ``class`` values already on the page
        agree on one rating.
        """
        banner = "{{" + job.banner
        if job.autoassess:
            classes = {"fa": 0, "fl": 0, "ga": 0, "a": 0, "b": 0, "start": 0,
                       "stub": 0, "list": 0, "dab": 0, "c": 0, "redirect": 0,
                       "book": 0, "template": 0, "category": 0}
            for template in code.ifilter_templates(recursive=True):
                if template.has_param("class"):
                    value = unicode(template.get("class").value)
                    value = value.strip().lower()
                    if value in classes:
                        classes[value] += 1

            total = sum(classes.values())
            if total:  # No assessed banners at all means nothing to copy
                rank = max(classes, key=classes.get)
                confidence = float(classes[rank]) / total
                if confidence > 0.75:
                    if rank in ("fa", "fl", "ga"):
                        banner += "|class=" + rank.upper()
                    else:
                        banner += "|class=" + self._upperfirst(rank)

        return banner + (job.append or "") + "}}"

    def get_banner_shell(self, code):
        """Return the banner shell template within *code*, else ``None``."""
        # The trailing group requires the template name to end there, so
        # that e.g. {{WPBiography}} is not mistaken for the {{WPB}} shell.
        regex = r"^\{\{\s*((WikiProject|WP)[ _]?Banner[ _]?S(hell)?|W(BPS|PBS|PB)|Shell)\s*(\||\}\})"
        shells = code.filter_templates(matches=regex)
        if not shells:
            shells = code.filter_templates(matches=regex, recursive=True)
        if shells:
            log = u"Inserting banner into shell: {0}"
            self.logger.debug(log.format(shells[0].name))
            return shells[0]
        return None

    def add_banner(self, code, banner):
        """Add *banner* to *code*, following template order conventions."""
        index = 0
        for i, template in enumerate(code.ifilter_templates()):
            # Strip so that padded names still match the "$"-anchored
            # patterns in TOP_TEMPS.
            name = template.name.strip().lower().replace("_", " ")
            if any(re.match(regex, name) for regex in self.TOP_TEMPS):
                self.logger.info(u"Skipping top template: {0}".format(name))
                index = i + 1

        self.logger.debug(u"Inserting banner at index {0}".format(index))
        code.insert(index, banner)

    def apply_genfixes(self, code):
        """Apply general fixes to *code*, such as template substitution."""
        # The trailing group requires the template name to end there;
        # without it, any template whose name merely starts with "s" would
        # be rewritten.
        regex = r"^\{\{\s*((un|no)?s(i((gn|ng)(ed3?)?|g))?|usu|tilde|forgot to sign|without signature)\s*(\||\}\})"
        for template in code.ifilter_templates(matches=regex):
            self.logger.debug("Applying genfix: substitute {{unsigned}}")
            template.name = "subst:unsigned"
class _Job(object): | |||||
"""Represents a single wikiproject-tagging task. | |||||
Stores information on the banner to add, the edit summary to use, whether | |||||
or not to autoassess and create new pages from scratch, and a counter of | |||||
the number of pages edited. | |||||
""" | |||||
def __init__(self, banner, names, summary, append, autoassess, nocreate): | |||||
self.banner = banner | |||||
self.names = names | |||||
self.summary = summary | |||||
self.append = append | |||||
self.autoassess = autoassess | |||||
self.nocreate = nocreate | |||||
self.counter = 0 | |||||
class _ShutoffEnabled(Exception): | |||||
"""Raised by process_page() if shutoff is enabled. Caught by run(), which | |||||
will then stop the task.""" | |||||
pass |
@@ -0,0 +1,157 @@ | |||||
#! /usr/bin/env python | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
""" | |||||
usage: :command:`earwigbot [-h] [-v] [-d | -q] [-t NAME] [PATH] ...` | |||||
This is EarwigBot's command-line utility, enabling you to easily start the bot | |||||
or run specific tasks. | |||||
.. glossary:: | |||||
``PATH`` | |||||
path to the bot's working directory, which will be created if it doesn't | |||||
exist; current directory assumed if not specified | |||||
``-h``, ``--help`` | |||||
show this help message and exit | |||||
``-v``, ``--version`` | |||||
show program's version number and exit | |||||
``-d``, ``--debug`` | |||||
print all logs, including ``DEBUG``-level messages | |||||
``-q``, ``--quiet`` | |||||
don't print any logs except warnings and errors | |||||
``-t NAME``, ``--task NAME`` | |||||
given the name of a task, the bot will run it instead of the main bot and | |||||
then exit | |||||
``TASK_ARGS`` | |||||
with --task, will pass any remaining arguments to the task's | |||||
:py:meth:`.Task.run` method | |||||
""" | |||||
from argparse import Action, ArgumentParser, REMAINDER | |||||
import logging | |||||
from os import path | |||||
from time import sleep | |||||
from earwigbot import __version__ | |||||
from earwigbot.bot import Bot | |||||
__all__ = ["main"] | |||||
class _StoreTaskArg(Action): | |||||
"""A custom argparse action to read remaining command-line arguments.""" | |||||
def __call__(self, parser, namespace, values, option_string=None): | |||||
kwargs = {} | |||||
name = None | |||||
for value in values: | |||||
if value.startswith("-") and "=" in value: | |||||
key, value = value.split("=", 1) | |||||
self.insert(kwargs, key.lstrip("-"), value) | |||||
elif name: | |||||
if value.startswith("-"): | |||||
if name not in kwargs: | |||||
kwargs[name] = True | |||||
name = value.lstrip("-") | |||||
else: | |||||
self.insert(kwargs, name, value) | |||||
name = None | |||||
else: | |||||
if value.startswith("-"): | |||||
name = value.lstrip("-") | |||||
if name and name not in kwargs: | |||||
kwargs[name] = True | |||||
namespace.task_args = kwargs | |||||
def insert(self, kwargs, key, value): | |||||
"""Add a key/value pair to kwargs; support multiple values per key.""" | |||||
if key in kwargs: | |||||
try: | |||||
kwargs[key].append(value) | |||||
except AttributeError: | |||||
kwargs[key] = [kwargs[key], value] | |||||
else: | |||||
kwargs[key] = value | |||||
def main():
    """Main entry point for the command-line utility.

    Parses the command line, prints the version, and creates a
    :py:class:`~earwigbot.bot.Bot` in the requested working directory. With
    ``--task``, that single task is started and we wait for its thread to
    finish; otherwise the bot itself is run until it exits or is
    interrupted with Ctrl+C.
    """
    version = "EarwigBot v{0}".format(__version__)
    desc = """This is EarwigBot's command-line utility, enabling you to easily
              start the bot or run specific tasks."""
    parser = ArgumentParser(description=desc)

    parser.add_argument("path", nargs="?", metavar="PATH", default=path.curdir,
                        help="""path to the bot's working directory, which will
                                be created if it doesn't exist; current
                                directory assumed if not specified""")
    parser.add_argument("-v", "--version", action="version", version=version)
    logger = parser.add_mutually_exclusive_group()
    logger.add_argument("-d", "--debug", action="store_true",
                        help="print all logs, including DEBUG-level messages")
    logger.add_argument("-q", "--quiet", action="store_true",
                        help="don't print any logs except warnings and errors")
    parser.add_argument("-t", "--task", metavar="NAME",
                        help="""given the name of a task, the bot will run it
                                instead of the main bot and then exit""")
    parser.add_argument("task_args", nargs=REMAINDER, action=_StoreTaskArg,
                        metavar="TASK_ARGS",
                        help="""with --task, will pass these arguments to the
                                task's run() method""")

    args = parser.parse_args()
    # Task arguments are only meaningful when a task was actually named.
    if not args.task and args.task_args:
        unrecognized = " ".join(args.task_args)
        parser.error("unrecognized arguments: {0}".format(unrecognized))

    level = logging.INFO
    if args.debug:
        level = logging.DEBUG
    elif args.quiet:
        level = logging.WARNING

    # Call form rather than the print statement: for a single argument it
    # behaves identically on Python 2 and is also valid on Python 3.
    print(version)
    bot = Bot(path.abspath(args.path), level=level)

    if args.task:
        thread = bot.tasks.start(args.task, **args.task_args)
        if not thread:
            return
        try:
            while thread.is_alive():  # Keep it alive; it's a daemon
                sleep(1)
        except KeyboardInterrupt:
            pass
        finally:
            if thread.is_alive():
                bot.tasks.logger.warning("The task will be killed")
    else:
        try:
            bot.run()
        except KeyboardInterrupt:
            pass
        finally:
            if bot.is_running:
                bot.stop()
if __name__ == "__main__": | |||||
main() |
@@ -0,0 +1,51 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
""" | |||||
**EarwigBot: Wiki Toolset** | |||||
This is a collection of classes and functions to read from and write to | |||||
Wikipedia and other wiki sites. No connection whatsoever to `python-wikitools | |||||
<http://code.google.com/p/python-wikitools/>`_ written by `Mr.Z-man | |||||
<http://en.wikipedia.org/wiki/User:Mr.Z-man>`_, other than a similar purpose. | |||||
We share no code. | |||||
Import the toolset directly with ``from earwigbot import wiki``. If using the | |||||
built-in integration with the rest of the bot, :py:class:`~earwigbot.bot.Bot` | |||||
objects contain a :py:attr:`~earwigbot.bot.Bot.wiki` attribute, which is a | |||||
:py:class:`~earwigbot.wiki.sitesdb.SitesDB` object tied to the :file:`sites.db` | |||||
file located in the same directory as :file:`config.yml`. That object has the | |||||
principal methods :py:meth:`~earwigbot.wiki.sitesdb.SitesDB.get_site`, | |||||
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.add_site`, and | |||||
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.remove_site` that should handle all | |||||
of your :py:class:`~earwigbot.wiki.site.Site` (and thus, | |||||
:py:class:`~earwigbot.wiki.page.Page`, | |||||
:py:class:`~earwigbot.wiki.category.Category`, and | |||||
:py:class:`~earwigbot.wiki.user.User`) needs. | |||||
""" | |||||
from earwigbot.wiki.category import * | |||||
from earwigbot.wiki.constants import * | |||||
from earwigbot.wiki.page import * | |||||
from earwigbot.wiki.site import * | |||||
from earwigbot.wiki.sitesdb import * | |||||
from earwigbot.wiki.user import * |
@@ -0,0 +1,205 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from earwigbot.wiki.page import Page | |||||
__all__ = ["Category"] | |||||
class Category(Page):
    """
    **EarwigBot: Wiki Toolset: Category**

    Represents a category on a given :py:class:`~earwigbot.wiki.site.Site`, a
    subclass of :py:class:`~earwigbot.wiki.page.Page`. Provides additional
    methods, but :py:class:`~earwigbot.wiki.page.Page`'s own methods should
    work fine on :py:class:`Category` objects. :py:meth:`site.get_page()
    <earwigbot.wiki.site.Site.get_page>` will return a :py:class:`Category`
    instead of a :py:class:`~earwigbot.wiki.page.Page` if the given title is in
    the category namespace; :py:meth:`~earwigbot.wiki.site.Site.get_category`
    is shorthand, accepting category names without the namespace prefix.

    *Attributes:*

    - :py:attr:`size`:    the total number of members in the category
    - :py:attr:`pages`:   the number of pages in the category
    - :py:attr:`files`:   the number of files in the category
    - :py:attr:`subcats`: the number of subcategories in the category

    *Public methods:*

    - :py:meth:`get_members`: iterates over Pages in the category
    """

    def __repr__(self):
        """Return the canonical string representation of the Category."""
        res = "Category(title={0!r}, follow_redirects={1!r}, site={2!r})"
        return res.format(self._title, self._follow_redirects, self._site)

    def __str__(self):
        """Return a nice string representation of the Category."""
        return '<Category "{0}" of {1}>'.format(self.title, str(self.site))

    def _get_members_via_api(self, limit, follow):
        """Iterate over Pages in the category using the API.

        At most *limit* pages are yielded; a false *limit* means no limit.
        """
        params = {"action": "query", "list": "categorymembers",
                  "cmtitle": self.title}

        while 1:
            params["cmlimit"] = limit if limit else "max"
            result = self.site.api_query(**params)
            members = result["query"]["categorymembers"]
            for member in members:
                title = member["title"]
                yield self.site.get_page(title, follow_redirects=follow)

            if "query-continue" not in result:
                break
            if limit:
                limit -= len(members)
                # Stop once the limit is used up; a remaining limit of 0
                # would otherwise be read as "no limit" on the next request.
                if limit <= 0:
                    break
            qcontinue = result["query-continue"]["categorymembers"]
            params["cmcontinue"] = qcontinue["cmcontinue"]

    def _get_members_via_sql(self, limit, follow):
        """Iterate over Pages in the category using SQL.

        At most *limit* pages are yielded; a false *limit* means no limit.
        """
        query = """SELECT page_title, page_namespace, page_id FROM page
                   JOIN categorylinks ON page_id = cl_from
                   WHERE cl_to = ?"""
        # cl_to is compared against the title with underscores and without
        # the namespace prefix.
        cat_title = self.title.replace(" ", "_").split(":", 1)[1]

        if limit:
            query += " LIMIT ?"
            result = self.site.sql_query(query, (cat_title, limit))
        else:
            result = self.site.sql_query(query, (cat_title,))

        # Read every row before yielding anything (see get_members()).
        members = list(result)
        for row in members:
            base = row[0].replace("_", " ").decode("utf8")
            namespace = self.site.namespace_id_to_name(row[1])
            if namespace:
                title = u":".join((namespace, base))
            else:  # Avoid doing a silly (albeit valid) ":Pagename" thing
                title = base
            yield self.site.get_page(title, follow_redirects=follow,
                                     pageid=row[2])

    def _get_size_via_api(self, member_type):
        """Return the size of the category using the API."""
        result = self.site.api_query(action="query", prop="categoryinfo",
                                     titles=self.title)
        # list() keeps this working where dict.values() is not indexable.
        info = list(result["query"]["pages"].values())[0]["categoryinfo"]
        return info[member_type]

    def _get_size_via_sql(self, member_type):
        """Return the size of the category using SQL."""
        query = "SELECT COUNT(*) FROM categorylinks WHERE cl_to = ?"
        title = self.title.replace(" ", "_").split(":", 1)[1]
        if member_type == "size":
            result = self.site.sql_query(query, (title,))
        else:
            query += " AND cl_type = ?"
            # Drop the plural "s": "pages" -> "page", "subcats" -> "subcat"
            result = self.site.sql_query(query, (title, member_type[:-1]))
        return list(result)[0][0]

    def _get_size(self, member_type):
        """Return the size of the category.

        *member_type* is one of ``"size"``, ``"pages"``, ``"files"`` or
        ``"subcats"``.
        """
        services = {
            self.site.SERVICE_API: self._get_size_via_api,
            self.site.SERVICE_SQL: self._get_size_via_sql
        }
        return self.site.delegate(services, (member_type,))

    @property
    def size(self):
        """The total number of members in the category.

        Includes pages, files, and subcats. Equal to :py:attr:`pages` +
        :py:attr:`files` + :py:attr:`subcats`. This will use either the API or
        SQL depending on which are enabled and the amount of lag on each. This
        is handled by :py:meth:`site.delegate()
        <earwigbot.wiki.site.Site.delegate>`.
        """
        return self._get_size("size")

    @property
    def pages(self):
        """The number of pages in the category.

        This will use either the API or SQL depending on which are enabled and
        the amount of lag on each. This is handled by :py:meth:`site.delegate()
        <earwigbot.wiki.site.Site.delegate>`.
        """
        return self._get_size("pages")

    @property
    def files(self):
        """The number of files in the category.

        This will use either the API or SQL depending on which are enabled and
        the amount of lag on each. This is handled by :py:meth:`site.delegate()
        <earwigbot.wiki.site.Site.delegate>`.
        """
        return self._get_size("files")

    @property
    def subcats(self):
        """The number of subcategories in the category.

        This will use either the API or SQL depending on which are enabled and
        the amount of lag on each. This is handled by :py:meth:`site.delegate()
        <earwigbot.wiki.site.Site.delegate>`.
        """
        return self._get_size("subcats")

    def get_members(self, limit=None, follow_redirects=None):
        """Iterate over Pages in the category.

        If *limit* is given, we will provide this many pages, or less if the
        category is smaller. By default, *limit* is ``None``, meaning we will
        keep iterating over members until the category is exhausted.
        *follow_redirects* is passed directly to :py:meth:`site.get_page()
        <earwigbot.wiki.site.Site.get_page>`; it defaults to ``None``, which
        will use the value passed to our :py:meth:`__init__`.

        This will use either the API or SQL depending on which are enabled and
        the amount of lag on each. This is handled by :py:meth:`site.delegate()
        <earwigbot.wiki.site.Site.delegate>`.

        .. note::
           Be careful when iterating over very large categories with no limit.
           If using the API, at best, you will make one query per 5000 pages,
           which can add up significantly for categories with hundreds of
           thousands of members. As for SQL, note that *all page titles are
           stored internally* as soon as the query is made, so the site-wide
           SQL lock can be freed and unrelated queries can be made without
           requiring a separate connection to be opened. This is generally not
           an issue unless your category's size approaches several hundred
           thousand, in which case the sheer number of titles in memory becomes
           problematic.
        """
        services = {
            self.site.SERVICE_API: self._get_members_via_api,
            self.site.SERVICE_SQL: self._get_members_via_sql
        }
        if follow_redirects is None:
            follow_redirects = self._follow_redirects
        return self.site.delegate(services, (limit, follow_redirects))
@@ -0,0 +1,61 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
""" | |||||
**EarwigBot: Wiki Toolset: Constants** | |||||
This module defines some useful constants: | |||||
- :py:const:`USER_AGENT`: our default User Agent when making API queries | |||||
- :py:const:`NS_*`: default namespace IDs for easy lookup | |||||
Import directly with ``from earwigbot.wiki import constants`` or | |||||
``from earwigbot.wiki.constants import *``. These are also available from | |||||
:py:mod:`earwigbot.wiki` directly (e.g. ``earwigbot.wiki.USER_AGENT``). | |||||
""" | |||||
# Default User Agent when making API queries. The helper names are private and
# deleted afterwards so that only USER_AGENT is left in the module namespace.
from platform import python_version as _p
from earwigbot import __version__ as _v

_template = "EarwigBot/{0} (Python/{1}; https://github.com/earwig/earwigbot)"
USER_AGENT = _template.format(_v, _p())
del _template, _v, _p
# Default namespace IDs, listed as (subject, talk) pairs:
NS_MAIN, NS_TALK = 0, 1
NS_USER, NS_USER_TALK = 2, 3
NS_PROJECT, NS_PROJECT_TALK = 4, 5
NS_FILE, NS_FILE_TALK = 6, 7
NS_MEDIAWIKI, NS_MEDIAWIKI_TALK = 8, 9
NS_TEMPLATE, NS_TEMPLATE_TALK = 10, 11
NS_HELP, NS_HELP_TALK = 12, 13
NS_CATEGORY, NS_CATEGORY_TALK = 14, 15
# Negative IDs have no talk counterpart here:
NS_SPECIAL, NS_MEDIA = -1, -2
@@ -0,0 +1,229 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from gzip import GzipFile | |||||
from socket import timeout | |||||
from StringIO import StringIO | |||||
from time import sleep, time | |||||
from urllib2 import build_opener, URLError | |||||
import oauth2 as oauth | |||||
from earwigbot import exceptions | |||||
from earwigbot.wiki.copyvios.markov import MarkovChain, MarkovChainIntersection | |||||
from earwigbot.wiki.copyvios.parsers import ArticleTextParser, HTMLTextParser | |||||
from earwigbot.wiki.copyvios.result import CopyvioCheckResult | |||||
from earwigbot.wiki.copyvios.search import YahooBOSSSearchEngine | |||||
__all__ = ["CopyvioMixIn"] | |||||
class CopyvioMixIn(object):
    """
    **EarwigBot: Wiki Toolset: Copyright Violation MixIn**

    This is a mixin that provides two public methods, :py:meth:`copyvio_check`
    and :py:meth:`copyvio_compare`. The former checks the page for copyright
    violations using a search engine API, and the latter compares the page
    against a given URL. Credentials for the search engine API are stored in
    the :py:class:`~earwigbot.wiki.site.Site`'s config.

    The methods here read ``self.site``, ``self.title``, ``self._logger`` and
    call ``self.get()``; the class this is mixed into must provide them.
    """

    def __init__(self, site):
        self._search_config = site._search_config
        self._exclusions_db = self._search_config.get("exclusions_db")
        self._opener = build_opener()
        self._opener.addheaders = site._opener.addheaders

    def _open_url_ignoring_errors(self, url):
        """Open a URL using self._opener and return its content, or None.

        Will decompress the content if the headers contain "gzip" as its
        content encoding, and will return None if URLError or a socket timeout
        is raised while opening the URL or reading the response. IOErrors
        while gunzipping a compressed response are ignored, and the original
        content is returned.
        """
        try:
            response = self._opener.open(url.encode("utf8"), timeout=5)
            # Reading the body can time out just like connecting can, so it
            # belongs under the same handler.
            result = response.read()
        except (URLError, timeout):
            return None

        if response.headers.get("Content-Encoding") == "gzip":
            stream = StringIO(result)
            gzipper = GzipFile(fileobj=stream)
            try:
                result = gzipper.read()
            except IOError:
                pass  # Deliberate: fall back to the raw, undecoded content

        return result

    def _select_search_engine(self):
        """Return a function that can be called to do web searches.

        The function takes one argument, a search query, and returns a list of
        URLs, ranked by importance. The underlying logic depends on the
        *engine* argument within our config; for example, if *engine* is
        "Yahoo! BOSS", we'll use YahooBOSSSearchEngine for querying.

        Raises UnknownSearchEngineError if the 'engine' listed in our config is
        unknown to us, and UnsupportedSearchEngineError if we are missing a
        required package or module, like oauth2 for "Yahoo! BOSS".
        """
        engine = self._search_config["engine"]
        credentials = self._search_config["credentials"]

        if engine == "Yahoo! BOSS":
            if not oauth:
                e = "The package 'oauth2' could not be imported"
                raise exceptions.UnsupportedSearchEngineError(e)
            return YahooBOSSSearchEngine(credentials)

        raise exceptions.UnknownSearchEngineError(engine)

    def _copyvio_compare_content(self, article, url):
        """Return a confidence number and chains comparing an article and URL.

        The *article* is a Markov chain, whereas the *url* is just a string
        that we'll try to open and read ourselves.

        The return value is always a 2-tuple ``(confidence, (source, delta))``
        where *source* is the MarkovChain of the URL's text and *delta* is its
        MarkovChainIntersection with *article*. If the URL cannot be read (or
        is empty), the confidence is ``0`` and both chains are empty.
        """
        html = self._open_url_ignoring_errors(url)
        if not html:
            # Callers unpack two values and index into the chains, so keep the
            # same shape as the success path instead of returning a bare 0.
            empty = MarkovChain("")
            return 0, (empty, MarkovChainIntersection(empty, empty))

        source = MarkovChain(HTMLTextParser(html).strip())
        delta = MarkovChainIntersection(article, source)
        return float(delta.size()) / article.size(), (source, delta)

    def copyvio_check(self, min_confidence=0.5, max_queries=-1,
                      interquery_sleep=1):
        """Check the page for copyright violations.

        Returns a
        :py:class:`~earwigbot.wiki.copyvios.result.CopyvioCheckResult` object
        with information on the results of the check.

        *min_confidence* is the confidence at or above which a URL is treated
        as a violation; searching stops as soon as it is reached.

        *max_queries* is self-explanatory; we will never make more than this
        number of queries in a given check. If it's lower than 0, we will not
        limit the number of queries.

        *interquery_sleep* is the minimum amount of time we will sleep between
        search engine queries, in seconds.

        Raises :py:exc:`~earwigbot.exceptions.CopyvioCheckError` or subclasses
        (:py:exc:`~earwigbot.exceptions.UnknownSearchEngineError`,
        :py:exc:`~earwigbot.exceptions.SearchQueryError`, ...) on errors.
        """
        searcher = self._select_search_engine()
        if self._exclusions_db:
            self._exclusions_db.sync(self.site.name)

        handled_urls = set()
        best_confidence = 0
        best_match = None
        num_queries = 0
        empty = MarkovChain("")
        best_chains = (empty, MarkovChainIntersection(empty, empty))

        parser = ArticleTextParser(self.get())
        clean = parser.strip()
        article_chain = MarkovChain(clean)

        if article_chain.size() < 20:  # Auto-fail very small articles
            # Bail out before chunking so the tokenizer is never loaded for
            # articles we are not going to search for anyway.
            return CopyvioCheckResult(False, best_confidence, best_match,
                                      num_queries, article_chain, best_chains)

        chunks = parser.chunk(self._search_config["nltk_dir"], max_queries)
        last_query = time()

        while (chunks and best_confidence < min_confidence and
               (max_queries < 0 or num_queries < max_queries)):
            chunk = chunks.pop(0)
            log = u"[[{0}]] -> querying {1} for {2!r}"
            self._logger.debug(log.format(self.title, searcher.name, chunk))
            for url in searcher.search(chunk):
                if url in handled_urls:
                    continue  # Already compared during this check
                handled_urls.add(url)
                if self._exclusions_db:
                    if self._exclusions_db.check(self.site.name, url):
                        continue
                conf, chains = self._copyvio_compare_content(article_chain, url)
                if conf > best_confidence:
                    best_confidence = conf
                    best_match = url
                    best_chains = chains
            num_queries += 1
            # Throttle: wait out whatever is left of the minimum interval.
            diff = time() - last_query
            if diff < interquery_sleep:
                sleep(interquery_sleep - diff)
            last_query = time()

        if best_confidence >= min_confidence:
            is_violation = True
            log = u"Violation detected for [[{0}]] (confidence: {1}; URL: {2}; using {3} queries)"
            self._logger.debug(log.format(self.title, best_confidence,
                                          best_match, num_queries))
        else:
            is_violation = False
            log = u"No violation for [[{0}]] (confidence: {1}; using {2} queries)"
            self._logger.debug(log.format(self.title, best_confidence,
                                          num_queries))

        return CopyvioCheckResult(is_violation, best_confidence, best_match,
                                  num_queries, article_chain, best_chains)

    def copyvio_compare(self, url, min_confidence=0.5):
        """Check the page like :py:meth:`copyvio_check` against a specific URL.

        This is essentially a reduced version of :py:meth:`copyvio_check` - a
        copyvio comparison is made using Markov chains and the result is
        returned in a
        :py:class:`~earwigbot.wiki.copyvios.result.CopyvioCheckResult` object -
        but without using a search engine, since the suspected "violated" URL
        is supplied from the start.

        Its primary use is to generate a result when the URL is retrieved from
        a cache, like the one used in EarwigBot's Toolserver site. After a
        search is done, the resulting URL is stored in a cache for 24 hours so
        future checks against that page will not require another set of
        time-and-money-consuming search engine queries. However, the comparison
        itself (which includes the article's and the source's content) cannot
        be stored for data retention reasons, so a fresh comparison is made
        using this function.

        Since no searching is done, neither
        :py:exc:`~earwigbot.exceptions.UnknownSearchEngineError` nor
        :py:exc:`~earwigbot.exceptions.SearchQueryError` will be raised.
        """
        content = self.get()
        clean = ArticleTextParser(content).strip()
        article_chain = MarkovChain(clean)
        confidence, chains = self._copyvio_compare_content(article_chain, url)

        if confidence >= min_confidence:
            is_violation = True
            log = u"Violation detected for [[{0}]] (confidence: {1}; URL: {2})"
            self._logger.debug(log.format(self.title, confidence, url))
        else:
            is_violation = False
            log = u"No violation for [[{0}]] (confidence: {1}; URL: {2})"
            self._logger.debug(log.format(self.title, confidence, url))

        return CopyvioCheckResult(is_violation, confidence, url, 0,
                                  article_chain, chains)
@@ -0,0 +1,171 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
import re | |||||
import sqlite3 as sqlite | |||||
from threading import Lock | |||||
from time import time | |||||
from urlparse import urlparse | |||||
from earwigbot import exceptions | |||||
__all__ = ["ExclusionsDB"] | |||||
# Wiki pages that each site's exclusion list is initially loaded from.
_ENWIKI_SOURCES = [
    "Wikipedia:Mirrors and forks/Abc",
    "Wikipedia:Mirrors and forks/Def",
    "Wikipedia:Mirrors and forks/Ghi",
    "Wikipedia:Mirrors and forks/Jkl",
    "Wikipedia:Mirrors and forks/Mno",
    "Wikipedia:Mirrors and forks/Pqr",
    "Wikipedia:Mirrors and forks/Stu",
    "Wikipedia:Mirrors and forks/Vwxyz",
    "User:EarwigBot/Copyvios/Exclusions",
]

# Maps a site name to the list of its source pages.
default_sources = {"enwiki": _ENWIKI_SOURCES}
class ExclusionsDB(object):
    """
    **EarwigBot: Wiki Toolset: Exclusions Database Manager**

    Controls the :file:`exclusions.db` file, which stores URLs excluded from
    copyright violation checks on account of being known mirrors, for example.
    """

    def __init__(self, sitesdb, dbfile, logger):
        self._sitesdb = sitesdb
        self._dbfile = dbfile
        self._logger = logger
        self._db_access_lock = Lock()

    def __repr__(self):
        """Return the canonical string representation of the ExclusionsDB."""
        res = "ExclusionsDB(sitesdb={0!r}, dbfile={1!r}, logger={2!r})"
        return res.format(self._sitesdb, self._dbfile, self._logger)

    def __str__(self):
        """Return a nice string representation of the ExclusionsDB."""
        return "<ExclusionsDB at {0}>".format(self._dbfile)

    def _create(self):
        """Initialize the exclusions database with its necessary tables.

        The ``sources`` table is seeded with every (sitename, page) pair
        listed in the module-level ``default_sources``.
        """
        script = """
            CREATE TABLE sources (source_sitename, source_page);
            CREATE TABLE updates (update_sitename, update_time);
            CREATE TABLE exclusions (exclusion_sitename, exclusion_url);
        """
        query = "INSERT INTO sources VALUES (?, ?);"
        sources = []
        for sitename, pages in default_sources.items():
            sources.extend((sitename, page) for page in pages)

        with sqlite.connect(self._dbfile) as conn:
            conn.executescript(script)
            conn.executemany(query, sources)

    def _load_source(self, site, source):
        """Load from a specific source and return a set of URLs.

        *site* must provide ``get_page(title)`` returning an object whose
        ``get()`` gives the page text. An empty set is returned if the page
        does not exist. URLs are lowercased and have any leading scheme
        (``http://``, ``https://``, ``//``) removed.
        """
        try:
            data = site.get_page(source).get()
        except exceptions.PageNotFoundError:
            return set()

        # The captures are lazy so that the closing </nowiki> tag, or an
        # optional closing bracket and trailing whitespace, is left out of
        # the stored URL.
        regexes = [
            r"url\s*=\s*<nowiki>(?:https?:)?(?://)?(.*?)</nowiki>",
            r"\*\s*Site:\s*\[?(?:https?:)?(?://)?(.*?)\]?\s*$"
        ]
        urls = set()
        for regex in regexes:
            # With a single capture group, findall() yields plain strings.
            for url in re.findall(regex, data, re.I | re.M):
                urls.add(url.lower())
        return urls

    def _update(self, sitename):
        """Update the database from listed sources in the index.

        Exclusions no longer listed by any source are deleted, newly listed
        ones are inserted, and the site's update time is set to now.
        """
        query1 = "SELECT source_page FROM sources WHERE source_sitename = ?;"
        query2 = "SELECT exclusion_url FROM exclusions WHERE exclusion_sitename = ?"
        query3 = "DELETE FROM exclusions WHERE exclusion_sitename = ? AND exclusion_url = ?"
        query4 = "INSERT INTO exclusions VALUES (?, ?);"
        query5 = "SELECT 1 FROM updates WHERE update_sitename = ?;"
        query6 = "UPDATE updates SET update_time = ? WHERE update_sitename = ?;"
        query7 = "INSERT INTO updates VALUES (?, ?);"

        site = self._sitesdb.get_site(sitename)
        with sqlite.connect(self._dbfile) as conn, self._db_access_lock:
            wanted = set()
            for (source,) in conn.execute(query1, (sitename,)).fetchall():
                wanted |= self._load_source(site, source)

            # Read the current rows completely before changing the table, so
            # we never delete from a table that a live cursor is walking.
            rows = conn.execute(query2, (sitename,)).fetchall()
            existing = set(url for (url,) in rows)
            stale = existing - wanted
            fresh = wanted - existing
            conn.executemany(query3, [(sitename, url) for url in stale])
            conn.executemany(query4, [(sitename, url) for url in fresh])

            if conn.execute(query5, (sitename,)).fetchone():
                conn.execute(query6, (int(time()), sitename))
            else:
                conn.execute(query7, (sitename, int(time())))

    def _get_last_update(self, sitename):
        """Return the UNIX timestamp of the last time the db was updated.

        Returns ``0`` if the site has never been updated. If the query fails
        with an OperationalError, the tables are created and ``0`` is
        returned.
        """
        query = "SELECT update_time FROM updates WHERE update_sitename = ?;"
        with sqlite.connect(self._dbfile) as conn, self._db_access_lock:
            try:
                result = conn.execute(query, (sitename,)).fetchone()
            except sqlite.OperationalError:
                self._create()
                return 0
            return result[0] if result else 0

    def sync(self, sitename):
        """Update the database if it hasn't been updated in the past week.

        This only updates the exclusions database for the *sitename* site.
        """
        max_staleness = 60 * 60 * 24 * 7
        time_since_update = int(time() - self._get_last_update(sitename))
        if time_since_update > max_staleness:
            log = u"Updating stale database: {0} (last updated {1} seconds ago)"
            self._logger.info(log.format(sitename, time_since_update))
            self._update(sitename)
        else:
            log = u"Database for {0} is still fresh (last updated {1} seconds ago)"
            self._logger.debug(log.format(sitename, time_since_update))

    def check(self, sitename, url):
        """Check whether a given URL is in the exclusions database.

        Return ``True`` if the URL is in the database, or ``False`` otherwise.

        An exclusion starting with ``*.`` matches when the rest of it occurs
        in the URL's network location; any other exclusion matches when the
        URL, with its leading scheme removed, starts with it.
        """
        lowered = url.lower()
        # Only the leading scheme is stripped; a URL embedded later on (in a
        # query string, say) must be left intact.
        normalized = re.sub(r"^https?://", "", lowered)
        netloc = urlparse(lowered).netloc
        query = "SELECT exclusion_url FROM exclusions WHERE exclusion_sitename = ?"
        with sqlite.connect(self._dbfile) as conn, self._db_access_lock:
            for (excl,) in conn.execute(query, (sitename,)):
                if excl.startswith("*."):
                    matches = excl[2:] in netloc
                else:
                    matches = normalized.startswith(excl)
                if matches:
                    log = u"Exclusion detected in {0} for {1}"
                    self._logger.debug(log.format(sitename, url))
                    return True

        log = u"No exclusions in {0} for {1}".format(sitename, url)
        self._logger.debug(log)
        return False
@@ -0,0 +1,87 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from collections import defaultdict | |||||
from re import sub, UNICODE | |||||
__all__ = ["MarkovChain", "MarkovChainIntersection"] | |||||
class MarkovChain(object):
    """Implements a basic ngram Markov chain of words.

    ``chain`` maps each tuple of ``degree - 1`` consecutive words to a dict
    that counts the words seen directly after it. The word list is padded
    with ``START`` and ``END`` markers so that the first and last words get
    entries of their own.
    """
    START = -1
    END = -2
    degree = 3  # 2 for bigrams, 3 for trigrams, etc.

    def __init__(self, text):
        self.text = text
        self.chain = defaultdict(lambda: defaultdict(int))
        # Lowercase, then drop everything except word characters, whitespace
        # and hyphens before splitting into words.
        words = sub(r"[^\w\s-]", "", text.lower(), flags=UNICODE).split()

        padding = self.degree - 1
        words = ([self.START] * padding) + words + ([self.END] * padding)
        for i in range(len(words) - self.degree + 1):
            last = i + self.degree - 1
            self.chain[tuple(words[i:last])][words[last]] += 1

    def __repr__(self):
        """Return the canonical string representation of the MarkovChain."""
        return "MarkovChain(text={0!r})".format(self.text)

    def __str__(self):
        """Return a nice string representation of the MarkovChain."""
        return "<MarkovChain of size {0}>".format(self.size())

    def size(self):
        """Return the size of the Markov chain: the total number of nodes."""
        return sum(sum(node.values()) for node in self.chain.values())
class MarkovChainIntersection(MarkovChain):
    """Implements the intersection of two chains (i.e., their shared nodes).

    Every node present in both chains is kept, with the smaller of the two
    counts as its count.
    """

    def __init__(self, mc1, mc2):
        self.mc1, self.mc2 = mc1, mc2
        self.chain = defaultdict(lambda: defaultdict(lambda: 0))
        first, second = mc1.chain, mc2.chain

        for prefix, followers in first.items():
            if prefix not in second:
                continue
            others = second[prefix]
            for word, hits in followers.items():
                if word in others:
                    self.chain[prefix][word] = min(hits, others[word])

    def __repr__(self):
        """Return the canonical string representation of the intersection."""
        template = "MarkovChainIntersection(mc1={0!r}, mc2={1!r})"
        return template.format(self.mc1, self.mc2)

    def __str__(self):
        """Return a nice string representation of the intersection."""
        template = "<MarkovChainIntersection of size {0} ({1} ^ {2})>"
        return template.format(self.size(), self.mc1, self.mc2)
@@ -0,0 +1,138 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from os import path | |||||
import bs4 | |||||
import mwparserfromhell | |||||
import nltk | |||||
__all__ = ["BaseTextParser", "ArticleTextParser", "HTMLTextParser"] | |||||
class BaseTextParser(object):
    """Base class for a parser that handles text.

    The text to work on is kept in the ``text`` attribute.
    """

    def __init__(self, text):
        self.text = text

    def __repr__(self):
        """Return the canonical string representation of the text parser."""
        cls_name = type(self).__name__
        return "{0}(text={1!r})".format(cls_name, self.text)

    def __str__(self):
        """Return a nice string representation of the text parser."""
        template = "<{0} of text with size {1}>"
        return template.format(type(self).__name__, len(self.text))
class ArticleTextParser(BaseTextParser):
    """A parser that can strip and chunk wikicode article text."""

    def strip(self):
        """Clean the page's raw text by removing templates and formatting.

        Return the page's text with all HTML and wikicode formatting removed,
        including templates, tables, and references. It retains punctuation
        (spacing, paragraphs, periods, commas, (semi)-colons, parentheses,
        quotes), original capitalization, and so forth. HTML entities are
        replaced by their unicode equivalents.

        The actual stripping is handled by :py:mod:`mwparserfromhell`. The
        result is also stored in ``self.clean`` for use by :py:meth:`chunk`.
        """
        wikicode = mwparserfromhell.parse(self.text)
        clean = wikicode.strip_code(normalize=True, collapse=True)
        self.clean = clean.replace("\n\n", "\n")  # Collapse extra newlines
        return self.clean

    def chunk(self, nltk_dir, max_chunks, max_query=256):
        """Convert the clean article text into a list of web-searchable chunks.

        No greater than *max_chunks* will be returned; a negative *max_chunks*
        means no limit, so every sentence is returned. Each chunk will only be
        a sentence or two long at most (no more than *max_query*). The idea is
        to return a sample of the article text rather than the whole, so we'll
        pick and choose from parts of it, especially if the article is large
        and *max_chunks* is low, so we don't end up just searching for just the
        first paragraph.

        This is implemented using :py:mod:`nltk` (http://nltk.org/). A base
        directory (*nltk_dir*) is required to store nltk's punctuation
        database. This is typically located in the bot's working directory.

        If :py:meth:`strip` has not been called yet, it is called first.
        """
        clean = getattr(self, "clean", None)
        if clean is None:
            clean = self.strip()

        datafile = path.join(nltk_dir, "tokenizers", "punkt", "english.pickle")
        try:
            tokenizer = nltk.data.load("file:" + datafile)
        except LookupError:
            # Tokenizer data is missing: download it once, then retry.
            nltk.download("punkt", nltk_dir)
            tokenizer = nltk.data.load("file:" + datafile)

        sentences = []
        for sentence in tokenizer.tokenize(clean):
            if len(sentence) > max_query:
                # Drop trailing words until the query is short enough.
                words = sentence.split()
                while len(" ".join(words)) > max_query:
                    words.pop()
                sentence = " ".join(words)
            if sentence:  # A single over-long word leaves nothing to search
                sentences.append(sentence)

        if max_chunks < 0 or max_chunks >= len(sentences):
            return sentences

        # Sample from across the article: start, end, middle, first quarter,
        # third quarter, then repeat. Since max_chunks < len(sentences) here,
        # the list never runs dry.
        chunks = []
        while len(chunks) < max_chunks:
            position = len(chunks) % 5
            if position == 0:
                chunk = sentences.pop(0)  # Pop from beginning
            elif position == 1:
                chunk = sentences.pop()  # Pop from end
            elif position == 2:
                chunk = sentences.pop(len(sentences) // 2)  # Pop from Q2
            elif position == 3:
                chunk = sentences.pop(len(sentences) // 4)  # Pop from Q1
            else:
                chunk = sentences.pop(3 * len(sentences) // 4)  # Pop from Q3
            chunks.append(chunk)

        return chunks
class HTMLTextParser(BaseTextParser):
    """A parser that can extract the text from an HTML document."""
    # Tags whose contents are removed before the text is extracted:
    hidden_tags = [
        "script", "style"
    ]

    def strip(self):
        """Return the actual text contained within an HTML document.

        Comments and the contents of :py:attr:`hidden_tags` are removed; the
        remaining strings are stripped and joined with newlines. Only the
        document's ``<body>`` is used when there is one; otherwise the whole
        parsed document is used.

        Implemented using :py:mod:`BeautifulSoup <bs4>`
        (http://www.crummy.com/software/BeautifulSoup/).
        """
        try:
            soup = bs4.BeautifulSoup(self.text, "lxml")
        except ValueError:
            # Constructing with "lxml" failed; let bs4 pick its default.
            soup = bs4.BeautifulSoup(self.text)

        # Not every parse result has a <body>; fall back to the whole soup
        # rather than calling find_all() on None.
        root = soup.body if soup.body is not None else soup

        is_comment = lambda text: isinstance(text, bs4.element.Comment)
        for comment in root.find_all(text=is_comment):
            comment.extract()
        for tag in self.hidden_tags:
            for element in root.find_all(tag):
                element.extract()

        return "\n".join(root.stripped_strings)
@@ -0,0 +1,60 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
__all__ = ["CopyvioCheckResult"] | |||||
class CopyvioCheckResult(object):
    """
    **EarwigBot: Wiki Toolset: Copyvio Check Result**

    A class holding information about the results of a copyvio check.

    *Attributes:*

    - :py:attr:`violation`:     ``True`` if this is a violation, else ``False``
    - :py:attr:`confidence`:    a float between 0 and 1 indicating accuracy
    - :py:attr:`url`:           the URL of the violated page
    - :py:attr:`queries`:       the number of queries used to reach a result
    - :py:attr:`article_chain`: the MarkovChain of the article text
    - :py:attr:`source_chain`:  the MarkovChain of the violated page text
    - :py:attr:`delta_chain`:   the MarkovChainIntersection comparing the two

    *chains* must be a sequence whose first item is the source chain and whose
    second item is the delta chain.
    """

    def __init__(self, violation, confidence, url, queries, article, chains):
        self.violation = violation
        self.confidence = confidence
        self.url = url
        self.queries = queries
        self.article_chain = article
        self.source_chain = chains[0]
        self.delta_chain = chains[1]

    def __repr__(self):
        """Return the canonical string representation of the result."""
        # The conversion flag is "!r"; the previous "{3|r}" was read as a
        # keyword field name and made every repr() call raise KeyError.
        res = "CopyvioCheckResult(violation={0!r}, confidence={1!r}, url={2!r}, queries={3!r})"
        return res.format(self.violation, self.confidence, self.url,
                          self.queries)

    def __str__(self):
        """Return a nice string representation of the result."""
        res = "<CopyvioCheckResult ({0} with {1} conf)>"
        return res.format(self.violation, self.confidence)
@@ -0,0 +1,91 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from json import loads | |||||
from urllib import quote_plus, urlencode | |||||
import oauth2 as oauth | |||||
from earwigbot.exceptions import SearchQueryError | |||||
__all__ = ["BaseSearchEngine", "YahooBOSSSearchEngine"] | |||||
class BaseSearchEngine(object):
    """Common interface shared by all search engine back-ends.

    Subclasses set :py:attr:`name` and override :py:meth:`search`.
    """
    name = "Base"

    def __init__(self, cred):
        """Remember the credentials *cred* so searches can use them later."""
        self.cred = cred

    def __repr__(self):
        """Return the canonical string representation of the search engine."""
        class_name = self.__class__.__name__
        return "{0}()".format(class_name)

    def __str__(self):
        """Return a nice string representation of the search engine."""
        class_name = self.__class__.__name__
        return "<{0}>".format(class_name)

    def search(self, query):
        """Search for *query* with this engine.

        The base class has no implementation and always raises
        :py:exc:`NotImplementedError`; subclasses provide the real search.
        """
        raise NotImplementedError()
class YahooBOSSSearchEngine(BaseSearchEngine):
    """A search engine interface with Yahoo! BOSS."""
    name = "Yahoo! BOSS"

    def search(self, query):
        """Do a Yahoo! BOSS web search for *query*.

        Returns a list of URLs, no more than fifty, ranked by relevance (as
        determined by Yahoo). Raises
        :py:exc:`~earwigbot.exceptions.SearchQueryError` on errors.

        The request is signed with the ``"key"`` and ``"secret"`` entries of
        the credentials given to :py:meth:`__init__`. If the decoded response
        has no ``bossresponse -> web -> results`` entry, an empty list is
        returned.
        """
        base_url = "http://yboss.yahooapis.com/ysearch/web"
        # Wrap the query in double quotes. str.join() takes a single
        # iterable, so the quotes have to be concatenated on instead.
        # NOTE(review): the query is percent-encoded here and again by
        # urlencode() below; confirm the endpoint expects double encoding.
        query = quote_plus('"' + query + '"')
        params = {"q": query, "type": "html,text", "format": "json"}
        url = "{0}?{1}".format(base_url, urlencode(params))

        consumer = oauth.Consumer(key=self.cred["key"],
                                  secret=self.cred["secret"])
        client = oauth.Client(consumer)
        headers, body = client.request(url, "GET")

        if headers["status"] != "200":
            e = "Yahoo! BOSS Error: got response code '{0}':\n{1}'"
            raise SearchQueryError(e.format(headers["status"], body))

        try:
            res = loads(body)
        except ValueError:
            e = "Yahoo! BOSS Error: JSON could not be decoded"
            raise SearchQueryError(e)

        try:
            results = res["bossresponse"]["web"]["results"]
        except KeyError:
            # No results section in the response: report no matches.
            return []
        return [result["url"] for result in results]
@@ -0,0 +1,787 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from hashlib import md5 | |||||
from logging import getLogger, NullHandler | |||||
import re | |||||
from time import gmtime, strftime | |||||
from urllib import quote | |||||
import mwparserfromhell | |||||
from earwigbot import exceptions | |||||
from earwigbot.wiki.copyvios import CopyvioMixIn | |||||
__all__ = ["Page"] | |||||
class Page(CopyvioMixIn): | |||||
""" | |||||
**EarwigBot: Wiki Toolset: Page** | |||||
Represents a page on a given :py:class:`~earwigbot.wiki.site.Site`. Has | |||||
methods for getting information about the page, getting page content, and | |||||
so on. :py:class:`~earwigbot.wiki.category.Category` is a subclass of | |||||
:py:class:`Page` with additional methods. | |||||
*Attributes:* | |||||
- :py:attr:`site`: the page's corresponding Site object | |||||
- :py:attr:`title`: the page's title, or pagename | |||||
- :py:attr:`exists`: whether or not the page exists | |||||
- :py:attr:`pageid`: an integer ID representing the page | |||||
- :py:attr:`url`: the page's URL | |||||
- :py:attr:`namespace`: the page's namespace as an integer | |||||
- :py:attr:`protection`: the page's current protection status | |||||
- :py:attr:`is_talkpage`: ``True`` if this is a talkpage, else ``False`` | |||||
- :py:attr:`is_redirect`: ``True`` if this is a redirect, else ``False`` | |||||
*Public methods:* | |||||
- :py:meth:`reload`: forcibly reloads the page's attributes | |||||
- :py:meth:`toggle_talk`: returns a content page's talk page, or vice versa | |||||
- :py:meth:`get`: returns the page's content | |||||
- :py:meth:`get_redirect_target`: returns the page's destination if it is a | |||||
redirect | |||||
- :py:meth:`get_creator`: returns a User object representing the first | |||||
person to edit the page | |||||
- :py:meth:`parse`: parses the page content for templates, links, etc | |||||
- :py:meth:`edit`: replaces the page's content or creates a new page | |||||
- :py:meth:`add_section`: adds a new section at the bottom of the page | |||||
- :py:meth:`check_exclusion`: checks whether or not we are allowed to edit | |||||
the page, per ``{{bots}}``/``{{nobots}}`` | |||||
- :py:meth:`~earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check`: | |||||
checks the page for copyright violations | |||||
- :py:meth:`~earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_compare`: | |||||
checks the page like :py:meth:`copyvio_check`, but against a specific URL | |||||
""" | |||||
PAGE_UNKNOWN = 0 | |||||
PAGE_INVALID = 1 | |||||
PAGE_MISSING = 2 | |||||
PAGE_EXISTS = 3 | |||||
def __init__(self, site, title, follow_redirects=False, pageid=None,
             logger=None):
    """Constructor for new Page instances.

    Takes five arguments: a Site object, the Page's title (or pagename),
    whether or not to follow redirects (optional, defaults to False), a
    page ID to supplement the title (optional, defaults to None - i.e.,
    we will have to query the API to get it), and a logger (optional; if
    omitted, the ``earwigbot.wiki`` logger is used with a null handler).

    As with User, site.get_page() is preferred.

    __init__() will not do any API queries, but it will use basic namespace
    logic to determine our namespace ID and if we are a talkpage.
    """
    super(Page, self).__init__(site)
    self._site = site
    self._title = title.strip()
    self._follow_redirects = self._keep_following = follow_redirects
    self._pageid = pageid

    # Set up our internal logger:
    if logger:
        self._logger = logger
    else:  # Just set up a null logger to eat up our messages:
        self._logger = getLogger("earwigbot.wiki")
        # getLogger() hands back the same object for every Page, so only
        # attach a NullHandler once instead of piling up one per instance:
        handlers = self._logger.handlers
        if not any(isinstance(hdlr, NullHandler) for hdlr in handlers):
            self._logger.addHandler(NullHandler())

    # Attributes to be loaded through the API:
    self._exists = self.PAGE_UNKNOWN
    self._is_redirect = None
    self._lastrevid = None
    self._protection = None
    self._fullurl = None
    self._content = None
    self._creator = None

    # Attributes used for editing/deleting/protecting/etc:
    self._token = None
    self._basetimestamp = None
    self._starttimestamp = None

    # Try to determine the page's namespace using our site's namespace
    # converter. Compare against the stripped title: comparing against the
    # raw argument would treat " User " as having the prefix "User".
    prefix = self._title.split(":", 1)[0]
    if prefix != self._title:  # ignore a page titled "Category" or "User"
        try:
            self._namespace = self.site.namespace_name_to_id(prefix)
        except exceptions.NamespaceNotFoundError:
            self._namespace = 0
    else:
        self._namespace = 0

    # Is this a talkpage? Talkpages have odd IDs, while content pages have
    # even IDs, excluding the "special" namespaces:
    if self._namespace < 0:
        self._is_talkpage = False
    else:
        self._is_talkpage = self._namespace % 2 == 1
def __repr__(self): | |||||
"""Return the canonical string representation of the Page.""" | |||||
res = "Page(title={0!r}, follow_redirects={1!r}, site={2!r})" | |||||
return res.format(self._title, self._follow_redirects, self._site) | |||||
def __str__(self): | |||||
"""Return a nice string representation of the Page.""" | |||||
return '<Page "{0}" of {1}>'.format(self.title, str(self.site)) | |||||
def _assert_validity(self): | |||||
"""Used to ensure that our page's title is valid. | |||||
If this method is called when our page is not valid (and after | |||||
_load_attributes() has been called), InvalidPageError will be raised. | |||||
Note that validity != existence. If a page's title is invalid (e.g, it | |||||
contains "[") it will always be invalid, and cannot be edited. | |||||
""" | |||||
if self._exists == self.PAGE_INVALID: | |||||
e = u"Page '{0}' is invalid.".format(self._title) | |||||
raise exceptions.InvalidPageError(e) | |||||
def _assert_existence(self): | |||||
"""Used to ensure that our page exists. | |||||
If this method is called when our page doesn't exist (and after | |||||
_load_attributes() has been called), PageNotFoundError will be raised. | |||||
It will also call _assert_validity() beforehand. | |||||
""" | |||||
self._assert_validity() | |||||
if self._exists == self.PAGE_MISSING: | |||||
e = u"Page '{0}' does not exist.".format(self._title) | |||||
raise exceptions.PageNotFoundError(e) | |||||
def _load(self):
    """Load our attributes, following one redirect if asked to.

    Redirects are followed only when follow_redirects=True was passed to
    __init__() (perhaps indirectly, by site.get_page()). We follow them by
    hand rather than with the API's &redirects param so we can act more
    realistically: double redirects are not followed, and circular
    redirects don't break us.

    This will raise RedirectError if we have a problem following, but that
    is a bug and should NOT happen.

    If we're following a redirect, this will make a grand total of three
    API queries. It's a lot, but each one is quite small.
    """
    self._load_attributes()

    should_follow = self._keep_following and self._is_redirect
    if not should_follow:
        return

    self._title = self.get_redirect_target()
    self._keep_following = False  # one hop only; no double redirects
    self._content = None  # discard the redirect's own content
    self._load_attributes()
def _load_attributes(self, result=None): | |||||
"""Load various data from the API in a single query. | |||||
Loads self._title, ._exists, ._is_redirect, ._pageid, ._fullurl, | |||||
._protection, ._namespace, ._is_talkpage, ._creator, ._lastrevid, | |||||
._token, and ._starttimestamp using the API. It will do a query of | |||||
its own unless *result* is provided, in which case we'll pretend | |||||
*result* is what the query returned. | |||||
Assuming the API is sound, this should not raise any exceptions. | |||||
""" | |||||
if not result: | |||||
query = self.site.api_query | |||||
result = query(action="query", rvprop="user", intoken="edit", | |||||
prop="info|revisions", rvlimit=1, rvdir="newer", | |||||
titles=self._title, inprop="protection|url") | |||||
res = result["query"]["pages"].values()[0] | |||||
self._title = res["title"] # Normalize our pagename/title | |||||
self._is_redirect = "redirect" in res | |||||
self._pageid = int(result["query"]["pages"].keys()[0]) | |||||
if self._pageid < 0: | |||||
if "missing" in res: | |||||
# If it has a negative ID and it's missing; we can still get | |||||
# data like the namespace, protection, and URL: | |||||
self._exists = self.PAGE_MISSING | |||||
else: | |||||
# If it has a negative ID and it's invalid, then break here, | |||||
# because there's no other data for us to get: | |||||
self._exists = self.PAGE_INVALID | |||||
return | |||||
else: | |||||
self._exists = self.PAGE_EXISTS | |||||
self._fullurl = res["fullurl"] | |||||
self._protection = res["protection"] | |||||
try: | |||||
self._token = res["edittoken"] | |||||
except KeyError: | |||||
pass | |||||
else: | |||||
self._starttimestamp = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime()) | |||||
# We've determined the namespace and talkpage status in __init__() | |||||
# based on the title, but now we can be sure: | |||||
self._namespace = res["ns"] | |||||
self._is_talkpage = self._namespace % 2 == 1 # talkpages have odd IDs | |||||
# These last two fields will only be specified if the page exists: | |||||
self._lastrevid = res.get("lastrevid") | |||||
try: | |||||
self._creator = res['revisions'][0]['user'] | |||||
except KeyError: | |||||
pass | |||||
def _load_content(self, result=None): | |||||
"""Load current page content from the API. | |||||
If *result* is provided, we'll pretend that is the result of an API | |||||
query and try to get content from that. Otherwise, we'll do an API | |||||
query on our own. | |||||
Don't call this directly, ever; use reload() followed by get() if you | |||||
want to force content reloading. | |||||
""" | |||||
if not result: | |||||
query = self.site.api_query | |||||
result = query(action="query", prop="revisions", rvlimit=1, | |||||
rvprop="content|timestamp", titles=self._title) | |||||
res = result["query"]["pages"].values()[0] | |||||
try: | |||||
self._content = res["revisions"][0]["*"] | |||||
self._basetimestamp = res["revisions"][0]["timestamp"] | |||||
except KeyError: | |||||
# This can only happen if the page was deleted since we last called | |||||
# self._load_attributes(). In that case, some of our attributes are | |||||
# outdated, so force another self._load_attributes(): | |||||
self._load_attributes() | |||||
self._assert_existence() | |||||
def _edit(self, params=None, text=None, summary=None, minor=None, bot=None,
          force=None, section=None, captcha_id=None, captcha_word=None,
          tries=0):
    """Edit the page!

    If *params* is given, we'll use it as our API query parameters (with
    its token refreshed). Otherwise, we'll build params using the given
    kwargs via _build_edit_params().

    We'll then try to do the API query, and catch any errors the API raises
    in _handle_edit_errors(). We'll then throw these back as subclasses of
    EditError.

    *tries* counts login retries already made for this edit; the error
    handlers pass 1 when they re-enter this method after logging in.

    Raises PermissionsError if no edit token can be obtained,
    InvalidPageError if the title is invalid, and EditError if the API
    reports a failure that is not an AssertEdit assertion.
    """
    # Try to get our edit token, and die if we can't:
    if not self._token:
        self._load_attributes()
    if not self._token:
        e = "You don't have permission to edit this page."
        raise exceptions.PermissionsError(e)

    # Weed out invalid pages before we get too far:
    self._assert_validity()

    # Build our API query string:
    if not params:
        params = self._build_edit_params(text, summary, minor, bot, force,
                                         section, captcha_id, captcha_word)
    else:  # Make sure we have the right token:
        params["token"] = self._token

    # Try the API query, catching most errors with our handler:
    try:
        result = self.site.api_query(**params)
    except exceptions.APIError as error:
        if not hasattr(error, "code"):
            raise  # We can only handle errors with a code attribute
        result = self._handle_edit_errors(error, params, tries)
        if result is None:
            # The handler recovered by re-running the edit through a nested
            # _edit() call, which returns nothing once it has finished.
            # There is no response left for us to inspect.
            return

    # If everything was successful, reset invalidated attributes:
    if result["edit"]["result"] == "Success":
        self._content = None
        self._basetimestamp = None
        self._exists = self.PAGE_UNKNOWN
        return

    # If we're here, then the edit failed. If it's because of AssertEdit,
    # handle that. Otherwise, die - something odd is going on:
    try:
        assertion = result["edit"]["assert"]
    except KeyError:
        raise exceptions.EditError(result["edit"])
    self._handle_assert_edit(assertion, params, tries)
def _build_edit_params(self, text, summary, minor, bot, force, section, | |||||
captcha_id, captcha_word): | |||||
"""Given some keyword arguments, build an API edit query string.""" | |||||
unitxt = text.encode("utf8") if isinstance(text, unicode) else text | |||||
hashed = md5(unitxt).hexdigest() # Checksum to ensure text is correct | |||||
params = {"action": "edit", "title": self._title, "text": text, | |||||
"token": self._token, "summary": summary, "md5": hashed} | |||||
if section: | |||||
params["section"] = section | |||||
if captcha_id and captcha_word: | |||||
params["captchaid"] = captcha_id | |||||
params["captchaword"] = captcha_word | |||||
if minor: | |||||
params["minor"] = "true" | |||||
else: | |||||
params["notminor"] = "true" | |||||
if bot: | |||||
params["bot"] = "true" | |||||
if not force: | |||||
params["starttimestamp"] = self._starttimestamp | |||||
if self._basetimestamp: | |||||
params["basetimestamp"] = self._basetimestamp | |||||
if self._exists == self.PAGE_MISSING: | |||||
# Page does not exist; don't edit if it already exists: | |||||
params["createonly"] = "true" | |||||
else: | |||||
params["recreate"] = "true" | |||||
return params | |||||
def _handle_edit_errors(self, error, params, tries):
    """React to an API error raised while editing.

    Depending on ``error.code`` we either raise a more specific exception
    (for example, PermissionsError when the page is protected) or try to
    recover: for the "-anon" codes we log in and retry the edit once,
    returning whatever that retried _edit() call returns. Unrecognized
    codes are raised as a plain EditError.
    """
    code = error.code

    if code in ("noedit", "cantcreate", "protectedtitle",
                "noimageredirect"):
        raise exceptions.PermissionsError(error.info)

    if code in ("noedit-anon", "cantcreate-anon", "noimageredirect-anon"):
        if not all(self.site._login_info):
            # Insufficient login info:
            raise exceptions.PermissionsError(error.info)
        if tries != 0:
            # We already tried to log in and failed!
            e = "Although we should be logged in, we are not. This may be a cookie problem or an odd bug."
            raise exceptions.LoginError(e)
        # We have login info; try to login:
        self.site._login(self.site._login_info)
        self._token = None  # Need a new token; old one is invalid now
        return self._edit(params=params, tries=1)

    if code in ("editconflict", "pagedeleted", "articleexists"):
        # These attributes are now invalidated:
        self._content = None
        self._basetimestamp = None
        self._exists = self.PAGE_UNKNOWN
        raise exceptions.EditConflictError(error.info)

    if code in ("emptypage", "emptynewsection"):
        raise exceptions.NoContentError(error.info)
    if code == "contenttoobig":
        raise exceptions.ContentTooBigError(error.info)
    if code == "spamdetected":
        raise exceptions.SpamDetectedError(error.info)
    if code == "filtered":
        raise exceptions.FilteredError(error.info)

    raise exceptions.EditError(": ".join((error.code, error.info)))
def _handle_assert_edit(self, assertion, params, tries):
    """Deal with an edit rejected by a failed AssertEdit assertion.

    For the 'user' and 'bot' assertions, if we have complete login
    information and have not retried yet, log in and retry the edit once.
    Without login information, PermissionsError is raised. After a failed
    retry, LoginError ('user') or PermissionsError ('bot') is raised. Any
    other assertion is raised as PermissionsError.
    """
    if assertion not in ("user", "bot"):
        # Unknown assertion, maybe "true", "false", or "exists":
        e = "AssertEdit: assertion '{0}' failed.".format(assertion)
        raise exceptions.PermissionsError(e)

    is_user = assertion == "user"

    if not all(self.site._login_info):
        # Insufficient login info:
        if is_user:
            e = "AssertEdit: user assertion failed, and no login info was provided."
        else:
            e = "AssertEdit: bot assertion failed, and no login info was provided."
        raise exceptions.PermissionsError(e)

    if tries != 0:
        if is_user:
            # We already tried to log in and failed!
            e = "Although we should be logged in, we are not. This may be a cookie problem or an odd bug."
            raise exceptions.LoginError(e)
        # We already tried to log in, so we don't have a bot flag:
        e = "AssertEdit: bot assertion failed: we don't have a bot flag!"
        raise exceptions.PermissionsError(e)

    # First attempt: log in (we may have been logged out) and retry once.
    self.site._login(self.site._login_info)
    self._token = None  # Need a new token; old one is invalid now
    return self._edit(params=params, tries=1)
@property | |||||
def site(self): | |||||
"""The page's corresponding Site object.""" | |||||
return self._site | |||||
@property | |||||
def title(self): | |||||
"""The page's title, or "pagename". | |||||
This won't do any API queries on its own. Any other attributes or | |||||
methods that do API queries will reload the title, however, like | |||||
:py:attr:`exists` and :py:meth:`get`, potentially "normalizing" it or | |||||
following redirects if :py:attr:`self._follow_redirects` is ``True``. | |||||
""" | |||||
return self._title | |||||
@property | |||||
def exists(self): | |||||
"""Whether or not the page exists. | |||||
This will be a number; its value does not matter, but it will equal | |||||
one of :py:attr:`self.PAGE_INVALID <PAGE_INVALID>`, | |||||
:py:attr:`self.PAGE_MISSING <PAGE_MISSING>`, or | |||||
:py:attr:`self.PAGE_EXISTS <PAGE_EXISTS>`. | |||||
Makes an API query only if we haven't already made one. | |||||
""" | |||||
if self._exists == self.PAGE_UNKNOWN: | |||||
self._load() | |||||
return self._exists | |||||
@property | |||||
def pageid(self): | |||||
"""An integer ID representing the page. | |||||
Makes an API query only if we haven't already made one and the *pageid* | |||||
parameter to :py:meth:`__init__` was left as ``None``, which should be | |||||
true for all cases except when pages are returned by an SQL generator | |||||
(like :py:meth:`category.get_members() | |||||
<earwigbot.wiki.category.Category.get_members>`). | |||||
Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or | |||||
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is | |||||
invalid or the page does not exist, respectively. | |||||
""" | |||||
if self._pageid: | |||||
return self._pageid | |||||
if self._exists == self.PAGE_UNKNOWN: | |||||
self._load() | |||||
self._assert_existence() # Missing pages do not have IDs | |||||
return self._pageid | |||||
@property | |||||
def url(self): | |||||
"""The page's URL. | |||||
Like :py:meth:`title`, this won't do any API queries on its own. If the | |||||
API was never queried for this page, we will attempt to determine the | |||||
URL ourselves based on the title. | |||||
""" | |||||
if self._fullurl: | |||||
return self._fullurl | |||||
else: | |||||
encoded = self._title.encode("utf8").replace(" ", "_") | |||||
slug = quote(encoded, safe="/:").decode("utf8") | |||||
path = self.site._article_path.replace("$1", slug) | |||||
return u"".join((self.site.url, path)) | |||||
@property | |||||
def namespace(self): | |||||
"""The page's namespace ID (an integer). | |||||
Like :py:meth:`title`, this won't do any API queries on its own. If the | |||||
API was never queried for this page, we will attempt to determine the | |||||
namespace ourselves based on the title. | |||||
""" | |||||
return self._namespace | |||||
@property | |||||
def protection(self): | |||||
"""The page's current protection status. | |||||
Makes an API query only if we haven't already made one. | |||||
Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` if the page | |||||
name is invalid. Won't raise an error if the page is missing because | |||||
those can still be create-protected. | |||||
""" | |||||
if self._exists == self.PAGE_UNKNOWN: | |||||
self._load() | |||||
self._assert_validity() # Invalid pages cannot be protected | |||||
return self._protection | |||||
@property | |||||
def is_talkpage(self): | |||||
"""``True`` if the page is a talkpage, otherwise ``False``. | |||||
Like :py:meth:`title`, this won't do any API queries on its own. If the | |||||
API was never queried for this page, we will attempt to determine | |||||
whether it is a talkpage ourselves based on its namespace. | |||||
""" | |||||
return self._is_talkpage | |||||
@property | |||||
def is_redirect(self): | |||||
"""``True`` if the page is a redirect, otherwise ``False``. | |||||
Makes an API query only if we haven't already made one. | |||||
We will return ``False`` even if the page does not exist or is invalid. | |||||
""" | |||||
if self._exists == self.PAGE_UNKNOWN: | |||||
self._load() | |||||
return self._is_redirect | |||||
def reload(self):
    """Forcibly reload the page's attributes.

    Emphasis on *reload*: this is only necessary if there is reason to
    believe they have changed. Page content is fetched again only if it
    had already been loaded.
    """
    self._load()
    if self._content is None:
        # Content was never loaded, so there is nothing to refresh.
        return
    self._load_content()
def toggle_talk(self, follow_redirects=None):
    """Return a content page's talk page, or vice versa.

    The title of the new page is determined by namespace logic, not API
    queries. We won't make any API queries on our own.

    If *follow_redirects* is anything other than ``None`` (the default), it
    will be passed to the new :py:class:`~earwigbot.wiki.page.Page`
    object's :py:meth:`__init__`. Otherwise, we'll use the value passed to
    our own :py:meth:`__init__`.

    Will raise :py:exc:`~earwigbot.exceptions.InvalidPageError` if we try
    to get the talk page of a special page (in the ``Special:`` or
    ``Media:`` namespaces), but we won't raise an exception if our page is
    otherwise missing or invalid.
    """
    if self._namespace < 0:
        ns = self.site.namespace_id_to_name(self._namespace)
        e = u"Pages in the {0} namespace can't have talk pages.".format(ns)
        raise exceptions.InvalidPageError(e)

    if self._is_talkpage:
        new_ns = self._namespace - 1
    else:
        new_ns = self._namespace + 1

    if self._namespace != 0:
        # The text before the first colon is our namespace prefix:
        try:
            body = self._title.split(":", 1)[1]
        except IndexError:
            body = self._title
    elif self._title.startswith(":"):
        # An explicit main-namespace marker (":Title"); drop only that.
        body = self._title[1:]
    else:
        # Main-namespace titles have no prefix, so a colon here belongs to
        # the title itself ("Foo: Bar") and must be kept intact.
        body = self._title

    new_prefix = self.site.namespace_id_to_name(new_ns)

    # If the new page is in namespace 0, don't do ":Title" (it's correct,
    # but unnecessary), just do "Title":
    if new_prefix:
        new_title = u":".join((new_prefix, body))
    else:
        new_title = body

    if follow_redirects is None:
        follow_redirects = self._follow_redirects
    return Page(self.site, new_title, follow_redirects)
def get(self): | |||||
"""Return page content, which is cached if you try to call get again. | |||||
Raises InvalidPageError or PageNotFoundError if the page name is | |||||
invalid or the page does not exist, respectively. | |||||
""" | |||||
if self._exists == self.PAGE_UNKNOWN: | |||||
# Kill two birds with one stone by doing an API query for both our | |||||
# attributes and our page content: | |||||
query = self.site.api_query | |||||
result = query(action="query", rvlimit=1, titles=self._title, | |||||
prop="info|revisions", inprop="protection|url", | |||||
intoken="edit", rvprop="content|timestamp") | |||||
self._load_attributes(result=result) | |||||
self._assert_existence() | |||||
self._load_content(result=result) | |||||
# Follow redirects if we're told to: | |||||
if self._keep_following and self._is_redirect: | |||||
self._title = self.get_redirect_target() | |||||
self._keep_following = False # Don't follow double redirects | |||||
self._exists = self.PAGE_UNKNOWN # Force another API query | |||||
self.get() | |||||
return self._content | |||||
# Make sure we're dealing with a real page here. This may be outdated | |||||
# if the page was deleted since we last called self._load_attributes(), | |||||
# but self._load_content() can handle that: | |||||
self._assert_existence() | |||||
if self._content is None: | |||||
self._load_content() | |||||
return self._content | |||||
def get_redirect_target(self): | |||||
"""If the page is a redirect, return its destination. | |||||
Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or | |||||
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is | |||||
invalid or the page does not exist, respectively. Raises | |||||
:py:exc:`~earwigbot.exceptions.RedirectError` if the page is not a | |||||
redirect. | |||||
""" | |||||
re_redirect = "^\s*\#\s*redirect\s*\[\[(.*?)\]\]" | |||||
content = self.get() | |||||
try: | |||||
return re.findall(re_redirect, content, flags=re.I)[0] | |||||
except IndexError: | |||||
e = "The page does not appear to have a redirect target." | |||||
raise exceptions.RedirectError(e) | |||||
def get_creator(self): | |||||
"""Return the User object for the first person to edit the page. | |||||
Makes an API query only if we haven't already made one. Normally, we | |||||
can get the creator along with everything else (except content) in | |||||
:py:meth:`_load_attributes`. However, due to a limitation in the API | |||||
(can't get the editor of one revision and the content of another at | |||||
both ends of the history), if our other attributes were only loaded | |||||
through :py:meth:`get`, we'll have to do another API query. | |||||
Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or | |||||
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is | |||||
invalid or the page does not exist, respectively. | |||||
""" | |||||
if self._exists == self.PAGE_UNKNOWN: | |||||
self._load() | |||||
self._assert_existence() | |||||
if not self._creator: | |||||
self._load() | |||||
self._assert_existence() | |||||
return self.site.get_user(self._creator) | |||||
def parse(self):
    """Parse the page content for templates, links, etc.

    The text returned by :py:meth:`get` is handed to
    :py:mod:`mwparserfromhell`, which does the actual parsing. Raises
    :py:exc:`~earwigbot.exceptions.InvalidPageError` or
    :py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
    invalid or the page does not exist, respectively.
    """
    content = self.get()
    return mwparserfromhell.parse(content)
def edit(self, text, summary, minor=False, bot=True, force=False):
    """Replace the page's content or create a new page.

    *text* is the new page content, with *summary* as the edit summary.
    If *minor* is ``True``, the edit will be marked as minor. If *bot* is
    ``True``, the edit will be marked as a bot edit, but only if we
    actually have a bot flag.

    Use *force* to push the new content even if there's an edit conflict or
    the page was deleted/recreated between getting our edit token and
    editing our page. Be careful with this!
    """
    options = {
        "text": text,
        "summary": summary,
        "minor": minor,
        "bot": bot,
        "force": force,
    }
    self._edit(**options)
def add_section(self, text, title, minor=False, bot=True, force=False):
    """Add a new section to the bottom of the page.

    The arguments are the same as those for :py:meth:`edit`, except that a
    section *title* is given in place of an edit summary. Likewise, raised
    exceptions are the same as :py:meth:`edit`'s.

    This should create the page if it does not already exist, with just the
    new section as content.
    """
    options = {
        "text": text,
        "summary": title,  # the section title is sent as the summary
        "minor": minor,
        "bot": bot,
        "force": force,
        "section": "new",
    }
    self._edit(**options)
def check_exclusion(self, username=None, optouts=None):
    """Check whether or not we are allowed to edit the page.

    Return ``True`` if we *are* allowed to edit this page, and ``False`` if
    we aren't.

    *username* is used to determine whether we are part of a specific list
    of allowed or disallowed bots (e.g. ``{{bots|allow=EarwigBot}}`` or
    ``{{bots|deny=FooBot,EarwigBot}}``). It's ``None`` by default, which
    will swipe our username from :py:meth:`site.get_user()
    <earwigbot.wiki.site.Site.get_user>`.\
    :py:attr:`~earwigbot.wiki.user.User.name`.

    *optouts* is a list of messages to consider this check as part of for
    the purpose of opt-out; it defaults to ``None``, which ignores the
    parameter completely. For example, if *optouts* is ``["nolicense"]``,
    we'll return ``False`` on ``{{bots|optout=nolicense}}`` or
    ``{{bots|optout=all}}``, but ``True`` on
    ``{{bots|optout=orfud,norationale,replaceable}}``.

    All comparisons are case-insensitive. Templates are checked in the
    order the parser yields them; an ``allow`` entry that covers us skips
    the remaining checks for that one template only.
    """
    def parse_param(template, param):
        """Return *param*'s comma-separated value as lowercased items."""
        value = template.get(param).value
        return [item.strip().lower() for item in value.split(",")]

    if not username:
        username = self.site.get_user().name

    # Lowercase everything:
    username = username.lower()
    optouts = [optout.lower() for optout in optouts] if optouts else []

    # Matches the opening of {{bots}} / {{nobots}}, with or without params.
    r_bots = r"\{\{\s*(no)?bots\s*(\||\}\})"
    templates = self.parse().ifilter_templates(recursive=True,
                                               matches=r_bots)
    for template in templates:
        if template.has_param("deny"):
            denies = parse_param(template, "deny")
            if "all" in denies or username in denies:
                return False
        if template.has_param("allow"):
            allows = parse_param(template, "allow")
            if "all" in allows or username in allows:
                continue  # Explicitly allowed by this template
        if optouts and template.has_param("optout"):
            tasks = parse_param(template, "optout")
            if "all" in tasks or any(optout in tasks for optout in optouts):
                return False
        if template.name.strip().lower() == "nobots":
            return False
    return True
@@ -0,0 +1,849 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from cookielib import CookieJar | |||||
from gzip import GzipFile | |||||
from json import loads | |||||
from logging import getLogger, NullHandler | |||||
from os.path import expanduser | |||||
from StringIO import StringIO | |||||
from threading import Lock | |||||
from time import sleep, time | |||||
from urllib import quote_plus, unquote_plus | |||||
from urllib2 import build_opener, HTTPCookieProcessor, URLError | |||||
from urlparse import urlparse | |||||
import oursql | |||||
from earwigbot import exceptions | |||||
from earwigbot.wiki import constants | |||||
from earwigbot.wiki.category import Category | |||||
from earwigbot.wiki.page import Page | |||||
from earwigbot.wiki.user import User | |||||
__all__ = ["Site"] | |||||
class Site(object): | |||||
""" | |||||
**EarwigBot: Wiki Toolset: Site** | |||||
Represents a site, with support for API queries and returning | |||||
:py:class:`~earwigbot.wiki.page.Page`, | |||||
:py:class:`~earwigbot.wiki.user.User`, | |||||
and :py:class:`~earwigbot.wiki.category.Category` objects. The constructor | |||||
takes a bunch of arguments and you probably won't need to call it directly, | |||||
rather :py:meth:`wiki.get_site() <earwigbot.wiki.sitesdb.SitesDB.get_site>` | |||||
for returning :py:class:`Site` | |||||
instances, :py:meth:`wiki.add_site() | |||||
<earwigbot.wiki.sitesdb.SitesDB.add_site>` for adding new ones to our | |||||
database, and :py:meth:`wiki.remove_site() | |||||
<earwigbot.wiki.sitesdb.SitesDB.remove_site>` for removing old ones from | |||||
our database, should suffice. | |||||
*Attributes:* | |||||
- :py:attr:`name`: the site's name (or "wikiid"), like ``"enwiki"`` | |||||
- :py:attr:`project`: the site's project name, like ``"wikipedia"`` | |||||
- :py:attr:`lang`: the site's language code, like ``"en"`` | |||||
- :py:attr:`domain`: the site's web domain, like ``"en.wikipedia.org"`` | |||||
- :py:attr:`url`: the site's URL, like ``"https://en.wikipedia.org"`` | |||||
*Public methods:* | |||||
- :py:meth:`api_query`: does an API query with kwargs as params | |||||
- :py:meth:`sql_query`: does an SQL query and yields its results | |||||
- :py:meth:`get_maxlag`: returns the internal database lag | |||||
- :py:meth:`get_replag`: estimates the external database lag | |||||
- :py:meth:`namespace_id_to_name`: returns names associated with an NS id | |||||
- :py:meth:`namespace_name_to_id`: returns the ID associated with a NS name | |||||
- :py:meth:`get_page`: returns a Page for the given title | |||||
- :py:meth:`get_category`: returns a Category for the given title | |||||
- :py:meth:`get_user`: returns a User object for the given name | |||||
- :py:meth:`delegate`: controls when the API or SQL is used | |||||
""" | |||||
SERVICE_API = 1 | |||||
SERVICE_SQL = 2 | |||||
def __init__(self, name=None, project=None, lang=None, base_url=None, | |||||
article_path=None, script_path=None, sql=None, | |||||
namespaces=None, login=(None, None), cookiejar=None, | |||||
user_agent=None, use_https=False, assert_edit=None, | |||||
maxlag=None, wait_between_queries=2, logger=None, | |||||
search_config=None): | |||||
"""Constructor for new Site instances. | |||||
This probably isn't necessary to call yourself unless you're building a | |||||
Site that's not in your config and you don't want to add it - normally | |||||
all you need is wiki.get_site(name), which creates the Site for you | |||||
based on your config file and the sites database. We accept a bunch of | |||||
kwargs, but the only ones you really "need" are *base_url* and | |||||
*script_path*; this is enough to figure out an API url. *login*, a | |||||
tuple of (username, password), is highly recommended. *cookiejar* will | |||||
be used to store cookies, and we'll use a normal CookieJar if none is | |||||
given. | |||||
First, we'll store the given arguments as attributes, then set up our | |||||
URL opener. We'll load any of the attributes that weren't given from | |||||
the API, and then log in if a username/pass was given and we aren't | |||||
already logged in. | |||||
""" | |||||
# Attributes referring to site information, filled in by an API query | |||||
# if they are missing (and an API url can be determined): | |||||
self._name = name | |||||
self._project = project | |||||
self._lang = lang | |||||
self._base_url = base_url | |||||
self._article_path = article_path | |||||
self._script_path = script_path | |||||
self._namespaces = namespaces | |||||
# Attributes used for API queries: | |||||
self._use_https = use_https | |||||
self._assert_edit = assert_edit | |||||
self._maxlag = maxlag | |||||
self._wait_between_queries = wait_between_queries | |||||
self._max_retries = 6 | |||||
self._last_query_time = 0 | |||||
self._api_lock = Lock() | |||||
self._api_info_cache = {"maxlag": 0, "lastcheck": 0} | |||||
# Attributes used for SQL queries: | |||||
if sql: | |||||
self._sql_data = sql | |||||
else: | |||||
self._sql_data = {} | |||||
self._sql_conn = None | |||||
self._sql_lock = Lock() | |||||
self._sql_info_cache = {"replag": 0, "lastcheck": 0, "usable": None} | |||||
# Attribute used in copyright violation checks (see CopyrightMixIn): | |||||
if search_config: | |||||
self._search_config = search_config | |||||
else: | |||||
self._search_config = {} | |||||
# Set up cookiejar and URL opener for making API queries: | |||||
if cookiejar is not None: | |||||
self._cookiejar = cookiejar | |||||
else: | |||||
self._cookiejar = CookieJar() | |||||
if not user_agent: | |||||
user_agent = constants.USER_AGENT # Set default UA | |||||
self._opener = build_opener(HTTPCookieProcessor(self._cookiejar)) | |||||
self._opener.addheaders = [("User-Agent", user_agent), | |||||
("Accept-Encoding", "gzip")] | |||||
# Set up our internal logger: | |||||
if logger: | |||||
self._logger = logger | |||||
else: # Just set up a null logger to eat up our messages: | |||||
self._logger = getLogger("earwigbot.wiki") | |||||
self._logger.addHandler(NullHandler()) | |||||
# Get all of the above attributes that were not specified as arguments: | |||||
self._load_attributes() | |||||
# If we have a name/pass and the API says we're not logged in, log in: | |||||
self._login_info = name, password = login | |||||
if name and password: | |||||
logged_in_as = self._get_username_from_cookies() | |||||
if not logged_in_as or name.replace("_", " ") != logged_in_as: | |||||
self._login(login) | |||||
def __repr__(self): | |||||
"""Return the canonical string representation of the Site.""" | |||||
res = ", ".join(( | |||||
"Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}", | |||||
"base_url={_base_url!r}", "article_path={_article_path!r}", | |||||
"script_path={_script_path!r}", "use_https={_use_https!r}", | |||||
"assert_edit={_assert_edit!r}", "maxlag={_maxlag!r}", | |||||
"sql={_sql_data!r}", "login={0}", "user_agent={2!r}", | |||||
"cookiejar={1})")) | |||||
name, password = self._login_info | |||||
login = "({0}, {1})".format(repr(name), "hidden" if password else None) | |||||
cookies = self._cookiejar.__class__.__name__ | |||||
if hasattr(self._cookiejar, "filename"): | |||||
cookies += "({0!r})".format(getattr(self._cookiejar, "filename")) | |||||
else: | |||||
cookies += "()" | |||||
agent = self._opener.addheaders[0][1] | |||||
return res.format(login, cookies, agent, **self.__dict__) | |||||
def __str__(self): | |||||
"""Return a nice string representation of the Site.""" | |||||
res = "<Site {0} ({1}:{2}) at {3}>" | |||||
return res.format(self.name, self.project, self.lang, self.domain) | |||||
def _unicodeify(self, value, encoding="utf8"): | |||||
"""Return input as unicode if it's not unicode to begin with.""" | |||||
if isinstance(value, unicode): | |||||
return value | |||||
return unicode(value, encoding) | |||||
def _urlencode_utf8(self, params): | |||||
"""Implement urllib.urlencode() with support for unicode input.""" | |||||
enc = lambda s: s.encode("utf8") if isinstance(s, unicode) else str(s) | |||||
args = [] | |||||
for key, val in params.iteritems(): | |||||
key = quote_plus(enc(key)) | |||||
val = quote_plus(enc(val)) | |||||
args.append(key + "=" + val) | |||||
return "&".join(args) | |||||
def _api_query(self, params, tries=0, wait=5, ignore_maxlag=False): | |||||
"""Do an API query with *params* as a dict of parameters. | |||||
See the documentation for :py:meth:`api_query` for full implementation | |||||
details. | |||||
""" | |||||
since_last_query = time() - self._last_query_time # Throttling support | |||||
if since_last_query < self._wait_between_queries: | |||||
wait_time = self._wait_between_queries - since_last_query | |||||
log = "Throttled: waiting {0} seconds".format(round(wait_time, 2)) | |||||
self._logger.debug(log) | |||||
sleep(wait_time) | |||||
self._last_query_time = time() | |||||
url, data = self._build_api_query(params, ignore_maxlag) | |||||
if "lgpassword" in params: | |||||
self._logger.debug("{0} -> <hidden>".format(url)) | |||||
else: | |||||
self._logger.debug("{0} -> {1}".format(url, data)) | |||||
try: | |||||
response = self._opener.open(url, data) | |||||
except URLError as error: | |||||
if hasattr(error, "reason"): | |||||
e = "API query failed: {0}.".format(error.reason) | |||||
elif hasattr(error, "code"): | |||||
e = "API query failed: got an error code of {0}." | |||||
e = e.format(error.code) | |||||
else: | |||||
e = "API query failed." | |||||
raise exceptions.APIError(e) | |||||
result = response.read() | |||||
if response.headers.get("Content-Encoding") == "gzip": | |||||
stream = StringIO(result) | |||||
gzipper = GzipFile(fileobj=stream) | |||||
result = gzipper.read() | |||||
return self._handle_api_query_result(result, params, tries, wait) | |||||
def _build_api_query(self, params, ignore_maxlag): | |||||
"""Given API query params, return the URL to query and POST data.""" | |||||
if not self._base_url or self._script_path is None: | |||||
e = "Tried to do an API query, but no API URL is known." | |||||
raise exceptions.APIError(e) | |||||
url = ''.join((self.url, self._script_path, "/api.php")) | |||||
params["format"] = "json" # This is the only format we understand | |||||
if self._assert_edit: # If requested, ensure that we're logged in | |||||
params["assert"] = self._assert_edit | |||||
if self._maxlag and not ignore_maxlag: | |||||
# If requested, don't overload the servers: | |||||
params["maxlag"] = self._maxlag | |||||
data = self._urlencode_utf8(params) | |||||
return url, data | |||||
def _handle_api_query_result(self, result, params, tries, wait): | |||||
"""Given the result of an API query, attempt to return useful data.""" | |||||
try: | |||||
res = loads(result) # Try to parse as a JSON object | |||||
except ValueError: | |||||
e = "API query failed: JSON could not be decoded." | |||||
raise exceptions.APIError(e) | |||||
try: | |||||
code = res["error"]["code"] | |||||
info = res["error"]["info"] | |||||
except (TypeError, KeyError): # Having these keys indicates a problem | |||||
return res # All is well; return the decoded JSON | |||||
if code == "maxlag": # We've been throttled by the server | |||||
if tries >= self._max_retries: | |||||
e = "Maximum number of retries reached ({0})." | |||||
raise exceptions.APIError(e.format(self._max_retries)) | |||||
tries += 1 | |||||
msg = 'Server says "{0}"; retrying in {1} seconds ({2}/{3})' | |||||
self._logger.info(msg.format(info, wait, tries, self._max_retries)) | |||||
sleep(wait) | |||||
return self._api_query(params, tries=tries, wait=wait*2) | |||||
else: # Some unknown error occurred | |||||
e = 'API query failed: got error "{0}"; server says: "{1}".' | |||||
error = exceptions.APIError(e.format(code, info)) | |||||
error.code, error.info = code, info | |||||
raise error | |||||
def _load_attributes(self, force=False): | |||||
"""Load data about our Site from the API. | |||||
This function is called by __init__() when one of the site attributes | |||||
was not given as a keyword argument. We'll do an API query to get the | |||||
missing data, but only if there actually *is* missing data. | |||||
Additionally, you can call this with *force* set to True to forcibly | |||||
reload all attributes. | |||||
""" | |||||
# All attributes to be loaded, except _namespaces, which is a special | |||||
# case because it requires additional params in the API query: | |||||
attrs = [self._name, self._project, self._lang, self._base_url, | |||||
self._article_path, self._script_path] | |||||
params = {"action": "query", "meta": "siteinfo", "siprop": "general"} | |||||
if not self._namespaces or force: | |||||
params["siprop"] += "|namespaces|namespacealiases" | |||||
result = self.api_query(**params) | |||||
self._load_namespaces(result) | |||||
elif all(attrs): # Everything is already specified and we're not told | |||||
return # to force a reload, so do nothing | |||||
else: # We're only loading attributes other than _namespaces | |||||
result = self.api_query(**params) | |||||
res = result["query"]["general"] | |||||
self._name = res["wikiid"] | |||||
self._project = res["sitename"].lower() | |||||
self._lang = res["lang"] | |||||
self._base_url = res["server"] | |||||
self._article_path = res["articlepath"] | |||||
self._script_path = res["scriptpath"] | |||||
def _load_namespaces(self, result): | |||||
"""Fill self._namespaces with a dict of namespace IDs and names. | |||||
Called by _load_attributes() with API data as *result* when | |||||
self._namespaces was not given as an kwarg to __init__(). | |||||
""" | |||||
self._namespaces = {} | |||||
for namespace in result["query"]["namespaces"].values(): | |||||
ns_id = namespace["id"] | |||||
name = namespace["*"] | |||||
try: | |||||
canonical = namespace["canonical"] | |||||
except KeyError: | |||||
self._namespaces[ns_id] = [name] | |||||
else: | |||||
if name != canonical: | |||||
self._namespaces[ns_id] = [name, canonical] | |||||
else: | |||||
self._namespaces[ns_id] = [name] | |||||
for namespace in result["query"]["namespacealiases"]: | |||||
ns_id = namespace["id"] | |||||
alias = namespace["*"] | |||||
self._namespaces[ns_id].append(alias) | |||||
def _get_cookie(self, name, domain): | |||||
"""Return the named cookie unless it is expired or doesn't exist.""" | |||||
for cookie in self._cookiejar: | |||||
if cookie.name == name and cookie.domain == domain: | |||||
if cookie.is_expired(): | |||||
break | |||||
return cookie | |||||
def _get_username_from_cookies(self): | |||||
"""Try to return our username based solely on cookies. | |||||
First, we'll look for a cookie named self._name + "Token", like | |||||
"enwikiToken". If it exists and isn't expired, we'll assume it's valid | |||||
and try to return the value of the cookie self._name + "UserName" (like | |||||
"enwikiUserName"). This should work fine on wikis without single-user | |||||
login. | |||||
If `enwikiToken` doesn't exist, we'll try to find a cookie named | |||||
`centralauth_Token`. If this exists and is not expired, we'll try to | |||||
return the value of `centralauth_User`. | |||||
If we didn't get any matches, we'll return None. Our goal here isn't to | |||||
return the most likely username, or what we *want* our username to be | |||||
(for that, we'd do self._login_info[0]), but rather to get our current | |||||
username without an unnecessary ?action=query&meta=userinfo API query. | |||||
""" | |||||
name = ''.join((self._name, "Token")) | |||||
cookie = self._get_cookie(name, self.domain) | |||||
if cookie: | |||||
name = ''.join((self._name, "UserName")) | |||||
user_name = self._get_cookie(name, self.domain) | |||||
if user_name: | |||||
return unquote_plus(user_name.value) | |||||
for cookie in self._cookiejar: | |||||
if cookie.name != "centralauth_Token" or cookie.is_expired(): | |||||
continue | |||||
base = cookie.domain | |||||
if base.startswith(".") and not cookie.domain_initial_dot: | |||||
base = base[1:] | |||||
if self.domain.endswith(base): | |||||
user_name = self._get_cookie("centralauth_User", cookie.domain) | |||||
if user_name: | |||||
return unquote_plus(user_name.value) | |||||
def _get_username_from_api(self): | |||||
"""Do a simple API query to get our username and return it. | |||||
This is a reliable way to make sure we are actually logged in, because | |||||
it doesn't deal with annoying cookie logic, but it results in an API | |||||
query that is unnecessary in some cases. | |||||
Called by _get_username() (in turn called by get_user() with no | |||||
username argument) when cookie lookup fails, probably indicating that | |||||
we are logged out. | |||||
""" | |||||
result = self.api_query(action="query", meta="userinfo") | |||||
return result["query"]["userinfo"]["name"] | |||||
def _get_username(self): | |||||
"""Return the name of the current user, whether logged in or not. | |||||
First, we'll try to deduce it solely from cookies, to avoid an | |||||
unnecessary API query. For the cookie-detection method, see | |||||
_get_username_from_cookies()'s docs. | |||||
If our username isn't in cookies, then we're probably not logged in, or | |||||
something fishy is going on (like forced logout). In this case, do a | |||||
single API query for our username (or IP address) and return that. | |||||
""" | |||||
name = self._get_username_from_cookies() | |||||
if name: | |||||
return name | |||||
return self._get_username_from_api() | |||||
def _save_cookiejar(self): | |||||
"""Try to save our cookiejar after doing a (normal) login or logout. | |||||
Calls the standard .save() method with no filename. Don't fret if our | |||||
cookiejar doesn't support saving (CookieJar raises AttributeError, | |||||
FileCookieJar raises NotImplementedError) or no default filename was | |||||
given (LWPCookieJar and MozillaCookieJar raise ValueError). | |||||
""" | |||||
if hasattr(self._cookiejar, "save"): | |||||
try: | |||||
getattr(self._cookiejar, "save")() | |||||
except (NotImplementedError, ValueError): | |||||
pass | |||||
def _login(self, login, token=None, attempt=0): | |||||
"""Safely login through the API. | |||||
Normally, this is called by __init__() if a username and password have | |||||
been provided and no valid login cookies were found. The only other | |||||
time it needs to be called is when those cookies expire, which is done | |||||
automatically by api_query() if a query fails. | |||||
Recent versions of MediaWiki's API have fixed a CSRF vulnerability, | |||||
requiring login to be done in two separate requests. If the response | |||||
from from our initial request is "NeedToken", we'll do another one with | |||||
the token. If login is successful, we'll try to save our cookiejar. | |||||
Raises LoginError on login errors (duh), like bad passwords and | |||||
nonexistent usernames. | |||||
*login* is a (username, password) tuple. *token* is the token returned | |||||
from our first request, and *attempt* is to prevent getting stuck in a | |||||
loop if MediaWiki isn't acting right. | |||||
""" | |||||
name, password = login | |||||
if token: | |||||
result = self.api_query(action="login", lgname=name, | |||||
lgpassword=password, lgtoken=token) | |||||
else: | |||||
result = self.api_query(action="login", lgname=name, | |||||
lgpassword=password) | |||||
res = result["login"]["result"] | |||||
if res == "Success": | |||||
self._save_cookiejar() | |||||
elif res == "NeedToken" and attempt == 0: | |||||
token = result["login"]["token"] | |||||
return self._login(login, token, attempt=1) | |||||
else: | |||||
if res == "Illegal": | |||||
e = "The provided username is illegal." | |||||
elif res == "NotExists": | |||||
e = "The provided username does not exist." | |||||
elif res == "EmptyPass": | |||||
e = "No password was given." | |||||
elif res == "WrongPass" or res == "WrongPluginPass": | |||||
e = "The given password is incorrect." | |||||
else: | |||||
e = "Couldn't login; server says '{0}'.".format(res) | |||||
raise exceptions.LoginError(e) | |||||
def _logout(self): | |||||
"""Safely logout through the API. | |||||
We'll do a simple API request (api.php?action=logout), clear our | |||||
cookiejar (which probably contains now-invalidated cookies) and try to | |||||
save it, if it supports that sort of thing. | |||||
""" | |||||
self.api_query(action="logout") | |||||
self._cookiejar.clear() | |||||
self._save_cookiejar() | |||||
def _sql_connect(self, **kwargs): | |||||
"""Attempt to establish a connection with this site's SQL database. | |||||
oursql.connect() will be called with self._sql_data as its kwargs. | |||||
Any kwargs given to this function will be passed to connect() and will | |||||
have precedence over the config file. | |||||
Will raise SQLError() if the module "oursql" is not available. oursql | |||||
may raise its own exceptions (e.g. oursql.InterfaceError) if it cannot | |||||
establish a connection. | |||||
""" | |||||
if not oursql: | |||||
e = "Module 'oursql' is required for SQL queries." | |||||
raise exceptions.SQLError(e) | |||||
args = self._sql_data | |||||
for key, value in kwargs.iteritems(): | |||||
args[key] = value | |||||
if "read_default_file" not in args and "user" not in args and "passwd" not in args: | |||||
args["read_default_file"] = expanduser("~/.my.cnf") | |||||
if "autoping" not in args: | |||||
args["autoping"] = True | |||||
if "autoreconnect" not in args: | |||||
args["autoreconnect"] = True | |||||
self._sql_conn = oursql.connect(**args) | |||||
def _get_service_order(self): | |||||
"""Return a preferred order for using services (e.g. the API and SQL). | |||||
A list is returned, starting with the most preferred service first and | |||||
ending with the least preferred one. Currently, there are only two | |||||
services. SERVICE_API will always be included since the API is expected | |||||
to be always usable. In normal circumstances, self.SERVICE_SQL will be | |||||
first (with the API second), since using SQL directly is easier on the | |||||
servers than making web queries with the API. self.SERVICE_SQL will be | |||||
second if replag is greater than three minutes (a cached value updated | |||||
every two minutes at most), *unless* API lag is also very high. | |||||
self.SERVICE_SQL will not be included in the list if we cannot form a | |||||
proper SQL connection. | |||||
""" | |||||
now = time() | |||||
if now - self._sql_info_cache["lastcheck"] > 120: | |||||
self._sql_info_cache["lastcheck"] = now | |||||
try: | |||||
self._sql_info_cache["replag"] = sqllag = self.get_replag() | |||||
except (exceptions.SQLError, oursql.Error): | |||||
self._sql_info_cache["usable"] = False | |||||
return [self.SERVICE_API] | |||||
self._sql_info_cache["usable"] = True | |||||
else: | |||||
if not self._sql_info_cache["usable"]: | |||||
return [self.SERVICE_API] | |||||
sqllag = self._sql_info_cache["replag"] | |||||
if sqllag > 300: | |||||
if not self._maxlag: | |||||
return [self.SERVICE_API, self.SERVICE_SQL] | |||||
if now - self._api_info_cache["lastcheck"] > 300: | |||||
self._api_info_cache["lastcheck"] = now | |||||
try: | |||||
self._api_info_cache["maxlag"] = apilag = self.get_maxlag() | |||||
except exceptions.APIError: | |||||
self._api_info_cache["maxlag"] = apilag = 0 | |||||
else: | |||||
apilag = self._api_info_cache["maxlag"] | |||||
if apilag > self._maxlag: | |||||
return [self.SERVICE_SQL, self.SERVICE_API] | |||||
return [self.SERVICE_API, self.SERVICE_SQL] | |||||
return [self.SERVICE_SQL, self.SERVICE_API] | |||||
@property
def name(self):
    """The name of the Site (its "wikiid" in the API), like ``"enwiki"``."""
    return self._name
@property
def project(self):
    """The lowercase project name of the Site, like ``"wikipedia"``."""
    return self._project
@property
def lang(self):
    """The language code of the Site, like ``"en"`` or ``"es"``."""
    return self._lang
@property
def domain(self):
    """The web domain of the Site, like ``"en.wikipedia.org"``.

    This is the network-location part of the base URL.
    """
    parsed = urlparse(self._base_url)
    return parsed.netloc
@property
def url(self):
    """The full base URL of the Site, like ``"https://en.wikipedia.org"``.

    A protocol-relative base URL (``//host``, as used from MediaWiki 1.18)
    is completed with ``https:`` or ``http:`` depending on whether HTTPS
    was requested; any other base URL is returned unchanged.
    """
    base = self._base_url
    if not base.startswith("//"):
        return base
    scheme = "https:" if self._use_https else "http:"
    return scheme + base
def api_query(self, **kwargs):
    """Do an API query with `kwargs` as the parameters.

    This will first attempt to construct an API url from
    :py:attr:`self._base_url` and :py:attr:`self._script_path`. We need
    both of these, or else we'll raise
    :py:exc:`~earwigbot.exceptions.APIError`. If
    :py:attr:`self._base_url` is protocol-relative (introduced in MediaWiki
    1.18), we'll choose HTTPS only if :py:attr:`self._use_https` is
    ``True``, otherwise HTTP.

    We'll encode the given params, adding ``format=json`` along the way, as
    well as ``&assert=`` and ``&maxlag=`` based on
    :py:attr:`self._assert_edit` and :py:attr:`_maxlag` respectively.
    Additionally, we'll sleep a bit if the last query was made fewer than
    :py:attr:`self._wait_between_queries` seconds ago. The request is made
    through :py:attr:`self._opener`, which has cookie support
    (:py:attr:`self._cookiejar`), a ``User-Agent``
    (:py:const:`earwigbot.wiki.constants.USER_AGENT`), and
    ``Accept-Encoding`` set to ``"gzip"``.

    Assuming everything went well, we'll gunzip the data (if compressed),
    load it as a JSON object, and return it.

    If our request failed for some reason, we'll raise
    :py:exc:`~earwigbot.exceptions.APIError` with details. If that
    reason was due to maxlag, we'll sleep for a bit and then repeat the
    query until we exceed :py:attr:`self._max_retries`.

    The whole query runs while holding :py:attr:`self._api_lock`.

    There is helpful MediaWiki API documentation at `MediaWiki.org
    <http://www.mediawiki.org/wiki/API>`_.
    """
    with self._api_lock:
        result = self._api_query(kwargs)
    return result
def sql_query(self, query, params=(), plain_query=False, dict_cursor=False,
              cursor_class=None, show_table=False):
    """Do an SQL query and yield its results.

    If *plain_query* is ``True``, we will force an unparameterized query.
    Specifying both *params* and *plain_query* will cause an error. If
    *dict_cursor* is ``True``, we will use :py:class:`oursql.DictCursor` as
    our cursor, otherwise the default :py:class:`oursql.Cursor`. If
    *cursor_class* is given, it will override this option. If *show_table*
    is True, the name of the table will be prepended to the name of the
    column. This will mainly affect an :py:class:`~oursql.DictCursor`.

    Example usage::

        >>> query = "SELECT user_id, user_registration FROM user WHERE user_name = ?"
        >>> params = ("The Earwig",)
        >>> result1 = site.sql_query(query, params)
        >>> result2 = site.sql_query(query, params, dict_cursor=True)
        >>> for row in result1: print row
        (7418060L, '20080703215134')
        >>> for row in result2: print row
        {'user_id': 7418060L, 'user_registration': '20080703215134'}

    This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of
    oursql's exceptions (:py:exc:`oursql.ProgrammingError`,
    :py:exc:`oursql.InterfaceError`, ...) if there were problems with the
    query.

    See :py:meth:`_sql_connect` for information on how a connection is
    acquired. Also relevant is `oursql's documentation
    <http://packages.python.org/oursql>`_ for details on that package.
    """
    # An explicit cursor class always wins over the dict_cursor switch:
    if cursor_class:
        klass = cursor_class
    elif dict_cursor:
        klass = oursql.DictCursor
    else:
        klass = oursql.Cursor

    # This is a generator, so the lock (and the cursor) stay held until the
    # caller has consumed every row or discards the generator:
    with self._sql_lock:
        if not self._sql_conn:
            self._sql_connect()  # Connect lazily on first use
        with self._sql_conn.cursor(klass, show_table=show_table) as cursor:
            cursor.execute(query, params, plain_query)
            for row in cursor:
                yield row
def get_maxlag(self, showall=False):
    """Return the internal database replication lag in seconds.

    In a typical setup, this function returns the replication lag *within*
    the WMF's cluster, *not* external replication lag affecting the
    Toolserver (see :py:meth:`get_replag` for that). This is useful when
    combined with the ``maxlag`` API query param (added by config), in
    which queries will be halted and retried if the lag is too high,
    usually above five seconds.

    With *showall*, will return a list of the lag for all servers in the
    cluster, not just the one with the highest lag.
    """
    params = {"action": "query", "meta": "siteinfo", "siprop": "dbrepllag"}
    if showall:
        params["sishowalldb"] = 1

    # Asking about lag must not itself be held back by the maxlag check:
    with self._api_lock:
        result = self._api_query(params, ignore_maxlag=True)

    servers = result["query"]["dbrepllag"]
    if not showall:
        return servers[0]["lag"]
    return [server["lag"] for server in servers]
def get_replag(self):
    """Return the estimated external database replication lag in seconds.

    Requires SQL access. This function only makes sense on a replicated
    database (e.g. the Wikimedia Toolserver) and on a wiki that receives a
    large number of edits (ideally, at least one per second), or the result
    may be larger than expected, since it works by subtracting the current
    time from the timestamp of the latest recent changes event.

    This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of
    oursql's exceptions (:py:exc:`oursql.ProgrammingError`,
    :py:exc:`oursql.InterfaceError`, ...) if there were problems.
    """
    query = """SELECT UNIX_TIMESTAMP() - UNIX_TIMESTAMP(rc_timestamp) FROM
               recentchanges ORDER BY rc_timestamp DESC LIMIT 1"""
    # Exhaust the generator so sql_query() finishes before we read from it:
    rows = list(self.sql_query(query))
    newest = rows[0]
    return newest[0]
def namespace_id_to_name(self, ns_id, all=False):
    """Given a namespace ID, returns associated namespace names.

    If *all* is ``False`` (default), we'll return the first name in the
    list, which is usually the localized version. Otherwise, we'll return
    the entire list, which includes the canonical name. For example, this
    returns ``u"Wikipedia"`` if *ns_id* = ``4`` and *all* is ``False`` on
    ``enwiki``; returns ``[u"Wikipedia", u"Project", u"WP"]`` if *ns_id* =
    ``4`` and *all* is ``True``.

    Raises :py:exc:`~earwigbot.exceptions.NamespaceNotFoundError` if the ID
    is not found.
    """
    try:
        names = self._namespaces[ns_id]
    except KeyError:
        e = "There is no namespace with id {0}.".format(ns_id)
        raise exceptions.NamespaceNotFoundError(e)
    return names if all else names[0]
def namespace_name_to_id(self, name):
    """Given a namespace name, returns the associated ID.

    Like :py:meth:`namespace_id_to_name`, but reversed. Case is ignored,
    because namespaces are assumed to be case-insensitive.

    Raises :py:exc:`~earwigbot.exceptions.NamespaceNotFoundError` if the
    name is not found.
    """
    wanted = name.lower()
    for ns_id, aliases in self._namespaces.items():
        # Compare lowercased forms so the lookup is case-insensitive:
        if any(alias.lower() == wanted for alias in aliases):
            return ns_id

    e = "There is no namespace with name '{0}'.".format(name)
    raise exceptions.NamespaceNotFoundError(e)
def get_page(self, title, follow_redirects=False, pageid=None):
    """Return a :py:class:`Page` object for the given title.

    *follow_redirects* is passed directly to
    :py:class:`~earwigbot.wiki.page.Page`'s constructor. Also, this will
    return a :py:class:`~earwigbot.wiki.category.Category` object instead
    if the given title is in the category namespace. As
    :py:class:`~earwigbot.wiki.category.Category` is a subclass of
    :py:class:`~earwigbot.wiki.page.Page`, this should not cause problems.

    The namespace prefix is matched case-insensitively against every known
    name of the category namespace, in line with
    :py:meth:`namespace_name_to_id`, so ``"category:Foo"`` is recognized as
    well as ``"Category:Foo"``.

    Note that this doesn't do any direct checks for existence or
    redirect-following: :py:class:`~earwigbot.wiki.page.Page`'s methods
    provide that.
    """
    title = self._unicodeify(title)
    prefixes = self.namespace_id_to_name(constants.NS_CATEGORY, all=True)
    prefix, colon, _ = title.partition(":")
    # Require a colon, to avoid a page that is simply titled "Category":
    if colon:
        wanted = prefix.lower()
        if any(wanted == known.lower() for known in prefixes):
            return Category(self, title, follow_redirects, pageid,
                            self._logger)
    return Page(self, title, follow_redirects, pageid, self._logger)
def get_category(self, catname, follow_redirects=False, pageid=None):
    """Return a :py:class:`Category` object for the given category name.

    *catname* should be given *without* a namespace prefix. This method is
    really just shorthand for :py:meth:`get_page("Category:" + catname)
    <get_page>`.
    """
    bare_name = self._unicodeify(catname)
    # First (primary) name of the category namespace on this site:
    namespace = self.namespace_id_to_name(constants.NS_CATEGORY)
    full_title = namespace + u':' + bare_name
    return Category(self, full_title, follow_redirects, pageid, self._logger)
def get_user(self, username=None):
    """Return a :py:class:`User` object for the given username.

    If *username* is left as ``None``, then a
    :py:class:`~earwigbot.wiki.user.User` object representing the currently
    logged-in (or anonymous!) user is returned.
    """
    if not username:
        # Nothing given: fall back to whoever this site is acting as.
        return User(self, self._get_username(), self._logger)
    return User(self, self._unicodeify(username), self._logger)
def delegate(self, services, args=None, kwargs=None):
    """Delegate a task to either the API or SQL depending on conditions.

    *services* should be a dictionary in which the key is the service name
    (:py:attr:`self.SERVICE_API <SERVICE_API>` or
    :py:attr:`self.SERVICE_SQL <SERVICE_SQL>`), and the value is the
    function to call for this service. All functions will be passed the
    same arguments: the tuple *args* and the dict *kwargs*, which are both
    empty by default. The service order is determined by
    :py:meth:`_get_service_order`.

    A function that raises :py:exc:`~earwigbot.exceptions.ServiceError` is
    skipped and the next service in the order is tried.

    Not every service needs an entry in the dictionary. Will raise
    :py:exc:`~earwigbot.exceptions.NoServiceError` if an appropriate
    service cannot be found.
    """
    call_args = args if args else ()
    call_kwargs = kwargs if kwargs else {}

    for service in self._get_service_order():
        if service not in services:
            continue  # Caller provided no implementation for this one
        try:
            return services[service](*call_args, **call_kwargs)
        except exceptions.ServiceError:
            continue  # This service failed; fall through to the next
    raise exceptions.NoServiceError(services)
@@ -0,0 +1,438 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from collections import OrderedDict | |||||
from cookielib import LWPCookieJar, LoadError | |||||
import errno | |||||
from os import chmod, path | |||||
from platform import python_version | |||||
import stat | |||||
import sqlite3 as sqlite | |||||
from earwigbot import __version__ | |||||
from earwigbot.exceptions import SiteNotFoundError | |||||
from earwigbot.wiki.copyvios.exclusions import ExclusionsDB | |||||
from earwigbot.wiki.site import Site | |||||
__all__ = ["SitesDB"] | |||||
class SitesDB(object):
    """
    **EarwigBot: Wiki Toolset: Sites Database Manager**

    This class controls the :file:`sites.db` file, which stores information
    about all wiki sites known to the bot. Three public methods act as bridges
    between the bot's config files and :py:class:`~earwigbot.wiki.site.Site`
    objects:

    - :py:meth:`get_site`:    returns a Site object corresponding to a site
    - :py:meth:`add_site`:    stores a site in the database
    - :py:meth:`remove_site`: removes a site from the database

    There's usually no need to use this class directly. All public methods
    here are available as :py:meth:`bot.wiki.get_site`,
    :py:meth:`bot.wiki.add_site`, and :py:meth:`bot.wiki.remove_site`, which
    use a :file:`sites.db` file located in the same directory as our
    :file:`config.yml` file. Lower-level access can be achieved by importing
    the manager class (``from earwigbot.wiki import SitesDB``).
    """

    def __init__(self, bot):
        """Set up the manager with an attribute for the base Bot object."""
        self.config = bot.config
        self._logger = bot.logger.getChild("wiki")

        self._sites = {}  # Internal site cache
        self._sitesdb = path.join(bot.config.root_dir, "sites.db")
        self._cookie_file = path.join(bot.config.root_dir, ".cookies")
        self._cookiejar = None  # Loaded lazily by _get_cookiejar()

        excl_db = path.join(bot.config.root_dir, "exclusions.db")
        excl_logger = self._logger.getChild("exclusionsdb")
        self._exclusions_db = ExclusionsDB(self, excl_db, excl_logger)

    def __repr__(self):
        """Return the canonical string representation of the SitesDB."""
        res = "SitesDB(config={0!r}, sitesdb={1!r}, cookie_file={2!r})"
        return res.format(self.config, self._sitesdb, self._cookie_file)

    def __str__(self):
        """Return a nice string representation of the SitesDB."""
        return "<SitesDB at {0}>".format(self._sitesdb)

    def _get_cookiejar(self):
        """Return a LWPCookieJar object loaded from our .cookies file.

        The same cookiejar object is returned every time, backed by the
        .cookies file located in the project root, same directory as
        config.yml and bot.py. If the file doesn't exist, we will create it
        and set it to be readable and writeable only by us. If it exists but
        the information inside is bogus, we'll ignore it.

        This is normally called by _make_site_object() (in turn called by
        get_site()), and the cookiejar is passed to our Site's constructor,
        used when it makes API queries. This way, we can easily preserve
        cookies between sites (e.g., for CentralAuth), making logins easier.
        """
        # Compare against None explicitly: a cookiejar holding no cookies has
        # a length of zero and is therefore falsy, so a truthiness test would
        # rebuild (and hand out) a different jar on every call.
        if self._cookiejar is not None:
            return self._cookiejar

        self._cookiejar = LWPCookieJar(self._cookie_file)

        try:
            self._cookiejar.load()
        except LoadError:
            pass  # File contains bad data, so ignore it completely
        except IOError as e:
            if e.errno == errno.ENOENT:  # "No such file or directory"
                # Create the file and restrict reading/writing only to the
                # owner, so others can't peek at our cookies:
                with open(self._cookie_file, "w"):
                    pass
                chmod(self._cookie_file, stat.S_IRUSR | stat.S_IWUSR)
            else:
                raise

        return self._cookiejar

    def _create_sitesdb(self):
        """Initialize the sitesdb file with its three necessary tables."""
        script = """
        CREATE TABLE sites (site_name, site_project, site_lang, site_base_url,
                            site_article_path, site_script_path);
        CREATE TABLE sql_data (sql_site, sql_data_key, sql_data_value);
        CREATE TABLE namespaces (ns_site, ns_id, ns_name, ns_is_primary_name);
        """
        with sqlite.connect(self._sitesdb) as conn:
            conn.executescript(script)

    def _get_site_object(self, name):
        """Return the site from our cache, or create it if it doesn't exist.

        This is essentially just a wrapper around _make_site_object that
        returns the same object each time a specific site is asked for.
        """
        if name not in self._sites:
            self._sites[name] = self._make_site_object(name)
        return self._sites[name]

    def _load_site_from_sitesdb(self, name):
        """Return all information stored in the sitesdb relating to given site.

        The information will be returned as a tuple, containing the site's
        name, project, language, base URL, article path, script path, SQL
        connection data, and namespaces, in that order. If the site is not
        found in the database, SiteNotFoundError will be raised. An empty
        database will be created before the exception is raised if none exists.
        """
        query1 = "SELECT * FROM sites WHERE site_name = ?"
        query2 = ("SELECT sql_data_key, sql_data_value FROM sql_data "
                  "WHERE sql_site = ?")
        query3 = ("SELECT ns_id, ns_name, ns_is_primary_name FROM namespaces "
                  "WHERE ns_site = ?")
        error = "Site '{0}' not found in the sitesdb.".format(name)
        with sqlite.connect(self._sitesdb) as conn:
            try:
                site_data = conn.execute(query1, (name,)).fetchone()
            except sqlite.OperationalError:
                # The tables are missing, so build them for next time:
                self._create_sitesdb()
                raise SiteNotFoundError(error)
            if not site_data:
                raise SiteNotFoundError(error)
            sql_data = conn.execute(query2, (name,)).fetchall()
            ns_data = conn.execute(query3, (name,)).fetchall()

        name, project, lang, base_url, article_path, script_path = site_data
        sql = dict(sql_data)
        namespaces = {}
        for ns_id, ns_name, ns_is_primary_name in ns_data:
            names = namespaces.setdefault(ns_id, [])
            if ns_is_primary_name:  # "Primary" name goes first in list
                names.insert(0, ns_name)
            else:  # Ordering of the aliases doesn't matter
                names.append(ns_name)

        return (name, project, lang, base_url, article_path, script_path, sql,
                namespaces)

    def _make_site_object(self, name):
        """Return a Site object associated with the site *name* in our sitesdb.

        This calls _load_site_from_sitesdb(), so SiteNotFoundError will be
        raised if the site is not in our sitesdb.
        """
        cookiejar = self._get_cookiejar()
        (name, project, lang, base_url, article_path, script_path, sql,
         namespaces) = self._load_site_from_sitesdb(name)

        config = self.config
        login = (config.wiki.get("username"), config.wiki.get("password"))
        user_agent = config.wiki.get("userAgent")
        # NOTE(review): add_site() defaults "useHTTPS" to True while this
        # defaults it to False -- confirm which is intended.
        use_https = config.wiki.get("useHTTPS", False)
        assert_edit = config.wiki.get("assert")
        maxlag = config.wiki.get("maxlag")
        wait_between_queries = config.wiki.get("waitTime", 2)
        logger = self._logger.getChild(name)
        search_config = config.wiki.get("search", OrderedDict()).copy()

        if user_agent:
            # "$1" and "$2" are placeholders for the bot and Python versions:
            user_agent = user_agent.replace("$1", __version__)
            user_agent = user_agent.replace("$2", python_version())

        if search_config:
            nltk_dir = path.join(self.config.root_dir, ".nltk")
            search_config["nltk_dir"] = nltk_dir
            search_config["exclusions_db"] = self._exclusions_db

        if not sql:
            # No per-site SQL data stored, so fill in config's template,
            # substituting the site name for "$1" in any string value:
            sql = config.wiki.get("sql", OrderedDict()).copy()
            for key, value in sql.items():
                if isinstance(value, basestring) and "$1" in value:
                    sql[key] = value.replace("$1", name)

        return Site(name=name, project=project, lang=lang, base_url=base_url,
                    article_path=article_path, script_path=script_path,
                    sql=sql, namespaces=namespaces, login=login,
                    cookiejar=cookiejar, user_agent=user_agent,
                    use_https=use_https, assert_edit=assert_edit,
                    maxlag=maxlag, wait_between_queries=wait_between_queries,
                    logger=logger, search_config=search_config)

    def _get_site_name_from_sitesdb(self, project, lang):
        """Return the name of the first site with the given project and lang.

        If we can't find the site with the given information, we'll also try
        searching for a site whose base_url contains "{lang}.{project}". There
        are a few sites, like the French Wikipedia, that set their project to
        something other than the expected "wikipedia" ("wikipédia" in this
        case), but we should correctly find them when doing get_site(lang="fr",
        project="wikipedia").

        If the site is not found, return None. An empty sitesdb will be created
        if none exists.
        """
        query1 = ("SELECT site_name FROM sites WHERE site_project = ? "
                  "and site_lang = ?")
        query2 = "SELECT site_name FROM sites WHERE site_base_url LIKE ?"
        with sqlite.connect(self._sitesdb) as conn:
            try:
                site = conn.execute(query1, (project, lang)).fetchone()
                if site:
                    return site[0]
                url = "%{0}.{1}%".format(lang, project)
                site = conn.execute(query2, (url,)).fetchone()
                return site[0] if site else None
            except sqlite.OperationalError:
                # The tables are missing: create them; nothing can match.
                self._create_sitesdb()
                return None

    def _add_site_to_sitesdb(self, site):
        """Extract relevant info from a Site object and add it to the sitesdb.

        Works like a reverse _load_site_from_sitesdb(); the site's project,
        language, base URL, article path, script path, SQL connection data, and
        namespaces are extracted from the site and inserted into the sites
        database. If the sitesdb doesn't exist, we'll create it first. Any
        rows already stored under the same site name are replaced. The *site*
        object itself is only read, never modified.
        """
        name = site.name
        sites_data = (name, site.project, site.lang, site._base_url,
                      site._article_path, site._script_path)
        sql_data = [(name, key, val) for key, val in site._sql_data.items()]
        ns_data = []
        for ns_id, ns_names in site._namespaces.items():
            # The first name in each list is the primary one. Read it by
            # position rather than popping it, which would strip the name
            # from the Site's own namespace lists.
            for index, ns_name in enumerate(ns_names):
                ns_data.append((name, ns_id, ns_name, index == 0))

        with sqlite.connect(self._sitesdb) as conn:
            check_exists = "SELECT 1 FROM sites WHERE site_name = ?"
            try:
                exists = conn.execute(check_exists, (name,)).fetchone()
            except sqlite.OperationalError:
                self._create_sitesdb()
            else:
                if exists:
                    conn.execute("DELETE FROM sites WHERE site_name = ?", (name,))
                    conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
                    conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
            conn.execute("INSERT INTO sites VALUES (?, ?, ?, ?, ?, ?)", sites_data)
            conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)", sql_data)
            conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)", ns_data)

    def _remove_site_from_sitesdb(self, name):
        """Remove a site by name from the sitesdb and the internal cache.

        Returns ``True`` if a row for the site was deleted and ``False`` if
        the site was not in the database. If the database has no tables yet,
        they are created and ``False`` is returned.
        """
        self._sites.pop(name, None)

        with sqlite.connect(self._sitesdb) as conn:
            try:
                cursor = conn.execute("DELETE FROM sites WHERE site_name = ?",
                                      (name,))
            except sqlite.OperationalError:
                # Missing tables mean there is nothing to remove; set up an
                # empty sitesdb as remove_site() promises instead of failing.
                self._create_sitesdb()
                return False
            if cursor.rowcount == 0:
                return False
            conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
            conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))

        self._logger.info("Removed site '{0}'".format(name))
        return True

    def get_site(self, name=None, project=None, lang=None):
        """Return a Site instance based on information from the sitesdb.

        With no arguments, return the default site as specified by our config
        file. This is ``config.wiki["defaultSite"]``.

        With *name* specified, return the site with that name. This is
        equivalent to the site's ``wikiid`` in the API, like *enwiki*.

        With *project* and *lang* specified, return the site whose project and
        language match these values. If there are multiple sites with the same
        values (unlikely), this is not a reliable way of loading a site. Call
        the function with an explicit *name* in that case.

        We will attempt to login to the site automatically using
        ``config.wiki["username"]`` and ``config.wiki["password"]`` if both are
        defined.

        Specifying a project without a lang or a lang without a project will
        raise :py:exc:`TypeError`. If all three args are specified, *name* will
        be first tried, then *project* and *lang* if *name* doesn't work. If a
        site cannot be found in the sitesdb,
        :py:exc:`~earwigbot.exceptions.SiteNotFoundError` will be raised. An
        empty sitesdb will be created if none is found.
        """
        # Someone specified a project without a lang, or vice versa:
        if (project and not lang) or (not project and lang):
            e = ("Keyword arguments 'lang' and 'project' must be specified "
                 "together.")
            raise TypeError(e)

        # No args given, so return our default site:
        if not name and not project and not lang:
            try:
                default = self.config.wiki["defaultSite"]
            except KeyError:
                e = "Default site is not specified in config."
                raise SiteNotFoundError(e)
            return self._get_site_object(default)

        # Name arg given, but don't look at others unless `name` isn't found:
        if name:
            try:
                return self._get_site_object(name)
            except SiteNotFoundError:
                if project and lang:
                    name = self._get_site_name_from_sitesdb(project, lang)
                    if name:
                        return self._get_site_object(name)
                raise

        # If we end up here, then project and lang are the only args given:
        name = self._get_site_name_from_sitesdb(project, lang)
        if name:
            return self._get_site_object(name)
        e = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang)
        raise SiteNotFoundError(e)

    def add_site(self, project=None, lang=None, base_url=None,
                 script_path="/w", sql=None):
        """Add a site to the sitesdb so it can be retrieved with get_site().

        If only a project and a lang are given, we'll guess the *base_url* as
        ``"//{lang}.{project}.org"`` (which is protocol-relative, becoming
        ``"https"`` if *useHTTPS* is ``True`` in config otherwise ``"http"``).
        If this is wrong, provide the correct *base_url* as an argument (in
        which case project and lang are ignored). Most wikis use ``"/w"`` as
        the script path (meaning the API is located at
        ``"{base_url}{script_path}/api.php"`` ->
        ``"//{lang}.{project}.org/w/api.php"``), so this is the default. If
        your wiki is different, provide the script_path as an argument. SQL
        connection settings are guessed automatically using config's template
        value. If this is wrong or not specified, provide a dict of kwargs as
        *sql* and Site will pass it to :py:func:`oursql.connect(**sql)
        <oursql.connect>`, allowing you to make queries with
        :py:meth:`site.sql_query <earwigbot.wiki.site.Site.sql_query>`.

        Returns the :py:class:`~earwigbot.wiki.site.Site` object for the site,
        as loaded back from the sitesdb. If a site with the same name was
        already stored, its old data is replaced (this can be done
        purposefully to update old site info). Raises
        :py:exc:`~earwigbot.exceptions.SiteNotFoundError` if not enough
        information has been provided to identify the site (e.g. a *project*
        but not a *lang*).
        """
        if not base_url:
            if not project or not lang:
                e = "Without a base_url, both a project and a lang must be given."
                raise SiteNotFoundError(e)
            base_url = "//{0}.{1}.org".format(lang, project)
        cookiejar = self._get_cookiejar()

        config = self.config
        login = (config.wiki.get("username"), config.wiki.get("password"))
        user_agent = config.wiki.get("userAgent")
        # NOTE(review): _make_site_object() defaults "useHTTPS" to False while
        # this defaults it to True -- confirm which is intended.
        use_https = config.wiki.get("useHTTPS", True)
        assert_edit = config.wiki.get("assert")
        maxlag = config.wiki.get("maxlag")
        wait_between_queries = config.wiki.get("waitTime", 2)

        if user_agent:
            # "$1" and "$2" are placeholders for the bot and Python versions:
            user_agent = user_agent.replace("$1", __version__)
            user_agent = user_agent.replace("$2", python_version())

        # Create a Site object to log in and load the other attributes:
        site = Site(base_url=base_url, script_path=script_path, sql=sql,
                    login=login, cookiejar=cookiejar, user_agent=user_agent,
                    use_https=use_https, assert_edit=assert_edit,
                    maxlag=maxlag, wait_between_queries=wait_between_queries)

        # Only announce the site once it has actually been stored:
        self._add_site_to_sitesdb(site)
        self._logger.info("Added site '{0}'".format(site.name))
        return self._get_site_object(site.name)

    def remove_site(self, name=None, project=None, lang=None):
        """Remove a site from the sitesdb.

        Returns ``True`` if the site was removed successfully or ``False`` if
        the site was not in our sitesdb originally. If all three args (*name*,
        *project*, and *lang*) are given, we'll first try *name* and then try
        the latter two if *name* wasn't found in the database. Raises
        :py:exc:`TypeError` if a project was given but not a language, or vice
        versa. Will create an empty sitesdb if none was found.
        """
        # Someone specified a project without a lang, or vice versa:
        if (project and not lang) or (not project and lang):
            e = ("Keyword arguments 'lang' and 'project' must be specified "
                 "together.")
            raise TypeError(e)

        if name:
            was_removed = self._remove_site_from_sitesdb(name)
            if not was_removed:
                if project and lang:
                    name = self._get_site_name_from_sitesdb(project, lang)
                    if name:
                        return self._remove_site_from_sitesdb(name)
            return was_removed

        if project and lang:
            name = self._get_site_name_from_sitesdb(project, lang)
            if name:
                return self._remove_site_from_sitesdb(name)

        return False
@@ -0,0 +1,316 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from logging import getLogger, NullHandler | |||||
from time import gmtime, strptime | |||||
from socket import AF_INET, AF_INET6, error as socket_error, inet_pton | |||||
from earwigbot.exceptions import UserNotFoundError | |||||
from earwigbot.wiki import constants | |||||
from earwigbot.wiki.page import Page | |||||
__all__ = ["User"] | |||||
class User(object):
    """
    **EarwigBot: Wiki Toolset: User**

    Represents a user on a given :py:class:`~earwigbot.wiki.site.Site`. Has
    methods for getting a bunch of information about the user, such as
    editcount and user rights, methods for returning the user's userpage and
    talkpage, etc.

    *Attributes:*

    - :py:attr:`site`:         the user's corresponding Site object
    - :py:attr:`name`:         the user's username
    - :py:attr:`exists`:       ``True`` if the user exists, else ``False``
    - :py:attr:`userid`:       an integer ID representing the user
    - :py:attr:`blockinfo`:    information about any current blocks on the user
    - :py:attr:`groups`:       a list of the user's groups
    - :py:attr:`rights`:       a list of the user's rights
    - :py:attr:`editcount`:    the number of edits made by the user
    - :py:attr:`registration`: the time the user registered
    - :py:attr:`emailable`:    ``True`` if you can email the user, or ``False``
    - :py:attr:`gender`:       the user's gender ("male"/"female"/"unknown")
    - :py:attr:`is_ip`:        ``True`` if this is an IP address, or ``False``

    *Public methods:*

    - :py:meth:`reload`:       forcibly reloads the user's attributes
    - :py:meth:`get_userpage`: returns a Page object representing the user's
      userpage
    - :py:meth:`get_talkpage`: returns a Page object representing the user's
      talkpage
    """

    def __init__(self, site, name, logger=None):
        """Constructor for new User instances.

        Takes two arguments, a Site object (necessary for doing API queries),
        and the name of the user, preferably without "User:" in front, although
        this prefix will be automatically removed by the API if given.

        You can also use site.get_user() instead, which returns a User object,
        and is preferred.

        We won't do any API queries yet for basic information about the user -
        save that for when the information is requested.
        """
        self._site = site
        self._name = name

        # Set up our internal logger:
        if logger:
            self._logger = logger
        else:  # Just set up a null logger to eat up our messages:
            self._logger = getLogger("earwigbot.wiki")
            # The logger is shared by name, so only attach a NullHandler once;
            # otherwise every new User would append another handler to it.
            handlers = self._logger.handlers
            if not any(isinstance(h, NullHandler) for h in handlers):
                self._logger.addHandler(NullHandler())

    def __repr__(self):
        """Return the canonical string representation of the User."""
        return "User(name={0!r}, site={1!r})".format(self._name, self._site)

    def __str__(self):
        """Return a nice string representation of the User."""
        return '<User "{0}" of {1}>'.format(self.name, str(self.site))

    def _get_attribute(self, attr):
        """Internally used to get an attribute by name.

        We'll call _load_attributes() to get this (and all other attributes)
        from the API if no query has been made yet.

        Raises UserNotFoundError if a nonexistent user prevents us from
        returning a certain attribute.
        """
        # Key on _exists rather than on the requested attribute: a missing
        # user never gets the other attributes set, so checking for them would
        # trigger a fresh API query on every single access.
        if not hasattr(self, "_exists"):
            self._load_attributes()
        if not self._exists:
            e = u"User '{0}' does not exist.".format(self._name)
            raise UserNotFoundError(e)
        return getattr(self, attr)

    def _load_attributes(self):
        """Internally used to load all attributes from the API.

        Normally, this is called by _get_attribute() when a requested attribute
        is not defined. This defines it.
        """
        props = "blockinfo|groups|rights|editcount|registration|emailable|gender"
        result = self.site.api_query(action="query", list="users",
                                     ususers=self._name, usprop=props)
        res = result["query"]["users"][0]

        # normalize our username in case it was entered oddly
        self._name = res["name"]

        try:
            self._userid = res["userid"]
        except KeyError:  # userid is missing, so user does not exist
            self._exists = False
            return

        self._exists = True

        try:
            self._blockinfo = {
                "by": res["blockedby"],
                "reason": res["blockreason"],
                "expiry": res["blockexpiry"]
            }
        except KeyError:
            self._blockinfo = False

        self._groups = res["groups"]

        try:
            # Rights may arrive as a mapping; always expose a real list.
            self._rights = list(res["rights"].values())
        except AttributeError:
            self._rights = res["rights"]
        self._editcount = res["editcount"]

        reg = res["registration"]
        try:
            self._registration = strptime(reg, "%Y-%m-%dT%H:%M:%SZ")
        except TypeError:
            # Sometimes the API doesn't give a date; the user's probably really
            # old. There's nothing else we can do!
            self._registration = gmtime(0)

        # The mere presence of the key means the user can be emailed.
        self._emailable = "emailable" in res

        self._gender = res["gender"]

    @property
    def site(self):
        """The user's corresponding Site object."""
        return self._site

    @property
    def name(self):
        """The user's username.

        This will never make an API query on its own, but if one has already
        been made by the time this is retrieved, the username may have been
        "normalized" from the original input to the constructor, converted into
        a Unicode object, with underscores removed, etc.
        """
        return self._name

    @property
    def exists(self):
        """``True`` if the user exists, or ``False`` if they do not.

        Makes an API query only if we haven't made one already.
        """
        if not hasattr(self, "_exists"):
            self._load_attributes()
        return self._exists

    @property
    def userid(self):
        """An integer ID used by MediaWiki to represent the user.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_userid")

    @property
    def blockinfo(self):
        """Information about any current blocks on the user.

        If the user is not blocked, returns ``False``. If they are, returns a
        dict with three keys: ``"by"`` is the blocker's username, ``"reason"``
        is the reason why they were blocked, and ``"expiry"`` is when the block
        expires.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_blockinfo")

    @property
    def groups(self):
        """A list of groups this user is in, including ``"*"``.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_groups")

    @property
    def rights(self):
        """A list of this user's rights.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_rights")

    @property
    def editcount(self):
        """Returns the number of edits made by the user.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_editcount")

    @property
    def registration(self):
        """The time the user registered as a :py:class:`time.struct_time`.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_registration")

    @property
    def emailable(self):
        """``True`` if the user can be emailed, or ``False`` if they cannot.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_emailable")

    @property
    def gender(self):
        """The user's gender.

        Can return either ``"male"``, ``"female"``, or ``"unknown"``, if they
        did not specify it.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_gender")

    @property
    def is_ip(self):
        """``True`` if the user is an IP address, or ``False`` otherwise.

        This tests for IPv4 and IPv6 using :py:func:`socket.inet_pton` on the
        username. No API queries are made.
        """
        # Besides socket.error, inet_pton() can reject a name with ValueError
        # (e.g. an embedded NUL, or a failed implicit encode) or TypeError;
        # any such name is simply not an IP address.
        not_an_address = (socket_error, ValueError, TypeError)
        for family in (AF_INET, AF_INET6):
            try:
                inet_pton(family, self.name)
            except not_an_address:
                continue
            return True
        return False

    def reload(self):
        """Forcibly reload the user's attributes.

        Emphasis on *reload*: this is only necessary if there is reason to
        believe they have changed.
        """
        self._load_attributes()

    def get_userpage(self):
        """Return a Page object representing the user's userpage.

        No checks are made to see if it exists or not. Proper site namespace
        conventions are followed.
        """
        prefix = self.site.namespace_id_to_name(constants.NS_USER)
        pagename = ':'.join((prefix, self._name))
        return Page(self.site, pagename)

    def get_talkpage(self):
        """Return a Page object representing the user's talkpage.

        No checks are made to see if it exists or not. Proper site namespace
        conventions are followed.
        """
        prefix = self.site.namespace_id_to_name(constants.NS_USER_TALK)
        pagename = ':'.join((prefix, self._name))
        return Page(self.site, pagename)
@@ -1,33 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# A base class for commands on IRC. | |||||
class BaseCommand(object):
    """A base class for commands on IRC.

    Subclasses override the methods below; the defaults describe a command
    that listens on no hooks, offers no help, and never matches.
    """

    def __init__(self, connection):
        """Store the IRC connection object the command will use."""
        self.connection = connection

    def get_hooks(self):
        """Return the hooks this command should be called on.

        Hooks are: 'msg', 'msg_private', 'msg_public', and 'join'. The base
        implementation subscribes to none of them.
        """
        hooks = []
        return hooks

    def get_help(self, command):
        """Return help information for the command, used by !help.

        Return None for no help. If a given class handles multiple commands,
        the ``command`` argument can be used to return different help for
        each one.
        """
        return None

    def check(self, data):
        """Decide whether this command should respond to an activity.

        Given a Data() object, return True if we should respond, or False if
        we should ignore it or it doesn't apply to us. Most commands return
        True if data.command == 'command_name', otherwise False.
        """
        return False

    def process(self, data):
        """Handle an activity (usually a message) on IRC.

        At this point, thanks to self.check() which is called automatically
        by command_handler, we know this is something we should respond to,
        so (usually) a 'if data.command != "command_name": return' is
        unnecessary.
        """
        return None
@@ -1,66 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# A module to manage IRC commands. | |||||
import os | |||||
import traceback | |||||
# Registry of instantiated command objects, filled in by load_commands().
commands = []

def load_commands(connection):
    """Load all valid command classes from irc/commands/ into ``commands``.

    Every ``*.py`` file in the directory whose name does not start with an
    underscore is imported (in alphabetical order) and handed to
    process_module(), which instantiates its command classes with
    ``connection``. A file that fails to import is reported with a traceback
    and skipped; the remaining files are still loaded.
    """
    # Imported locally so this function carries its own dependency.
    from importlib import import_module

    directory = os.path.join("irc", "commands")
    for filename in sorted(os.listdir(directory)):
        # ignore non-python files or files beginning with "_"
        if filename.startswith("_") or not filename.endswith(".py"):
            continue
        name = filename[:-3]  # strip .py from end
        try:
            # import_module() returns the module object directly, so a file
            # called e.g. "module.py" or "f.py" cannot clash with our locals.
            module = import_module("irc.commands." + name)
        except Exception:  # importing the file failed for some reason...
            print("Couldn't load file %s:" % filename)
            traceback.print_exc()
            continue
        process_module(connection, module)

    pretty_cmnds = [c.__class__.__name__ for c in commands]
    print("Found %s command classes: %s." % (len(commands), ', '.join(pretty_cmnds)))
def process_module(connection, module):
    """Add every valid command class found in ``module`` to ``commands``.

    Each attribute of the module that has a ``__bases__`` attribute and whose
    direct bases include one named "BaseCommand" is instantiated once with
    ``connection`` and appended to the module-level ``commands`` list.
    Anything else in the module is ignored.
    """
    for attr_name in dir(module):  # go through everything in the file
        obj = getattr(module, attr_name)
        try:
            bases = obj.__bases__
        except AttributeError:  # object isn't a valid class, so ignore it
            continue
        # Only direct bases are inspected, and they are matched by name.
        if any(base.__name__ == "BaseCommand" for base in bases):
            commands.append(obj(connection))  # initialize a new command object
            print("Added command class %s from %s..." % (attr_name, module.__name__))
def get_commands():
    """Return the module-level list of loaded command objects.

    The list itself is returned, not a copy.
    """
    loaded = commands
    return loaded
def check(hook, data):
    """Dispatch an IRC event to the first command that wants to handle it.

    ``hook`` is the event type and ``data`` the event's Data object, whose
    arguments are parsed first. Commands are tried in registry order; the
    first one that listens on ``hook`` and whose check() accepts ``data`` has
    its process() called, after which no further commands are tried. An
    ordinary exception raised by process() is printed with its traceback and
    swallowed so one broken command cannot take the bot down.
    """
    data.parse_args()  # parse command arguments into data.command and data.args

    for command in commands:
        if hook not in command.get_hooks():
            continue
        if not command.check(data):
            continue
        try:
            command.process(data)
        except Exception:
            # Deliberately not a bare except: SystemExit and KeyboardInterrupt
            # must still be able to stop the bot.
            print("Error executing command '{0}':".format(data.command))
            traceback.print_exc()
        break
@@ -1,979 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
###### | |||||
###### NOTE: | |||||
###### This is an old commands file from the previous version of EarwigBot. | |||||
###### It is not used by the new EarwigBot and is simply here for reference | |||||
###### when developing new commands. | |||||
###### | |||||
### EarwigBot | |||||
## Import basics. | |||||
import sys, socket, string, time, codecs, os, traceback, thread, re, urllib, web, math, unicodedata | |||||
## Import our functions. | |||||
import config | |||||
## Set up constants. | |||||
HOST, PORT, NICK, IDENT, REALNAME, CHANS, REPORT_CHAN, WELCOME_CHAN, HOST2, CHAN2, OWNER, ADMINS, ADMINS_R, PASS = config.host, config.port, config.nick, config.ident, config.realname, config.chans, config.report_chan, config.welcome_chan, config.host2, config.chan2, config.owner, config.admins, config.admin_readable, config.password | |||||
def get_commandList():
    """Return a dict mapping every trigger word to its canonical command name.

    The table is written as (canonical name, triggers) groups so that the
    aliases of each command sit next to each other; a fresh dict is built on
    every call.
    """
    groups = (
        ('quiet', ('quiet',)),
        ('welcome', ('welcome', 'greet')),
        ('linker', ('linker',)),
        ('auth', ('auth',)),
        ('access', ('access',)),
        ('join', ('join',)),
        ('part', ('part',)),
        ('restart', ('restart',)),
        ('quit', ('quit', 'die')),
        ('msg', ('msg',)),
        ('me', ('me',)),
        ('calc', ('calc',)),
        ('dice', ('dice',)),
        ('tock', ('tock',)),
        ('beats', ('beats',)),
        ('copyvio', ('copyvio', 'copy', 'copyright')),
        ('dictionary', ('dict', 'dictionary')),
        ('etymology', ('ety', 'etymology')),
        ('langcode', ('lang', 'langcode')),
        ('number', ('num', 'number', 'count', 'c')),
        ('nick', ('nick',)),
        ('op', ('op',)),
        ('deop', ('deop',)),
        ('voice', ('voice',)),
        ('devoice', ('devoice',)),
        ('pending', ('pend', 'pending')),
        ('submissions', ('sub', 'submissions')),
        ('praise', ('praise',)),
        ('leonard', ('leonard',)),
        ('groovedog', ('groovedog',)),
        ('earwig', ('earwig',)),
        ('macmed', ('macmed',)),
        ('cubs197', ('cubs197',)),
        ('sparksboy', ('sparksboy',)),
        ('tim_song', ('tim_song', 'tim')),
        ('blurpeace', ('blurpeace',)),
        ('sausage', ('sausage',)),
        ('mindstormskid', ('mindstormskid',)),
        ('mcjohn', ('mcjohn',)),
        ('fetchcomms', ('fetchcomms',)),
        ('trout', ('trout',)),
        ('kill', ('kill', 'destroy', 'murder')),
        ('fish', ('fish',)),
        ('report', ('report',)),
        ('commands', ('commands',)),
        ('help', ('help', 'doc', 'documentation')),
        ('mysql', ('mysql',)),
        ('reminder', ('remind', 'reminder')),
        ('notes', ('notes', 'note', 'about', 'data', 'database')),
        ('hash', ('hash',)),
        ('lookup', ('lookup', 'ip')),
    )
    table = {}
    for canonical, triggers in groups:
        for trigger in triggers:
            table[trigger] = canonical
    return table
def main(command, line, line2, nick, chan, host, auth, notice, say, reply, s):
    """Run parse() for one command and report any failure to the channel.

    All arguments are passed straight through to parse(). If parse() raises
    an Exception, the full traceback is printed, and a one-line summary is
    sent to ``chan`` via ``say``: the final line of the traceback followed,
    in parentheses, by the innermost 'File "/..."' line with its first letter
    lowercased, or 'source unknown' if the traceback has no such line.
    """
    try:
        parse(command, line, line2, nick, chan, host, auth, notice, say, reply, s)
    except Exception:
        trace = traceback.format_exc()
        print(trace)
        # Walk the traceback bottom-up so the innermost frame is found first.
        entries = [entry.strip() for entry in reversed(trace.splitlines())]
        summary = entries[0]
        origin = 'source unknown'
        for entry in entries:
            if entry.startswith('File "/'):
                origin = entry[0].lower() + entry[1:]
                break
        say(summary + ' (' + origin + ')', chan)
def parse(command, line, line2, nick, chan, host, auth, notice, say, reply, s): | |||||
authy = auth(host) | |||||
if command == "access": | |||||
a = 'The bot\'s owner is "%s".' % OWNER | |||||
b = 'The bot\'s admins are "%s".' % ', '.join(ADMINS_R) | |||||
reply(a, chan, nick) | |||||
reply(b, chan, nick) | |||||
return | |||||
if command == "join": | |||||
if authy == "owner" or authy == "admin": | |||||
try: | |||||
channel = line2[4] | |||||
except Exception: | |||||
channel = chan | |||||
s.send("JOIN %s\r\n" % channel) | |||||
else: | |||||
reply("You aren't authorized to use that command.", chan, nick) | |||||
return | |||||
if command == "part": | |||||
if authy == "owner" or authy == "admin": | |||||
try: | |||||
channel = line2[4] | |||||
except Exception: | |||||
channel = chan | |||||
s.send("PART %s\r\n" % channel) | |||||
else: | |||||
reply("You aren't authorized to use that command.", chan, nick) | |||||
return | |||||
if command == "restart": | |||||
import thread | |||||
if authy == "owner": | |||||
s.send("QUIT\r\n") | |||||
time.sleep(5) | |||||
os.system("nice -15 python main.py") | |||||
exit() | |||||
else: | |||||
reply("Only the owner, %s, can stop the bot. This incident will be reported." % OWNER, chan, nick) | |||||
return | |||||
if command == "quit" or command == "die": | |||||
if authy != "owner": | |||||
if command != "suicide": | |||||
reply("Only the owner, %s, can stop the bot. This incident will be reported." % OWNER, chan, nick) | |||||
else: | |||||
say("\x01ACTION hands %s a gun... have fun :D\x01" % nick, nick) | |||||
else: | |||||
if command == "suicide": | |||||
say("\x01ACTION stabs himself with a knife.\x01", chan) | |||||
time.sleep(0.2) | |||||
try: | |||||
s.send("QUIT :%s\r\n" % ' '.join(line2[4:])) | |||||
except Exception: | |||||
s.send("QUIT\r\n") | |||||
__import__('os')._exit(0) | |||||
return | |||||
if command == "msg": | |||||
if authy == "owner" or authy == "admin": | |||||
say(' '.join(line2[5:]), line2[4]) | |||||
else: | |||||
reply("You aren't authorized to use that command.", chan, nick) | |||||
return | |||||
if command == "me": | |||||
if authy == "owner" or authy == "admin": | |||||
say("\x01ACTION %s\x01" % ' '.join(line2[5:]), line2[4]) | |||||
else: | |||||
reply("You aren't authorized to use that command.", chan, nick) | |||||
return | |||||
if command == "calc": | |||||
r_result = re.compile(r'(?i)<A NAME=results>(.*?)</A>') | |||||
r_tag = re.compile(r'<\S+.*?>') | |||||
subs = [ | |||||
(' in ', ' -> '), | |||||
(' over ', ' / '), | |||||
(u'£', 'GBP '), | |||||
(u'€', 'EUR '), | |||||
('\$', 'USD '), | |||||
(r'\bKB\b', 'kilobytes'), | |||||
(r'\bMB\b', 'megabytes'), | |||||
(r'\bGB\b', 'kilobytes'), | |||||
('kbps', '(kilobits / second)'), | |||||
('mbps', '(megabits / second)') | |||||
] | |||||
try: | |||||
q = ' '.join(line2[4:]) | |||||
except Exception: | |||||
say("0?", chan) | |||||
return | |||||
query = q[:] | |||||
for a, b in subs: | |||||
query = re.sub(a, b, query) | |||||
query = query.rstrip(' \t') | |||||
precision = 5 | |||||
if query[-3:] in ('GBP', 'USD', 'EUR', 'NOK'): | |||||
precision = 2 | |||||
query = web.urllib.quote(query.encode('utf-8')) | |||||
uri = 'http://futureboy.us/fsp/frink.fsp?fromVal=' | |||||
bytes = web.get(uri + query) | |||||
m = r_result.search(bytes) | |||||
if m: | |||||
result = m.group(1) | |||||
result = r_tag.sub('', result) # strip span.warning tags | |||||
result = result.replace('>', '>') | |||||
result = result.replace('(undefined symbol)', '(?) ') | |||||
if '.' in result: | |||||
try: result = str(round(float(result), precision)) | |||||
except ValueError: pass | |||||
if not result.strip(): | |||||
result = '?' | |||||
elif ' in ' in q: | |||||
result += ' ' + q.split(' in ', 1)[1] | |||||
say(q + ' = ' + result[:350], chan) | |||||
else: reply("Sorry, can't calculate that.", chan, nick) | |||||
return | |||||
if command == "dice": | |||||
import random | |||||
try: | |||||
set = range(int(line2[4]), int(line2[5]) + 1) | |||||
except Exception: | |||||
set = range(1, 7) | |||||
num = random.choice(set) | |||||
reply("You rolled a %s." % num, chan, nick) | |||||
if len(set) < 30: | |||||
say("Set consisted of %s." % set, nick) | |||||
else: | |||||
say("Set consisted of %s... and %s others." % (set[:30], len(set) - 30), nick) | |||||
return | |||||
if command == "tock": | |||||
u = urllib.urlopen('http://tycho.usno.navy.mil/cgi-bin/timer.pl') | |||||
info = u.info() | |||||
u.close() | |||||
say('"' + info['Date'] + '" - tycho.usno.navy.mil', chan) | |||||
return | |||||
if command == "beats": | |||||
beats = ((time.time() + 3600) % 86400) / 86.4 | |||||
beats = int(math.floor(beats)) | |||||
say('@%03i' % beats, chan) | |||||
return | |||||
if command == "copyvio" or command == "copy" or command == "copyright": | |||||
url = "http://en.wikipedia.org/wiki/User:EarwigBot/AfC copyvios" | |||||
query = urllib.urlopen(url) | |||||
data = query.read() | |||||
url = "http://toolserver.org/~earwig/earwigbot/pywikipedia/error.txt" | |||||
query = urllib.urlopen(url) | |||||
data2 = query.read() | |||||
if "critical" in data2: | |||||
text = "AfC copyvio situation is CRITICAL: Major disaster." | |||||
elif "exceed" in data2: | |||||
text = "AfC copyvio situation is CRITICAL: Queries exceeded error." | |||||
elif "spam" in data2: | |||||
text = "AfC copyvio situation is CRITICAL: Spamfilter error." | |||||
elif "<h3>" in data: | |||||
text = "AfC copyvio situation is BAD: Unsolved copyvios at [[User:EarwigBot/AfC copyvios]]" | |||||
else: | |||||
text = "AfC copyvio situation is OK: OK." | |||||
reply(text, chan, nick) | |||||
return | |||||
if command == "dict" or command == "dictionary": | |||||
def trim(thing): | |||||
if thing.endswith(' '): | |||||
thing = thing[:-6] | |||||
return thing.strip(' :.') | |||||
r_li = re.compile(r'(?ims)<li>.*?</li>') | |||||
r_tag = re.compile(r'<[^>]+>') | |||||
r_parens = re.compile(r'(?<=\()(?:[^()]+|\([^)]+\))*(?=\))') | |||||
r_word = re.compile(r'^[A-Za-z0-9\' -]+$') | |||||
uri = 'http://encarta.msn.com/dictionary_/%s.html' | |||||
r_info = re.compile(r'(?:ResultBody"><br /><br />(.*?) )|(?:<b>(.*?)</b>)') | |||||
try: | |||||
word = line2[4] | |||||
except Exception: | |||||
reply("Please enter a word.", chan, nick) | |||||
return | |||||
word = urllib.quote(word.encode('utf-8')) | |||||
bytes = web.get(uri % word) | |||||
results = {} | |||||
wordkind = None | |||||
for kind, sense in r_info.findall(bytes): | |||||
kind, sense = trim(kind), trim(sense) | |||||
if kind: wordkind = kind | |||||
elif sense: | |||||
results.setdefault(wordkind, []).append(sense) | |||||
result = word.encode('utf-8') + ' - ' | |||||
for key in sorted(results.keys()): | |||||
if results[key]: | |||||
result += (key or '') + ' 1. ' + results[key][0] | |||||
if len(results[key]) > 1: | |||||
result += ', 2. ' + results[key][1] | |||||
result += '; ' | |||||
result = result.rstrip('; ') | |||||
if result.endswith('-') and (len(result) < 30): | |||||
reply('Sorry, no definition found.', chan, nick) | |||||
else: say(result, chan) | |||||
return | |||||
if command == "ety" or command == "etymology": | |||||
etyuri = 'http://etymonline.com/?term=%s' | |||||
etysearch = 'http://etymonline.com/?search=%s' | |||||
r_definition = re.compile(r'(?ims)<dd[^>]*>.*?</dd>') | |||||
r_tag = re.compile(r'<(?!!)[^>]+>') | |||||
r_whitespace = re.compile(r'[\t\r\n ]+') | |||||
abbrs = [ | |||||
'cf', 'lit', 'etc', 'Ger', 'Du', 'Skt', 'Rus', 'Eng', 'Amer.Eng', 'Sp', | |||||
'Fr', 'N', 'E', 'S', 'W', 'L', 'Gen', 'J.C', 'dial', 'Gk', | |||||
'19c', '18c', '17c', '16c', 'St', 'Capt', 'obs', 'Jan', 'Feb', 'Mar', | |||||
'Apr', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'c', 'tr', 'e', 'g' | |||||
] | |||||
t_sentence = r'^.*?(?<!%s)(?:\.(?= [A-Z0-9]|\Z)|\Z)' | |||||
r_sentence = re.compile(t_sentence % ')(?<!'.join(abbrs)) | |||||
def unescape(s): | |||||
s = s.replace('>', '>') | |||||
s = s.replace('<', '<') | |||||
s = s.replace('&', '&') | |||||
return s | |||||
def text(html): | |||||
html = r_tag.sub('', html) | |||||
html = r_whitespace.sub(' ', html) | |||||
return unescape(html).strip() | |||||
try: | |||||
word = line2[4] | |||||
except Exception: | |||||
reply("Please enter a word.", chan, nick) | |||||
return | |||||
def ety(word): | |||||
if len(word) > 25: | |||||
raise ValueError("Word too long: %s[...]" % word[:10]) | |||||
word = {'axe': 'ax/axe'}.get(word, word) | |||||
bytes = web.get(etyuri % word) | |||||
definitions = r_definition.findall(bytes) | |||||
if not definitions: | |||||
return None | |||||
defn = text(definitions[0]) | |||||
m = r_sentence.match(defn) | |||||
if not m: | |||||
return None | |||||
sentence = m.group(0) | |||||
try: | |||||
sentence = unicode(sentence, 'iso-8859-1') | |||||
sentence = sentence.encode('utf-8') | |||||
except: pass | |||||
maxlength = 275 | |||||
if len(sentence) > maxlength: | |||||
sentence = sentence[:maxlength] | |||||
words = sentence[:-5].split(' ') | |||||
words.pop() | |||||
sentence = ' '.join(words) + ' [...]' | |||||
sentence = '"' + sentence.replace('"', "'") + '"' | |||||
return sentence + ' - ' + (etyuri % word) | |||||
try: | |||||
result = ety(word.encode('utf-8')) | |||||
except IOError: | |||||
msg = "Can't connect to etymonline.com (%s)" % (etyuri % word) | |||||
reply(msg, chan, nick) | |||||
return | |||||
except AttributeError: | |||||
result = None | |||||
if result is not None: | |||||
reply(result, chan, nick) | |||||
else: | |||||
uri = etysearch % word | |||||
msg = 'Can\'t find the etymology for "%s". Try %s' % (word, uri) | |||||
reply(msg, chan, nick) | |||||
return | |||||
if command == "num" or command == "number" or command == "count" or command == "c": | |||||
try: | |||||
params = string.lower(line2[4]) | |||||
except Exception: | |||||
params = False | |||||
if params == "old" or params == "afc" or params == "a": | |||||
number = unicode(int(len(re.findall("title=", urllib.urlopen("http://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Pending_AfC_submissions&cmlimit=500").read()))) - 2) | |||||
reply("There are currently %s pending AfC submissions." % number, chan, nick) | |||||
elif params == "redirect" or params == "redir" or params == "redirs" or params == "redirects" or params == "r": | |||||
redir_data = urllib.urlopen("http://en.wikipedia.org/w/index.php?title=Wikipedia:Articles_for_creation/Redirects").read() | |||||
redirs = (string.count(redir_data, "<h2>") - 1) - (string.count(redir_data, '<table class="navbox collapsible collapsed" style="text-align: left; border: 0px; margin-top: 0.2em;">')) | |||||
reply("There are currently %s open redirect requests." % redirs, chan, nick) | |||||
elif params == "files" or params == "ffu" or params == "file" or params == "image" or params == "images" or params == "ifu" or params == "f": | |||||
file_data = re.sub("<h2>Contents</h2>", "", urllib.urlopen("http://en.wikipedia.org/w/index.php?title=Wikipedia:Files_for_upload").read()) | |||||
files = (string.count(file_data, "<h2>") - 1) - (string.count(file_data, '<table class="navbox collapsible collapsed" style="text-align: left; border: 0px; margin-top: 0.2em;">')) | |||||
reply("There are currently %s open file upload requests." % files, chan, nick) | |||||
elif params == "aggregate" or params == "agg": | |||||
subs = unicode(int(len(re.findall("title=", urllib.urlopen("http://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Pending_AfC_submissions&cmlimit=500").read()))) - 2) | |||||
redir_data = urllib.urlopen("http://en.wikipedia.org/w/index.php?title=Wikipedia:Articles_for_creation/Redirects").read() | |||||
file_data = re.sub("<h2>Contents</h2>", "", urllib.urlopen("http://en.wikipedia.org/w/index.php?title=Wikipedia:Files_for_upload").read()) | |||||
redirs = (string.count(redir_data, "<h2><span class=\"editsection\">")) - (string.count(redir_data, '<table class="navbox collapsible collapsed" style="text-align: left; border: 0px; margin-top: 0.2em;">')) | |||||
files = (string.count(file_data, "<h2>") - 1) - (string.count(file_data, '<table class="navbox collapsible collapsed" style="text-align: left; border: 0px; margin-top: 0.2em;">')) | |||||
aggregate = (int(subs) * 5) + (int(redirs) * 2) + (int(files) * 2) | |||||
if aggregate == 0: | |||||
stat = "clear" | |||||
elif aggregate < 60: | |||||
stat = "almost clear" | |||||
elif aggregate < 125: | |||||
stat = "small backlog" | |||||
elif aggregate < 175: | |||||
stat = "average backlog" | |||||
elif aggregate < 250: | |||||
stat = "backlogged" | |||||
elif aggregate < 300: | |||||
stat = "heavily backlogged" | |||||
else: | |||||
stat = "severely backlogged" | |||||
reply("Aggregate is currently %s (%s)." % (aggregate, stat), chan, nick) | |||||
else: | |||||
subs = unicode(int(len(re.findall("title=", urllib.urlopen("http://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Pending_AfC_submissions&cmlimit=500").read()))) - 2) | |||||
redir_data = urllib.urlopen("http://en.wikipedia.org/w/index.php?title=Wikipedia:Articles_for_creation/Redirects").read() | |||||
file_data = re.sub("<h2>Contents</h2>", "", urllib.urlopen("http://en.wikipedia.org/w/index.php?title=Wikipedia:Files_for_upload").read()) | |||||
redirs = (string.count(redir_data, "<h2><span class=\"editsection\">")) - (string.count(redir_data, '<table class="navbox collapsible collapsed" style="text-align: left; border: 0px; margin-top: 0.2em;">')) | |||||
files = (string.count(file_data, "<h2>") - 1) - (string.count(file_data, '<table class="navbox collapsible collapsed" style="text-align: left; border: 0px; margin-top: 0.2em;">')) | |||||
reply("There are currently %s pending submissions, %s open redirect requests, and %s open file upload requests." % (subs, redirs, files), chan, nick) | |||||
return | |||||
if command == "nick": | |||||
if authy == "owner": | |||||
try: | |||||
new_nick = line2[4] | |||||
except Exception: | |||||
reply("Please specify a nick to change to.", chan, nick) | |||||
return | |||||
s.send("NICK %s\r\n" % new_nick) | |||||
else: | |||||
reply("You aren't authorized to use that command.", chan, nick) | |||||
return | |||||
if command == "op" or command == "deop" or command == "voice" or command == "devoice": | |||||
if authy == "owner" or authy == "admin": | |||||
try: | |||||
user = line2[4] | |||||
except Exception: | |||||
user = nick | |||||
say("%s %s %s" % (command, chan, user), "ChanServ") | |||||
else: | |||||
reply("You aren't authorized to use that command.", chan, nick) | |||||
return | |||||
if command == "pend" or command == "pending": | |||||
say("Pending submissions status page: <http://en.wikipedia.org/wiki/WP:AFC/S>.", chan) | |||||
say("Pending submissions category: <http://en.wikipedia.org/wiki/Category:Pending_AfC_submissions>.", chan) | |||||
return | |||||
if command == "sub" or command == "submissions": | |||||
try: | |||||
number = int(line2[4]) | |||||
except Exception: | |||||
reply("Please enter a number.", chan, nick) | |||||
return | |||||
do_url = False | |||||
try: | |||||
if "url" in line2[5:]: do_url = True | |||||
except Exception: | |||||
pass | |||||
url = "http://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Pending_AfC_submissions&cmlimit=500&cmsort=timestamp" | |||||
query = urllib.urlopen(url) | |||||
data = query.read() | |||||
pages = re.findall("title="(.*?)"", data) | |||||
try: | |||||
pages.remove("Wikipedia:Articles for creation/Redirects") | |||||
except Exception: | |||||
pass | |||||
try: | |||||
pages.remove("Wikipedia:Files for upload") | |||||
except Exception: | |||||
pass | |||||
pages.reverse() | |||||
pages = pages[:number] | |||||
if not do_url: | |||||
s = string.join(pages, "]], [[") | |||||
s = "[[%s]]" % s | |||||
else: | |||||
s = string.join(pages, ">, <http://en.wikipedia.org/wiki/") | |||||
s = "<http://en.wikipedia.org/wiki/%s>" % s | |||||
s = re.sub(" ", "_", s) | |||||
s = re.sub(">,_<", ">, <", s) | |||||
report = "\x02First %s pending AfC submissions:\x0F %s" % (number, s) | |||||
say(report, chan) | |||||
return | |||||
if command == "praise" or command == "leonard" or command == "groovedog" or command == "earwig" or command == "macmed" or command == "cubs197" or command == "sparksboy" or command == "tim_song" or command == "tim" or command == "sausage" or command == "mindstormskid" or command == "mcjohn" or command == "fetchcomms" or command == "blurpeace": | |||||
bad = False | |||||
if command == "leonard": | |||||
special = "AfC redirect reviewer" | |||||
user = "Leonard^Bloom" | |||||
elif command == "groovedog": | |||||
special = "heh" | |||||
user = "GrooveDog" | |||||
elif command == "earwig": | |||||
special = "Python programmer" | |||||
user = "Earwig" | |||||
elif command == "macmed": | |||||
special = "CSD tagger" | |||||
user = "MacMed" | |||||
elif command == "mindstormskid": | |||||
special = "Lego fanatic" | |||||
user = "MindstormsKid" | |||||
elif command == "cubs197": | |||||
special = "IRC dude" | |||||
user = "Cubs197" | |||||
elif command == "sparksboy": | |||||
special = "pet owner" | |||||
user = "SparksBoy" | |||||
elif command == "tim_song" or command == "tim": | |||||
special = "JavaScript programmer" | |||||
user = "Tim_Song" | |||||
elif command == "sausage": | |||||
special = "helper" | |||||
user = "chzz" | |||||
elif command == "mcjohn": | |||||
special = "edit summary writer" | |||||
user = "McJohn" | |||||
elif command == "fetchcomms": | |||||
special = "n00b" | |||||
user = "Fetchcomms" | |||||
elif command == "blurpeace": | |||||
special = "Commons admin" | |||||
user = "Blurpeace" | |||||
else: | |||||
say("Only a true fool would use that command, %s." % nick, chan) | |||||
# say("The users who you can praise are: Leonard^Bloom, GrooveDog, Earwig, MacMed, Cubs197, SparksBoy, MindstormsKid, Chzz, McJohn, Tim_Song, Fetchcomms, and Blurpeace.", chan) | |||||
return | |||||
if not bad: | |||||
say("\x02%s\x0F is the bestest %s evah!" % (user, special), chan) | |||||
if bad: | |||||
say("\x02%s\x0F is worstest %s evah!" % (user, special), chan) | |||||
return | |||||
if command == "trout": | |||||
try: | |||||
user = line2[4] | |||||
user = ' '.join(line2[4:]) | |||||
except Exception: | |||||
reply("Hahahahahahahaha...", chan, nick) | |||||
return | |||||
normal = unicodedata.normalize('NFKD', unicode(string.lower(user))) | |||||
if "itself" in normal: | |||||
reply("I'm not that stupid ;)", chan, nick) | |||||
return | |||||
elif "earwigbot" in normal: | |||||
reply("I'm not that stupid ;)", chan, nick) | |||||
elif "earwig" not in normal and "ear wig" not in normal: | |||||
text = 'slaps %s around a bit with a large trout.' % user | |||||
msg = '\x01ACTION %s\x01' % text | |||||
say(msg, chan) | |||||
else: | |||||
reply("I refuse to hurt anything with \"Earwig\" in its name :P", chan, nick) | |||||
return | |||||
if command == "kill" or command == "destroy" or command == "murder": | |||||
reply("Who do you think I am? The Mafia?", chan, nick) | |||||
return | |||||
if command == "fish": | |||||
try: | |||||
user = line2[4] | |||||
fish = ' '.join(line2[5:]) | |||||
except Exception: | |||||
reply("Hahahahahahahaha...", chan, nick) | |||||
return | |||||
normal = unicodedata.normalize('NFKD', unicode(string.lower(user))) | |||||
if "itself" in normal: | |||||
reply("I'm not that stupid ;)", chan, nick) | |||||
return | |||||
elif "earwigbot" in normal: | |||||
reply("I'm not that stupid ;)", chan, nick) | |||||
elif "earwig" not in normal and "ear wig" not in normal: | |||||
text = 'slaps %s around a bit with a %s.' % (user, fish) | |||||
msg = '\x01ACTION %s\x01' % text | |||||
say(msg, chan) | |||||
else: | |||||
reply("I refuse to hurt anything with \"Earwig\" in its name :P", chan, nick) | |||||
return | |||||
if command == "report": | |||||
def find_status(name="", talk=False):
    """Return a short human-readable status for an AfC submission.

    The submission's wikitext is fetched through the API and scanned for
    ``{{AFC submission|...}}`` markers.  The checks run in a fixed order
    (declined, held, pending, reviewing) and a later match overrides an
    earlier one.  When no marker is found, the page's existence decides
    between "Accepted" and "Not found".

    name -- submission title without the AfC prefix (spaces allowed).
    talk -- True to look in the Wikipedia_talk: namespace instead of
            Wikipedia:.
    """
    enname = re.sub(" ", "_", name)
    if talk:
        enname = "Wikipedia_talk:Articles_for_creation/%s" % enname
    else:
        enname = "Wikipedia:Articles_for_creation/%s" % enname
    url = "http://en.wikipedia.org/w/api.php?action=query&titles=%s&prop=revisions&rvprop=content" % enname
    data = urllib.urlopen(url).read()

    def with_reason(label, pattern):
        # The marker can be present without a parsable "|reason|" part;
        # report the bare label instead of raising IndexError.
        found = re.findall(pattern, data)
        if not found:
            return label
        why = found[0][1]
        if why == "reason":
            return "%s, reason is a custom reason" % label
        return "%s, reason is '%s'" % (label, why)

    status = ""
    if "{{AFC submission|D" in data or "{{AFC submission|d" in data:
        status = with_reason("Declined", r"(D|d)\|(.*?)\|")
    if "{{AFC submission|H" in data or "{{AFC submission|h" in data:
        status = with_reason("Held", r"(H|h)\|(.*?)\|")
    if "{{AFC submission||" in data:
        status = "Pending"
    if "{{AFC submission|R" in data or "{{AFC submission|r" in data:
        status = "Reviewing"
    if not status:
        # No template at all: an existing page means the submission was
        # accepted, a missing one means it was never there.
        if exists(name=enname):
            status = "Accepted"
        else:
            status = "Not found"
    return status
def exists(name=""):
    """Tell whether the English Wikipedia page called *name* exists.

    The rendered page is downloaded and checked for the "Wikipedia does not
    have a" notice; its absence is taken to mean the page exists.
    """
    page = urllib.urlopen("http://en.wikipedia.org/wiki/%s" % name)
    html = page.read()
    return "Wikipedia does not have a" not in html
def get_submitter(name="", talk=False):
    """Return ``(username, is_anonymous)`` for a submission's first revision.

    The API is asked for the oldest revision (``rvdir=newer&rvlimit=1``) of
    the submission page and the ``user`` attribute is pulled out of the
    response text.  When no user can be found the username is "".

    name -- submission title without the AfC prefix (spaces allowed).
    talk -- True to look in the Wikipedia_talk: namespace instead of
            Wikipedia:.
    """
    enname = re.sub(" ", "_", name)
    if talk:
        enname = "Wikipedia_talk:Articles_for_creation/%s" % enname
    else:
        enname = "Wikipedia:Articles_for_creation/%s" % enname
    url = "http://en.wikipedia.org/w/api.php?action=query&titles=%s&prop=revisions&rvprop=user&rvdir=newer&rvlimit=1" % enname
    data = urllib.urlopen(url).read()
    # NOTE(review): the quote characters in this pattern were garbled in the
    # copy under review; it accepts both HTML-escaped and raw quotes around
    # the attribute value -- confirm against a live API response.
    extract = re.findall(r'user=(?:&quot;|")(.*?)(?:&quot;|")', data)
    anon = "anon=" in data
    if extract:
        return extract[0], anon
    # Nothing matched: keep the debugging output, but do not rely on a
    # catch-all ``except BaseException`` to detect the empty list.
    print(extract)
    return "", anon
try: | |||||
rawSub = line2[4] | |||||
rawSub = ' '.join(line2[4:]) | |||||
except Exception: | |||||
reply("You need to specify a submission name in order to use %s!" % command, chan, nick) | |||||
return | |||||
talk = False | |||||
if "[[" in rawSub and "]]" in rawSub: | |||||
name = re.sub("\[\[(.*)\]\]", "\\1", rawSub) | |||||
name = re.sub(" ", "_", name) | |||||
name = urllib.quote(name, ":/") | |||||
name = "http://en.wikipedia.org/wiki/%s" % name | |||||
if "talk:" in name: | |||||
talk = True | |||||
elif "http://" in rawSub: | |||||
name = rawSub | |||||
if "talk:" in name: | |||||
talk = True | |||||
elif "en.wikipedia.org" in rawSub: | |||||
name = "http://%s" % rawSub | |||||
if "talk:" in name: | |||||
talk = True | |||||
elif "Wikipedia:" in rawSub or "Wikipedia_talk:" in rawSub or "Wikipedia talk:" in rawSub: | |||||
name = re.sub(" ", "_", rawSub) | |||||
name = urllib.quote(name, ":/") | |||||
name = "http://en.wikipedia.org/wiki/%s" % name | |||||
if "talk:" in name: | |||||
talk = True | |||||
else: | |||||
url = "http://en.wikipedia.org/wiki/" | |||||
pagename = re.sub(" ", "_", rawSub) | |||||
pagename = urllib.quote(pagename, ":/") | |||||
pagename = "Wikipedia:Articles_for_creation/%s" % pagename | |||||
page = urllib.urlopen("%s%s" % (url, pagename)) | |||||
text = page.read() | |||||
name = "http://en.wikipedia.org/wiki/%s" % pagename | |||||
if "Wikipedia does not have a" in text: | |||||
pagename = re.sub(" ", "_", rawSub) | |||||
pagename = urllib.quote(pagename, ":/") | |||||
pagename = "Wikipedia_talk:Articles_for_creation/%s" % pagename | |||||
page = urllib.urlopen("%s%s" % (url, pagename)) | |||||
name = "http://en.wikipedia.org/wiki/%s" % pagename | |||||
talk = True | |||||
unname = re.sub("http://en.wikipedia.org/wiki/Wikipedia:Articles_for_creation/", "", name) | |||||
unname = re.sub("http://en.wikipedia.org/wiki/Wikipedia_talk:Articles_for_creation/", "", unname) | |||||
unname = re.sub("_", " ", unname) | |||||
if "talk" in unname: | |||||
talk = True | |||||
submitter, anon = get_submitter(name=unname, talk=talk) | |||||
status = find_status(name=unname, talk=talk) | |||||
if submitter != "": | |||||
if anon == True: | |||||
submitter_page = "Special:Contributions/%s" % submitter | |||||
if anon == False: | |||||
unsubmit = re.sub(" ", "_", submitter) | |||||
unsubmit = urllib.quote(unsubmit, ":/") | |||||
submitter_page = "User:%s" % unsubmit | |||||
if status == "Accepted": | |||||
submitterm = "Reviewer" | |||||
else: | |||||
submitterm = "Submitter" | |||||
line1 = "\x02AfC submission report for %s:" % unname | |||||
line2 = "\x02URL: \x0301\x0F%s" % name | |||||
if submitter != "": | |||||
line3 = "\x02%s: \x0F\x0302%s (\x0301\x0Fhttp://en.wikipedia.org/wiki/%s)." % (submitterm, submitter, submitter_page) | |||||
line4 = "\x02Status: \x0F\x0302%s." % status | |||||
say(line1, chan) | |||||
time.sleep(0.1) | |||||
say(line2, chan) | |||||
time.sleep(0.1) | |||||
if submitter != "": | |||||
say(line3, chan) | |||||
time.sleep(0.1) | |||||
say(line4, chan) | |||||
return | |||||
if command == "commands": | |||||
if chan.startswith("#"): | |||||
reply("Please use that command in a private message.", chan, nick) | |||||
return | |||||
others2 = get_commandList().values() | |||||
others = [] | |||||
for com in others2: | |||||
if com == "copyvio" or com == "number" or com == "pending" or com == "report" or com == "submissions" or com == "access" or com == "help" or com == "join" or com == "linker" or com == "nick" or com == "op" or com == "part" or com == "quiet" or com == "quit" or com == "restart" or com == "voice" or com == "welcome" or com == "fish" or com == "praise" or com == "trout" or com == "notes": | |||||
continue | |||||
if com in others: continue | |||||
others.append(com) | |||||
others.sort() | |||||
say("\x02AFC commands:\x0F copyvio, number, pending, report, submissions.", chan) | |||||
time.sleep(0.1) | |||||
say("\x02Bot operation and channel maintaince commands:\x0F access, help, join, linker, nick, op, part, quiet, quit, restart, voice, welcome.", chan) | |||||
time.sleep(0.1) | |||||
say("\x02Fun commands:\x0F fish, praise, trout, and numerous easter eggs", chan) | |||||
time.sleep(0.1) | |||||
say("\x02Other commands:\x0F %s" % ', '.join(others), chan) | |||||
time.sleep(0.1) | |||||
say("The bot maintains a mini-wiki. Type \"!notes help\" for more information.", chan) | |||||
time.sleep(0.1) | |||||
say("See http://enwp.org/User:The_Earwig/Bots/IRC for details. For help on a specific command, type '!help command'.", chan) | |||||
return | |||||
if command == "help" or command == "doc" or command == "documentation": | |||||
try: | |||||
com = line2[4] | |||||
except Exception: | |||||
reply("Hi, I'm a bot that does work for Articles for Creation. You can find information about me at http://enwp.org/User:The_Earwig/Bots/IRC. Say \"!commands\" to me in a private message for some of my abilities. Earwig is my owner and creator, and you can contact him at http://enwp.org/User_talk:The_Earwig.", chan, nick) | |||||
return | |||||
say("Sorry, command documentation has not been implemented yet.", chan) | |||||
return | |||||
if command == "mysql": | |||||
if authy != "owner": | |||||
reply("You aren't authorized to use this command.", chan, nick) | |||||
return | |||||
import MySQLdb | |||||
try: | |||||
strings = line2[4] | |||||
strings = ' '.join(line2[4:]) | |||||
if "db:" in strings: | |||||
database = re.findall("db\:(.*?)\s", strings)[0] | |||||
else: | |||||
database = "enwiki_p" | |||||
if "time:" in strings: | |||||
times = int(re.findall("time\:(.*?)\s", strings)[0]) | |||||
else: | |||||
times = 60 | |||||
file = re.findall("file\:(.*?)\s", strings)[0] | |||||
sqlquery = re.findall("query\:(.*?)\Z", strings)[0] | |||||
except Exception: | |||||
reply("You did not specify enough data for the bot to continue.", chan, nick) | |||||
return | |||||
database2 = database[:-2] + "-p" | |||||
db = MySQLdb.connect(db=database, host="%s.rrdb.toolserver.org" % database2, read_default_file="/home/earwig/.my.cnf") | |||||
db.query(sqlquery) | |||||
r = db.use_result() | |||||
data = r.fetch_row(0) | |||||
try: | |||||
f = codecs.open("/home/earwig/public_html/reports/%s/%s" % (database[:-2], file), 'r') | |||||
reply("A file already exists with that name.", chan, nick) | |||||
return | |||||
except Exception: | |||||
pass | |||||
f = codecs.open("/home/earwig/public_html/reports/%s/%s" % (database[:-2], file), 'a', 'utf-8') | |||||
for line in data: | |||||
new_line = [] | |||||
for l in line: | |||||
new_line.append(str(l)) | |||||
f.write(' '.join(new_line) + "\n") | |||||
f.close() | |||||
reply("Query completed successfully. See http://toolserver.org/~earwig/reports/%s/%s. I will delete the report in %s seconds." % (database[:-2], file, times), chan, nick) | |||||
time.sleep(times) | |||||
os.remove("/home/earwig/public_html/reports/%s/%s" % (database[:-2], file)) | |||||
return | |||||
if command == "remind" or command == "reminder": | |||||
try: | |||||
times = int(line2[4]) | |||||
content = ' '.join(line2[5:]) | |||||
except Exception: | |||||
reply("Please specify a time and a note in the following format: !remind <time> <note>.", chan, nick) | |||||
return | |||||
reply("Set reminder for \"%s\" in %s seconds." % (content, times), chan, nick) | |||||
time.sleep(times) | |||||
reply(content, chan, nick) | |||||
return | |||||
if command == "notes" or command == "note" or command == "about" or command == "data" or command == "database": | |||||
try: | |||||
action = line2[4] | |||||
except BaseException: | |||||
reply("What do you want me to do? Type \"!notes help\" for more information.", chan, nick) | |||||
return | |||||
import MySQLdb | |||||
db = MySQLdb.connect(db="u_earwig_ircbot", host="sql", read_default_file="/home/earwig/.my.cnf") | |||||
specify = ' '.join(line2[5:]) | |||||
if action == "help" or action == "manual": | |||||
shortCommandList = "read, write, change, undo, delete, move, author, category, list, report, developer" | |||||
if specify == "read": | |||||
say("To read an entry, type \"!notes read <entry>\".", chan) | |||||
elif specify == "write": | |||||
say("To write a new entry, type \"!notes write <entry> <content>\". This will create a new entry only if one does not exist, see the below command...", chan) | |||||
elif specify == "change": | |||||
say("To change an entry, type \"!notes change <entry> <new content>\". The old entry will be stored in the database, so it can be undone later.", chan) | |||||
elif specify == "undo": | |||||
say("To undo a change, type \"!notes undo <entry>\".", chan) | |||||
elif specify == "delete": | |||||
say("To delete an entry, type \"!notes delete <entry>\". For security reasons, only bot admins can do this.", chan) | |||||
elif specify == "move": | |||||
say("To move an entry, type \"!notes move <old_title> <new_title>\".", chan) | |||||
elif specify == "author": | |||||
say("To return the author of an entry, type \"!notes author <entry>\".", chan) | |||||
elif specify == "category" or specify == "cat": | |||||
say("To change an entry's category, type \"!notes category <entry> <category>\".", chan) | |||||
elif specify == "list": | |||||
say("To list all categories in the database, type \"!notes list\". Type \"!notes list <category>\" to get all entries in a certain category.", chan) | |||||
elif specify == "report": | |||||
say("To give some statistics about the mini-wiki, including some debugging information, type \"!notes report\" in a PM.", chan) | |||||
elif specify == "developer": | |||||
say("To do developer work, such as writing to the database directly, type \"!notes developer <command>\". This can only be done by the bot owner.", chan) | |||||
else: | |||||
db.query("SELECT * FROM version;") | |||||
r = db.use_result() | |||||
data = r.fetch_row(0) | |||||
version = data[0] | |||||
reply("The Earwig Mini-Wiki: running v%s." % version, chan, nick) | |||||
reply("The full list of commands, for reference, are: %s." % shortCommandList, chan, nick) | |||||
reply("For an explaination of a certain command, type \"!notes help <command>\".", chan, nick) | |||||
reply("You can also access the database from the Toolserver: http://toolserver.org/~earwig/cgi-bin/irc_database.py", chan, nick) | |||||
time.sleep(0.4) | |||||
return | |||||
elif action == "read": | |||||
specify = string.lower(specify) | |||||
if " " in specify: specify = string.split(specify, " ")[0] | |||||
if not specify or "\"" in specify: | |||||
reply("Please include the name of the entry you would like to read after the command, e.g. !notes read earwig", chan, nick) | |||||
return | |||||
try: | |||||
db.query("SELECT entry_content FROM entries WHERE entry_title = \"%s\";" % specify) | |||||
r = db.use_result() | |||||
data = r.fetch_row(0) | |||||
entry = data[0][0] | |||||
say("Entry \"\x02%s\x0F\": %s" % (specify, entry), chan) | |||||
except Exception: | |||||
reply("There is no entry titled \"\x02%s\x0F\"." % specify, chan, nick) | |||||
return | |||||
elif action == "delete" or action == "remove": | |||||
specify = string.lower(specify) | |||||
if " " in specify: specify = string.split(specify, " ")[0] | |||||
if not specify or "\"" in specify: | |||||
reply("Please include the name of the entry you would like to delete after the command, e.g. !notes delete earwig", chan, nick) | |||||
return | |||||
if authy == "owner" or authy == "admin": | |||||
try: | |||||
db.query("DELETE from entries where entry_title = \"%s\";" % specify) | |||||
r = db.use_result() | |||||
db.commit() | |||||
reply("The entry on \"\x02%s\x0F\" has been removed." % specify, chan, nick) | |||||
except Exception: | |||||
phenny.reply("Unable to remove the entry on \"\x02%s\x0F\", because it doesn't exist." % specify, chan, nick) | |||||
else: | |||||
reply("Only bot admins can remove entries.", chan, nick) | |||||
return | |||||
elif action == "developer": | |||||
if authy == "owner": | |||||
db.query(specify) | |||||
r = db.use_result() | |||||
try: | |||||
print r.fetch_row(0) | |||||
except Exception: | |||||
pass | |||||
db.commit() | |||||
reply("Done.", chan, nick) | |||||
else: | |||||
reply("Only the bot owner can modify the raw database.", chan, nick) | |||||
return | |||||
elif action == "write": | |||||
try: | |||||
write = line2[5] | |||||
content = ' '.join(line2[6:]) | |||||
except Exception: | |||||
reply("Please include some content in your entry.", chan, nick) | |||||
return | |||||
db.query("SELECT * from entries WHERE entry_title = \"%s\";" % write) | |||||
r = db.use_result() | |||||
data = r.fetch_row(0) | |||||
if data: | |||||
reply("An entry on %s already exists; please use \"!notes change %s %s\"." % (write, write, content), chan, nick) | |||||
return | |||||
content2 = content.replace('"', '\\' + '"') | |||||
db.query("INSERT INTO entries (entry_title, entry_author, entry_category, entry_content, entry_content_old) VALUES (\"%s\", \"%s\", \"uncategorized\", \"%s\", NULL);" % (write, nick, content2)) | |||||
db.commit() | |||||
reply("You have written an entry titled \"\x02%s\x0F\", with the following content: \"%s\"" % (write, content), chan, nick) | |||||
return | |||||
elif action == "change": | |||||
reply("NotImplementedError", chan, nick) | |||||
elif action == "undo": | |||||
reply("NotImplementedError", chan, nick) | |||||
elif action == "move": | |||||
reply("NotImplementedError", chan, nick) | |||||
elif action == "author": | |||||
try: | |||||
entry = line2[5] | |||||
except Exception: | |||||
reply("Please include the name of the entry you would like to get information for after the command, e.g. !notes author earwig", chan, nick) | |||||
return | |||||
db.query("SELECT entry_author from entries WHERE entry_title = \"%s\";" % entry) | |||||
r = db.use_result() | |||||
data = r.fetch_row(0) | |||||
if data: | |||||
say("The author of \"\x02%s\x0F\" is \x02%s\x0F." % (entry, data[0][0]), chan) | |||||
return | |||||
reply("There is no entry titled \"\x02%s\x0F\"." % entry, chan, nick) | |||||
return | |||||
elif action == "cat" or action == "category": | |||||
reply("NotImplementedError", chan, nick) | |||||
elif action == "list": | |||||
reply("NotImplementedError", chan, nick) | |||||
elif action == "report": | |||||
reply("NotImplementedError", chan, nick) | |||||
if command == "hash": | |||||
import hashlib | |||||
try: | |||||
hashVia = line2[4] | |||||
hashText = line2[5] | |||||
hashText = ' '.join(line2[5:]) | |||||
except Exception: | |||||
reply("Please provide a string and method to hash by.", chan, nick) | |||||
return | |||||
try: | |||||
hashed = eval("hashlib.%s(\"%s\").hexdigest()" % (hashVia, hashText)) | |||||
reply(hashed, chan, nick) | |||||
except Exception: | |||||
try: | |||||
hashing = hashlib.new(hashVia) | |||||
hashing.update(hashText) | |||||
hashed = hashing.hexdigest() | |||||
reply(hashed, chan, nick) | |||||
except Exception: | |||||
reply("Error.", chan, nick) | |||||
if command == "langcode" or command == "lang" or command == "language": | |||||
try: | |||||
lang = line2[4] | |||||
except Exception: | |||||
reply("Please specify an ISO code.", chan, nick) | |||||
return | |||||
data = urllib.urlopen("http://toolserver.org/~earwig/cgi-bin/swmt.py?action=iso").read() | |||||
data = string.split(data, "\n") | |||||
result = False | |||||
for datum in data: | |||||
if datum.startswith(lang): | |||||
result = re.findall(".*? (.*)", datum)[0] | |||||
break | |||||
if result: | |||||
reply(result, chan, nick) | |||||
return | |||||
reply("Not found.", chan, nick) | |||||
return | |||||
if command == "lookup" or command == "ip": | |||||
try: | |||||
hexIP = line2[4] | |||||
except Exception: | |||||
reply("Please specify a hex IP address.", chan, nick) | |||||
return | |||||
hexes = [hexIP[:2], hexIP[2:4], hexIP[4:6], hexIP[6:8]] | |||||
hashes = [] | |||||
for hexHash in hexes: | |||||
newHex = int(hexHash, 16) | |||||
hashes.append(newHex) | |||||
normalizedIP = "%s.%s.%s.%s" % (hashes[0], hashes[1], hashes[2], hashes[3]) | |||||
reply(normalizedIP, chan, nick) | |||||
return |
@@ -1,138 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# Report the status of AFC submissions, either as an automatic message on join or a request via !status. | |||||
import json | |||||
import re | |||||
import urllib | |||||
from config.watcher import * | |||||
from irc.base_command import BaseCommand | |||||
class AFCStatus(BaseCommand):
    """Report the status of Articles for Creation.

    Responds to the status commands and also sends an automatic notice to
    users who join one of the channels listed in AFC_CHANS.
    """

    def get_hooks(self):
        return ["join", "msg"]

    def get_help(self, command):
        return "Get the number of pending AfC submissions, open redirect requests, and open file upload requests."

    def check(self, data):
        """Return True for a status command or a JOIN in an AfC channel."""
        if data.is_command and data.command in ("status", "count", "num", "number", "afc_status"):
            return True

        try:
            if data.line[1] == "JOIN" and data.chan in AFC_CHANS:
                return True
        except IndexError:
            pass
        return False

    def process(self, data):
        """Answer a status request, or greet a joining user with a notice.

        With no argument all three counts are reported; otherwise the first
        argument selects submissions, redirects, files, the aggregate, or
        the join notice.
        """
        # Guard the index the same way check() does.
        if len(data.line) > 1 and data.line[1] == "JOIN":
            notice = self.get_join_notice()
            self.connection.notice(data.nick, notice)
            return

        if not data.args:
            subs = self.count_submissions()
            redirs = self.count_redirects()
            files = self.count_files()
            self.connection.reply(data, "there are currently %s pending submissions, %s open redirect requests, and %s open file upload requests."
                                  % (subs, redirs, files))
            return

        arg = data.args[0]
        if arg.startswith("sub") or arg == "s":
            subs = self.count_submissions()
            self.connection.reply(data, "there are currently %s pending AfC submissions." % subs)

        elif arg.startswith("redir") or arg == "r":
            redirs = self.count_redirects()
            self.connection.reply(data, "there are currently %s open redirect requests." % redirs)

        elif arg.startswith("file") or arg == "f":
            # File requests have their own counter; the redirect counter was
            # being reported here by mistake.
            files = self.count_files()
            self.connection.reply(data, "there are currently %s open file upload requests." % files)

        elif arg.startswith("agg") or arg == "a":
            try:
                agg_num = int(data.args[1])
            except IndexError:
                # No explicit number given: compute it from the live counts.
                agg_data = (self.count_submissions(), self.count_redirects(), self.count_files())
                agg_num = self.get_aggregate_number(agg_data)
            except ValueError:
                self.connection.reply(data, "\x0303%s\x0301 isn't a number!" % data.args[1])
                return
            aggregate = self.get_aggregate(agg_num)
            self.connection.reply(data, "aggregate is currently %s (AfC %s)." % (agg_num, aggregate))

        elif arg.startswith("join") or arg == "j":
            notice = self.get_join_notice()
            self.connection.reply(data, notice)

        else:
            self.connection.reply(data, "unknown argument: \x0303%s\x0301. Valid args are 'subs', 'redirs', 'files', 'agg', and 'join'." % arg)

    def get_join_notice(self):
        """Build the one-line status summary sent to joining users."""
        subs = self.count_submissions()
        redirs = self.count_redirects()
        files = self.count_files()
        agg_num = self.get_aggregate_number((subs, redirs, files))
        aggregate = self.get_aggregate(agg_num)
        return ("\x02Current status:\x0F Articles for Creation %s (\x0302AFC\x0301: \x0305%s\x0301; \x0302AFC/R\x0301: \x0305%s\x0301; \x0302FFU\x0301: \x0305%s\x0301)"
                % (aggregate, subs, redirs, files))

    def count_submissions(self):
        """Return the number of pages in the pending-submissions category."""
        params = {'action': 'query', 'list': 'categorymembers', 'cmlimit': '500', 'format': 'json'}
        params['cmtitle'] = "Category:Pending_AfC_submissions"
        data = urllib.urlencode(params)
        raw = urllib.urlopen("http://en.wikipedia.org/w/api.php", data).read()
        res = json.loads(raw)
        subs = len(res['query']['categorymembers'])
        subs -= 2  # remove [[Wikipedia:Articles for creation/Redirects]] and [[Wikipedia:Files for upload]], which aren't real submissions
        return subs

    def count_redirects(self):
        """Return the number of open redirect requests."""
        return self._count_open_requests("Wikipedia:Articles_for_creation/Redirects", "{{afc-c|b}}")

    def count_files(self):
        """Return the number of open file upload requests."""
        return self._count_open_requests("Wikipedia:Files_for_upload", "{{ifu-c|b}}")

    def _count_open_requests(self, pagename, closed_marker):
        """Count level-two headings on a page minus its closed-request markers."""
        content = self.get_page(pagename)
        total = len(re.findall(r"^\s*==(.*?)==\s*$", content, re.MULTILINE))
        closed = content.lower().count(closed_marker)
        return total - closed

    def get_page(self, pagename):
        """Return the wikitext of the latest revision of *pagename*."""
        params = {'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'rvlimit': '1', 'format': 'json'}
        params['titles'] = pagename
        data = urllib.urlencode(params)
        raw = urllib.urlopen("http://en.wikipedia.org/w/api.php", data).read()
        res = json.loads(raw)
        # The response is keyed by page id; only one title was requested, so
        # take the first (only) entry.
        page = list(res['query']['pages'].values())[0]
        return page['revisions'][0]['*']

    def get_aggregate(self, num):
        """Translate an aggregate number into a colour-coded description."""
        if num == 0:
            agg = "is \x02\x0303clear\x0301\x0F"
        elif num < 60:
            agg = "is \x0303almost clear\x0301"
        elif num < 125:
            agg = "has a \x0312small backlog\x0301"
        elif num < 175:
            agg = "has an \x0307average backlog\x0301"
        elif num < 250:
            agg = "is \x0304backlogged\x0301"
        elif num < 300:
            agg = "is \x02\x0304heavily backlogged\x0301\x0F"
        else:
            agg = "is \x02\x1F\x0304severely backlogged\x0301\x0F"
        return agg

    def get_aggregate_number(self, counts):
        """Combine a (subs, redirs, files) tuple into one weighted number.

        Submissions weigh 5, redirect and file requests weigh 2 each.
        """
        # Unpack in the body rather than in the signature; callers still
        # pass a single 3-tuple.
        subs, redirs, files = counts
        return (subs * 5) + (redirs * 2) + (files * 2)
@@ -1,71 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# A somewhat advanced calculator: http://futureboy.us/fsp/frink.fsp. | |||||
import re | |||||
import urllib | |||||
from irc.base_command import BaseCommand | |||||
class Calc(BaseCommand):
    """A calculator command that forwards the expression to the Frink web form."""

    def get_hooks(self):
        return ["msg"]

    def get_help(self, command):
        return "A somewhat advanced calculator: see http://futureboy.us/fsp/frink.fsp for details."

    def check(self, data):
        if data.is_command and data.command == "calc":
            return True
        return False

    def process(self, data):
        """Evaluate the arguments as one expression and reply with the result."""
        if not data.args:
            self.connection.reply(data, "What do you want me to calculate?")
            return

        query = ' '.join(data.args)
        query = self.cleanup(query)

        url = "http://futureboy.us/fsp/frink.fsp?fromVal=%s" % urllib.quote(query)
        result = urllib.urlopen(url).read()

        r_result = re.compile(r'(?i)<A NAME=results>(.*?)</A>')
        r_tag = re.compile(r'<\S+.*?>')

        match = r_result.search(result)
        if not match:
            self.connection.reply(data, "Calculation error.")
            return

        result = match.group(1)
        result = r_tag.sub("", result)  # strip span.warning tags
        # NOTE(review): this literal was garbled to a no-op in the copy under
        # review; unescaping "&gt;" is the presumed intent -- confirm.
        result = result.replace("&gt;", ">")
        result = result.replace("(undefined symbol)", "(?) ")
        result = result.strip()

        if not result:
            result = '?'
        elif " -> " in query:
            # cleanup() has already rewritten " in " to " -> ", so the target
            # unit must be looked up after the arrow; testing for " in " here
            # could never succeed.
            result += " " + query.split(" -> ", 1)[1]

        res = "%s = %s" % (query, result)
        self.connection.reply(data, res)

    def cleanup(self, query):
        """Rewrite friendly spellings in *query* into the form sent to the calculator."""
        fixes = [
            (' in ', ' -> '),
            (' over ', ' / '),
            (u'£', 'GBP '),
            (u'€', 'EUR '),
            (r'\$', 'USD '),
            (r'\bKB\b', 'kilobytes'),
            (r'\bMB\b', 'megabytes'),
            (r'\bGB\b', 'gigabytes'),  # was mapped to kilobytes
            ('kbps', '(kilobits / second)'),
            ('mbps', '(megabits / second)'),
        ]

        for original, fix in fixes:
            query = re.sub(original, fix, query)
        return query.strip()
@@ -1,31 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# Voice/devoice/op/deop users in the channel. | |||||
from irc.base_command import BaseCommand | |||||
from config.irc import * | |||||
class ChanOps(BaseCommand):
    """Ask ChanServ to voice, devoice, op or deop a user in the channel."""

    def get_hooks(self):
        hooks = ["msg"]
        return hooks

    def get_help(self, command):
        # The help text is the command name itself, capitalised.
        return "%s users in the channel." % command.capitalize()

    def check(self, data):
        handled = ("voice", "devoice", "op", "deop")
        return bool(data.is_command and data.command in handled)

    def process(self, data):
        """Forward the mode request to ChanServ on behalf of a bot admin."""
        if data.host not in ADMINS:
            self.connection.reply(data, "you must be a bot admin to use this command.")
            return

        # With no argument the requester is the target of the mode change.
        target = data.args[0] if data.args else data.nick
        request = "%s %s %s" % (data.command, data.chan, target)
        self.connection.say("ChanServ", request)
@@ -1,160 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# Commands to interface with the bot's git repository; use '!git help' for sub-command list. | |||||
import shlex, subprocess, re | |||||
from config.irc import * | |||||
from irc.base_command import BaseCommand | |||||
class Git(BaseCommand): | |||||
def get_hooks(self):
    """Return the hook names this command is registered for."""
    hooks = ["msg"]
    return hooks
def get_help(self, command):
    """Return the one-line help text; *command* is not used."""
    text = "Commands to interface with the bot's git repository; use '!git help' for sub-command list."
    return text
def check(self, data): | |||||
if data.is_command and data.command == "git": | |||||
return True | |||||
return False | |||||
def process(self, data): | |||||
self.data = data | |||||
if data.host not in OWNERS: | |||||
self.connection.reply(data, "you must be a bot owner to use this command.") | |||||
return | |||||
if not data.args: | |||||
self.connection.reply(data, "no arguments provided. Maybe you wanted '!git help'?") | |||||
return | |||||
if data.args[0] == "help": | |||||
self.do_help() | |||||
elif data.args[0] == "branch": | |||||
self.do_branch() | |||||
elif data.args[0] == "branches": | |||||
self.do_branches() | |||||
elif data.args[0] == "checkout": | |||||
self.do_checkout() | |||||
elif data.args[0] == "delete": | |||||
self.do_delete() | |||||
elif data.args[0] == "pull": | |||||
self.do_pull() | |||||
elif data.args[0] == "status": | |||||
self.do_status() | |||||
else: # they asked us to do something we don't know | |||||
self.connection.reply(data, "unknown argument: \x0303%s\x0301." % data.args[0]) | |||||
def exec_shell(self, command): | |||||
"""execute a shell command and get the output""" | |||||
command = shlex.split(command) | |||||
result = subprocess.check_output(command, stderr=subprocess.STDOUT) | |||||
if result: | |||||
result = result[:-1] # strip newline | |||||
return result | |||||
def do_help(self): | |||||
"""display all commands""" | |||||
help_dict = { | |||||
"branch": "get current branch", | |||||
"branches": "get all branches", | |||||
"checkout": "switch branches", | |||||
"delete": "delete an old branch", | |||||
"pull": "update everything from the remote server", | |||||
"status": "check if we are up-to-date", | |||||
} | |||||
keys = help_dict.keys() | |||||
keys.sort() | |||||
help = "" | |||||
for key in keys: | |||||
help += "\x0303%s\x0301 (%s), " % (key, help_dict[key]) | |||||
help = help[:-2] # trim last comma and space | |||||
self.connection.reply(self.data, "sub-commands are: %s." % help) | |||||
def do_branch(self): | |||||
"""get our current branch""" | |||||
branch = self.exec_shell("git name-rev --name-only HEAD") | |||||
self.connection.reply(self.data, "currently on branch \x0302%s\x0301." % branch) | |||||
def do_branches(self): | |||||
"""get list of branches""" | |||||
branches = self.exec_shell("git branch") | |||||
branches = branches.replace('\n* ', ', ') # cleanup extraneous characters | |||||
branches = branches.replace('* ', ' ') | |||||
branches = branches.replace('\n ', ', ') | |||||
branches = branches.strip() | |||||
self.connection.reply(self.data, "branches: \x0302%s\x0301." % branches) | |||||
def do_checkout(self): | |||||
"""switch branches""" | |||||
try: | |||||
branch = self.data.args[1] | |||||
except IndexError: # no branch name provided | |||||
self.connection.reply(self.data, "switch to which branch?") | |||||
return | |||||
try: | |||||
result = self.exec_shell("git checkout %s" % branch) | |||||
if "Already on" in result: | |||||
self.connection.reply(self.data, "already on \x0302%s\x0301!" % branch) | |||||
else: | |||||
current_branch = self.exec_shell("git name-rev --name-only HEAD") | |||||
self.connection.reply(self.data, "switched from branch \x0302%s\x0301 to \x0302%s\x0301." % (current_branch, branch)) | |||||
except subprocess.CalledProcessError: # git couldn't switch branches | |||||
self.connection.reply(self.data, "branch \x0302%s\x0301 doesn't exist!" % branch) | |||||
def do_delete(self): | |||||
"""delete a branch, while making sure that we are not on it""" | |||||
try: | |||||
delete_branch = self.data.args[1] | |||||
except IndexError: # no branch name provided | |||||
self.connection.reply(self.data, "delete which branch?") | |||||
return | |||||
current_branch = self.exec_shell("git name-rev --name-only HEAD") | |||||
if current_branch == delete_branch: | |||||
self.connection.reply(self.data, "you're currently on this branch; please checkout to a different branch before deleting.") | |||||
return | |||||
try: | |||||
self.exec_shell("git branch -d %s" % delete_branch) | |||||
self.connection.reply(self.data, "branch \x0302%s\x0301 has been deleted locally." % delete_branch) | |||||
except subprocess.CalledProcessError: # git couldn't delete | |||||
self.connection.reply(self.data, "branch \x0302%s\x0301 doesn't exist!" % delete_branch) | |||||
def do_pull(self): | |||||
"""pull from remote repository""" | |||||
branch = self.exec_shell("git name-rev --name-only HEAD") | |||||
self.connection.reply(self.data, "pulling from remote (currently on \x0302%s\x0301)..." % branch) | |||||
result = self.exec_shell("git pull") | |||||
if "Already up-to-date." in result: | |||||
self.connection.reply(self.data, "done; no new changes.") | |||||
else: | |||||
changes = re.findall("\s*((.*?)\sfile(.*?)tions?\(-\))", result)[0][0] # find the changes | |||||
try: | |||||
remote = self.exec_shell("git config --get branch.%s.remote" % branch) | |||||
url = self.exec_shell("git config --get remote.%s.url" % remote) | |||||
self.connection.reply(self.data, "done; %s [from %s]." % (changes, url)) | |||||
except subprocess.CalledProcessError: # something in .git/config is not specified correctly, so we cannot get the remote's url | |||||
self.connection.reply(self.data, "done; %s." % changes) | |||||
def do_status(self): | |||||
"""check whether we have anything to pull""" | |||||
last = self.exec_shell("git log -n 1 --pretty=\"%ar\"") | |||||
result = self.exec_shell("git fetch --dry-run") | |||||
if not result: # nothing was fetched, so remote and local are equal | |||||
self.connection.reply(self.data, "last commit was %s. Local copy is \x02up-to-date\x0F with remote." % last) | |||||
else: | |||||
self.connection.reply(self.data, "last local commit was %s. Remote is \x02ahead\x0F of local copy." % last) |
@@ -1,55 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# Generates help information. | |||||
from irc.base_command import BaseCommand | |||||
from irc.data import Data | |||||
from irc import command_handler | |||||
class Help(BaseCommand):
    """IRC command that describes the bot, lists command classes, or explains one command."""

    def get_hooks(self):
        """Return the list of hooks this command is attached to."""
        return ["msg"]

    def get_help(self, command):
        """Return the one-line help text for this command."""
        return "Generates help information."

    def check(self, data):
        """Return True when *data* is the 'help' command."""
        if data.is_command and data.command == "help":
            return True
        return False

    def process(self, data):
        """Route to general help, the class list, or help for one command."""
        if not data.args:
            self.do_general_help(data)
        elif data.args[0] == "list":
            self.do_list_help(data)
        else:
            self.do_command_help(data)

    def do_general_help(self, data):
        """Reply with a short introduction to the help system."""
        self.connection.reply(data, "I am a bot! You can get help for any command with '!help <command>', or a list of all loaded modules with '!help list'.")

    def do_list_help(self, data):
        """Reply with the class name of every loaded command object."""
        commands = command_handler.get_commands()
        # A real list (not a lazy map object) so len() works on any Python.
        cmnds = [cmnd.__class__.__name__ for cmnd in commands]
        pretty_cmnds = ', '.join(cmnds)
        self.connection.reply(data, "%s command classes loaded: %s." % (len(cmnds), pretty_cmnds))

    def do_command_help(self, data):
        """Reply with the help text of the first command class that accepts the named command.

        A dummy ``Data`` carrying the lowercased command name is offered to
        each loaded command's ``check``; the first one returning true
        supplies the help text.
        """
        command = data.args[0]
        commands = command_handler.get_commands()

        dummy = Data()  # dummy message to test which command classes pick up this command
        dummy.command = command.lower()  # lowercase command name
        dummy.is_command = True

        for cmnd in commands:
            if cmnd.check(dummy):
                help_text = cmnd.get_help(command)
                self.connection.reply(data, "info for command \x0303%s\x0301: \"%s\"" % (command, help_text))
                return

        # No class claimed the command. Reaching this point is the explicit
        # "not found" path; it no longer relies on catching UnboundLocalError,
        # which could also swallow an unrelated error raised while replying.
        self.connection.reply(data, "sorry, no help for \x0303%s\x0301." % command)
@@ -1,65 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# Convert a Wikipedia page name into a URL. | |||||
import re | |||||
from irc.base_command import BaseCommand | |||||
class Link(BaseCommand):
    """IRC command that turns [[wikilinks]], {{templates}} or a page name into URLs."""

    def get_hooks(self):
        """Return the list of hooks this command is attached to."""
        return ["msg"]

    def get_help(self, command):
        """Return the one-line help text for this command."""
        return "Convert a Wikipedia page name into a URL."

    def check(self, data):
        """Return True for the 'link' command or any message containing [[..]] or {{..}} markers."""
        if ((data.is_command and data.command == "link") or
                (("[[" in data.msg and "]]" in data.msg) or
                 ("{{" in data.msg and "}}" in data.msg))):
            return True
        return False

    def process(self, data):
        """Reply with URLs for the links in the message, or for the page named after '!link'."""
        msg = data.msg

        if re.search(r"(\[\[(.*?)\]\])|(\{\{(.*?)\}\})", msg):
            links = self.parse_line(msg)
            # parse_line can come back empty (e.g. the only braces were a
            # {{{parameter}}}); stay silent rather than send an empty reply.
            if links:
                self.connection.reply(data, " , ".join(links))

        elif data.command == "link":
            if not data.args:
                self.connection.reply(data, "what do you want me to link to?")
                return
            pagename = ' '.join(data.args)
            link = self.parse_link(pagename)
            self.connection.reply(data, link)

    def parse_line(self, line):
        """Return a list of URLs for every [[link]] and {{template}} found in *line*.

        Links come first, then templates; text after a '|' inside either
        construct is ignored.
        """
        results = list()

        line = re.sub(r"\{\{\{(.*?)\}\}\}", "", line)  # destroy {{{template parameters}}}

        # re.findall() returns a list of tuples; the 2nd item is the target.
        links = re.findall(r"(\[\[(.*?)(\||\]\]))", line)  # find all [[links]]
        results.extend(self.parse_link(found[1]) for found in links)

        templates = re.findall(r"(\{\{(.*?)(\||\}\}))", line)  # find all {{templates}}
        results.extend(self.parse_template(found[1]) for found in templates)

        return results

    def parse_link(self, pagename):
        """Return the en.wikipedia.org URL for *pagename*, with spaces turned into underscores."""
        pagename = pagename.strip()
        link = "http://en.wikipedia.org/wiki/" + pagename
        link = link.replace(" ", "_")
        return link

    def parse_template(self, pagename):
        """Return the URL of the template called *pagename*.

        The name is stripped before the namespace is prepended; otherwise
        ``{{ foo }}`` would become ``Template:_foo``.
        """
        pagename = "Template:%s" % pagename.strip()  # TODO: implement an actual namespace check
        link = self.parse_link(pagename)
        return link
@@ -1,124 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# Manage wiki tasks from IRC, and check on thread status. | |||||
import threading, re | |||||
from irc.base_command import BaseCommand | |||||
from irc.data import * | |||||
from wiki import task_manager | |||||
from config.main import * | |||||
from config.irc import * | |||||
class Tasks(BaseCommand):
    """Owner-only IRC command to list threads, list loaded wiki tasks, and start a task."""

    def get_hooks(self):
        """Return the list of hooks this command is attached to."""
        return ["msg"]

    def get_help(self, command):
        """Return the one-line help text for this command."""
        return "Manage wiki tasks from IRC, and check on thread status."

    def check(self, data):
        """Return True when *data* is the 'tasks', 'threads' or 'tasklist' command."""
        if data.is_command and data.command in ["tasks", "threads", "tasklist"]:
            return True
        return False

    def process(self, data):
        """Dispatch to ``do_list``, ``do_start`` or ``do_listall`` based on the first argument.

        Hosts not listed in OWNERS are refused. A bare '!tasklist' lists
        threads; any other bare form gets a usage hint.
        """
        self.data = data
        if data.host not in OWNERS:
            self.connection.reply(data, "at this time, you must be a bot owner to use this command.")
            return

        if not data.args:
            if data.command == "tasklist":
                self.do_list()
            else:
                self.connection.reply(data, "no arguments provided. Maybe you wanted '!{cmnd} list', '!{cmnd} start', or '!{cmnd} listall'?".format(cmnd=data.command))
            return

        if data.args[0] == "list":
            self.do_list()
        elif data.args[0] == "start":
            self.do_start()
        elif data.args[0] in ["listall", "all"]:
            self.do_listall()
        else:  # they asked us to do something we don't know
            self.connection.reply(data, "unknown argument: \x0303{}\x0301.".format(data.args[0]))

    def do_list(self):
        """Reply with every live thread, split into bot component threads and task threads."""
        threads = threading.enumerate()
        normal_threads = []
        task_threads = []

        for thread in threads:
            tname = thread.name
            if tname == "MainThread":
                tname = self.get_main_thread_name()
                normal_threads.append("\x0302{}\x0301 (as main thread, id {})".format(tname, thread.ident))
                continue
            if tname in ["irc-frontend", "irc-watcher", "wiki-scheduler"]:
                normal_threads.append("\x0302{}\x0301 (id {})".format(tname, thread.ident))
                continue

            # Anything else is expected to be named "<task> (<start time>)".
            parsed = re.findall(r"^(.*?) \((.*?)\)$", tname)
            if parsed:
                tname, start_time = parsed[0]
                task_threads.append("\x0302{}\x0301 (id {}, since {})".format(tname, thread.ident, start_time))
            else:
                # A thread with some other name (e.g. a default "Thread-N")
                # used to abort the whole listing with IndexError; show it
                # as an ordinary thread instead.
                normal_threads.append("\x0302{}\x0301 (id {})".format(tname, thread.ident))

        if task_threads:
            msg = "\x02{}\x0F threads active: {}, and \x02{}\x0F task threads: {}.".format(len(threads), ', '.join(normal_threads), len(task_threads), ', '.join(task_threads))
        else:
            msg = "\x02{}\x0F threads active: {}, and \x020\x0F task threads.".format(len(threads), ', '.join(normal_threads))
        self.connection.reply(self.data, msg)

    def do_listall(self):
        """Reply with every loaded task and the ids of the threads whose name starts with it."""
        tasks = sorted(task_manager.task_list.keys())
        threads = threading.enumerate()
        tasklist = []

        for task in tasks:
            ids = [str(t.ident) for t in threads if t.name.startswith(task)]
            if not ids:
                tasklist.append("\x0302{}\x0301 (idle)".format(task))
            elif len(ids) == 1:
                tasklist.append("\x0302{}\x0301 (\x02active\x0F as id {})".format(task, ids[0]))
            else:
                tasklist.append("\x0302{}\x0301 (\x02active\x0F as ids {})".format(task, ', '.join(ids)))

        tasklist = ", ".join(tasklist)
        msg = "{} tasks loaded: {}.".format(len(tasks), tasklist)
        self.connection.reply(self.data, msg)

    def do_start(self):
        """Start the task named in the second argument, passing any key=value arguments to it."""
        data = self.data

        try:
            task_name = data.args[1]
        except IndexError:  # no task name given
            self.connection.reply(data, "what task do you want me to start?")
            return

        try:
            data.parse_kwargs()
        except KwargParseException as arg:  # 'as' form parses on Python 2.6+ and 3
            self.connection.reply(data, "error parsing argument: \x0303{}\x0301.".format(arg))
            return

        if task_name not in task_manager.task_list:  # this task does not exist or hasn't been loaded
            self.connection.reply(data, "task could not be found; either wiki/tasks/{}.py doesn't exist, or it wasn't loaded correctly.".format(task_name))
            return

        task_manager.start_task(task_name, **data.kwargs)
        self.connection.reply(data, "task \x0302{}\x0301 started.".format(task_name))

    def get_main_thread_name(self):
        """Return the "proper" name of the MainThread; e.g. "irc-frontend" or "irc-watcher".

        The choice follows the ``enable_irc_frontend`` and
        ``enable_wiki_schedule`` settings, checked in that order.
        """
        if enable_irc_frontend:
            return "irc-frontend"
        elif enable_wiki_schedule:
            return "wiki-scheduler"
        else:
            return "irc-watcher"
@@ -1,26 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# A very simple command to test the bot. | |||||
import random | |||||
from irc.base_command import BaseCommand | |||||
class Test(BaseCommand):
    """Trivial command that greets the sender, used to check the bot is responding."""

    def get_hooks(self):
        """Return the list of hooks this command is attached to."""
        hooks = ["msg"]
        return hooks

    def get_help(self, command):
        """Return the one-line help text for this command."""
        return "Test the bot!"

    def check(self, data):
        """Return True when *data* is the 'test' command."""
        if not data.is_command:
            return False
        return data.command == "test"

    def process(self, data):
        """Say one of two greetings, picked at random, in the originating channel."""
        if random.randint(0, 1):
            greeting = "Hey \x02%s\x0F!"
        else:
            greeting = "'sup \x02%s\x0F?"
        self.connection.say(data.chan, greeting % data.nick)
@@ -1,75 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# A class to interface with IRC. | |||||
import socket | |||||
import threading | |||||
class BrokenSocketException(Exception):
    """Raised when a socket returns no data and is therefore treated as broken."""
class Connection(object):
    """A thin wrapper around a TCP socket that speaks a few IRC commands."""

    def __init__(self, host=None, port=None, nick=None, ident=None, realname=None):
        """Store the connection settings; no socket is opened until ``connect``."""
        self.host = host
        self.port = port
        self.nick = nick
        self.ident = ident
        self.realname = realname
        # One lock per connection, shared by every send() call. Creating a
        # new lock inside send() (as before) never excluded anything, since
        # no two callers ever held the same lock object.
        self._send_lock = threading.Lock()

    def connect(self):
        """Open the socket to ``host``/``port`` and send NICK and USER."""
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.sock.connect((self.host, self.port))
        self.send("NICK %s" % self.nick)
        self.send("USER %s %s * :%s" % (self.ident, self.host, self.realname))

    def close(self):
        """Shut down and close the socket."""
        try:
            self.sock.shutdown(socket.SHUT_RDWR)  # shut down connection first
        except socket.error:
            pass  # ignore if the socket is already down
        self.sock.close()

    def get(self, size=4096):
        """Receive up to *size* bytes from the server.

        Raises:
            BrokenSocketException: if the socket returns no data.
        """
        data = self.sock.recv(size)  # honour the caller's size (was fixed at 4096)
        if not data:  # socket giving us no data, so it is dead/broken
            raise BrokenSocketException()
        return data

    def send(self, msg):
        """Send *msg* followed by CRLF and echo it to stdout.

        The connection's lock is held for the duration so that only one
        message is written at a time.
        """
        with self._send_lock:
            self.sock.sendall(msg + "\r\n")
            print(" %s" % msg)

    def say(self, target, msg):
        """Send *msg* to *target* as a PRIVMSG."""
        self.send("PRIVMSG %s :%s" % (target, msg))

    def reply(self, data, msg):
        """Say *msg* in ``data.chan``, prefixed with the bolded ``data.nick``."""
        self.say(data.chan, "%s%s%s: %s" % (chr(2), data.nick, chr(0x0f), msg))

    def action(self, target, msg):
        """Send *msg* to *target* wrapped as a CTCP ACTION."""
        self.say(target, "%sACTION %s%s" % (chr(1), msg, chr(1)))

    def notice(self, target, msg):
        """Send *msg* to *target* as a NOTICE."""
        self.send("NOTICE %s :%s" % (target, msg))

    def join(self, chan):
        """Send a JOIN for *chan*."""
        self.send("JOIN %s" % chan)

    def mode(self, chan, level, msg):
        """Send a MODE message for *chan* with the given *level* and *msg*."""
        self.send("MODE %s %s %s" % (chan, level, msg))
@@ -1,55 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# A class to store data from an individual line received on IRC. | |||||
import re | |||||
class KwargParseException(Exception):
    """A keyword argument in self.args could not be parsed.

    This usually means it was malformed: a bare key (abc), a bare value
    (=xyz) or a lone equal sign (=) rather than the expected abc=xyz.
    """
class Data(object):
    """Holds the fields parsed out of one line received on IRC."""

    def __init__(self):
        """Create an empty record; every attribute starts at a neutral value."""
        self.line = str()
        self.chan = str()
        self.nick = str()
        self.ident = str()
        self.host = str()
        self.msg = str()
        # Previously these only existed after parse_args()/parse_kwargs()
        # ran, so reading them on a fresh object raised AttributeError.
        self.command = None
        self.is_command = False
        self.args = []
        self.kwargs = {}

    def parse_args(self):
        """Split ``self.msg`` into ``self.command`` and ``self.args``.

        The message is split on single spaces and empty pieces are dropped.
        The first piece becomes ``command`` (None for an empty message) and
        the rest become ``args``. If the command starts with '!' or '.',
        ``is_command`` is set, the prefix is removed and the name lowercased.
        """
        # strip out extra whitespace, split into a list, drop empty arguments
        args = [arg for arg in self.msg.strip().split(' ') if arg]

        self.args = args[1:]  # the command arguments
        self.is_command = False  # whether this is a real command or not
        self.command = args[0] if args else None  # the command itself

        # NOTE(review): lowercasing is applied only to prefixed commands here;
        # confirm against the original indentation that non-command first
        # words were not lowercased too.
        if self.command is not None and self.command.startswith(('!', '.')):
            self.is_command = True
            self.command = self.command[1:].lower()  # strip '!' or '.', lowercase

    def parse_kwargs(self):
        """Parse key=value arguments from ``self.args`` into the dict ``self.kwargs``.

        The first two entries of ``self.args`` are skipped, so for
        ``!command sub target key1=value1 key2=value2`` the result is
        ``{'key1': 'value1', 'key2': 'value2'}``. Each argument is split at
        its first '='.

        Raises:
            KwargParseException: if an argument has no '=', an empty key or
                an empty value; the offending argument is the exception's
                argument.
        """
        self.kwargs = {}
        for arg in self.args[2:]:
            key, sep, value = arg.partition("=")
            if not sep or not key or not value:
                raise KwargParseException(arg)
            self.kwargs[key] = value
@@ -1,75 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
## Imports | |||||
import re, time | |||||
from config.irc import * | |||||
from config.secure import * | |||||
from irc import command_handler | |||||
from irc.connection import * | |||||
from irc.data import Data | |||||
connection = None | |||||
def get_connection():
    """Build and return a Connection from HOST, PORT, NICK, IDENT and REALNAME.

    The module-level ``connection`` is not touched here; the object is only
    returned to the caller.
    """
    return Connection(HOST, PORT, NICK, IDENT, REALNAME)
def startup(conn):
    """Install *conn* as the module-wide connection, load the commands, then connect."""
    global connection
    connection = conn
    # Commands are loaded with the connection before it is opened.
    command_handler.load_commands(conn)
    conn.connect()
def main():
    """Read from the front-end connection and dispatch each received line.

    The loop returns (so the caller can restart the bot) when the socket
    breaks or when a host listed in OWNERS sends a message starting with
    '!restart'. Handled line types: JOIN, PRIVMSG, PING, and numeric 376,
    after which the bot optionally identifies to NickServ and joins CHANS.
    """
    sender_pattern = r":(.*?)!(.*?)@(.*?)\Z"  # nick!ident@host prefix
    read_buffer = str()

    while 1:
        try:
            read_buffer = read_buffer + connection.get()
        except BrokenSocketException:
            print("Socket has broken on front-end; restarting bot...")
            return

        lines = read_buffer.split("\n")
        read_buffer = lines.pop()  # keep the unfinished last line for the next read

        for line in lines:
            line = line.strip().split()
            if len(line) < 2:
                # Blank or single-token lines have no line[1]; indexing them
                # below used to raise IndexError and kill the loop.
                continue

            data = Data()
            data.line = line

            if line[1] == "JOIN":
                data.nick, data.ident, data.host = re.findall(sender_pattern, line[0])[0]
                data.chan = line[2][1:]
                command_handler.check("join", data)  # check if there's anything we can respond to, and if so, respond

            if line[1] == "PRIVMSG":
                data.nick, data.ident, data.host = re.findall(sender_pattern, line[0])[0]
                data.msg = ' '.join(line[3:])[1:]
                data.chan = line[2]

                if data.chan == NICK:  # this is a privmsg to us, so set 'chan' as the nick of the sender
                    data.chan = data.nick
                    command_handler.check("msg_private", data)  # only respond if it's a private message
                else:
                    command_handler.check("msg_public", data)  # only respond if it's a public (channel) message

                command_handler.check("msg", data)  # check for general messages

                if data.msg.startswith("!restart"):  # hardcode the !restart command (we can't restart from within an ordinary command)
                    if data.host in OWNERS:
                        print("Restarting bot per owner request...")
                        return

            if line[0] == "PING":  # If we are pinged, pong back to the server
                connection.send("PONG %s" % line[1])

            if line[1] == "376":
                if NS_AUTH:  # if we're supposed to auth to nickserv, do that
                    connection.say("NickServ", "IDENTIFY %s %s" % (NS_USER, NS_PASS))
                    time.sleep(3)  # sleep for a bit so we don't join channels un-authed
                for chan in CHANS:  # join all of our startup channels
                    connection.join(chan)
@@ -1,57 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# A class to store data on an individual event received from our IRC watcher. | |||||
import re | |||||
class RC(object): | |||||
def __init__(self, msg): | |||||
"""store data on an individual event received from our IRC watcher""" | |||||
self.msg = msg | |||||
def parse(self):
    """Break ``self.msg`` into page, flags, url, user and comment attributes.

    IRC color codes are removed and the cleaned text is stored back on
    ``self.msg``. A message carrying an http:// URL is treated as an edit
    (``is_edit`` True). Otherwise it is treated as a log entry: ``is_edit``
    becomes False and the URL is built from the page name.
    """
    # strip IRC color codes; we don't want/need 'em
    cleaned = re.sub("\x03([0-9]{1,2}(,[0-9]{1,2})?)?", "", self.msg).strip()
    self.msg = cleaned
    self.is_edit = True

    # flags: 'M' for minor edit, 'B' for bot edit, 'create' for a user creation log entry...
    edit_fields = re.findall(r"\A\[\[(.*?)\]\]\s(.*?)\s(http://.*?)\s\*\s(.*?)\s\*\s(.*?)\Z", cleaned)
    if edit_fields:
        page, flags, url, user, comment = edit_fields[0]
    else:
        # we're probably missing the http:// part, because it's a log entry, which lacks a url
        log_fields = re.findall(r"\A\[\[(.*?)\]\]\s(.*?)\s\*\s(.*?)\s\*\s(.*?)\Z", cleaned)
        page, flags, user, comment = log_fields[0]
        url = "http://en.wikipedia.org/wiki/{}".format(page)
        # flag tends to have a extraneous whitespace character at the end when it's a log entry
        flags = flags.strip()
        self.is_edit = False  # this is a log entry, not edit

    self.page = page
    self.flags = flags
    self.url = url
    self.user = user
    self.comment = comment
# Builds a colored, human-readable summary of this event for the front-end.
# It reads self.flags, self.page, self.user, self.url, self.comment and
# self.is_edit; it derives an event_type word and returns one of two format
# strings, chosen by self.is_edit.
#
# NOTE(review): the indentation of this method was lost in extraction, so the
# nesting of the branches below cannot be confirmed from this view. In
# particular, it is unclear whether the `if self.page == "Special:Log/move"` /
# `else` pair sits at the method's top level (which would overwrite every
# event_type chosen by the preceding if/elif chain) or inside one of that
# chain's branches. Restore the original indentation and confirm the intended
# precedence before changing this logic.
def get_pretty(self): | |||||
"""make a nice, colorful message from self.msg to send to the front-end""" | |||||
flags = self.flags | |||||
event_type = flags # "New <event>:" if we don't know exactly what happened | |||||
if "N" in flags: | |||||
event_type = "page" # "New page:" | |||||
elif flags == "delete": | |||||
event_type = "deletion" # "New deletion:" | |||||
elif flags == "protect": | |||||
event_type = "protection" # "New protection:" | |||||
elif flags == "create": | |||||
event_type = "user" # "New user:" | |||||
if self.page == "Special:Log/move": | |||||
event_type = "move" # New move: | |||||
else: | |||||
event_type = "edit" # "New edit:" | |||||
if "B" in flags: | |||||
event_type = "bot {}".format(event_type) # "New bot edit:" | |||||
if "M" in flags: | |||||
event_type = "minor {}".format(event_type) # "New minor edit:" OR "New minor bot edit:" | |||||
if self.is_edit: | |||||
pretty = "\x02New {0}\x0F: \x0314[[\x0307{1}\x0314]]\x0306 *\x0303 {2}\x0306 *\x0302 {3}\x0306 *\x0310 {4}".format(event_type, self.page, self.user, self.url, self.comment) | |||||
else: | |||||
pretty = "\x02New {0}\x0F: \x0303{1}\x0306 *\x0302 {2}\x0306 *\x0310 {3}".format(event_type, self.user, self.url, self.comment) | |||||
return pretty |