Browse Source

Merge branch 'release/0.1'

tags/v0.1^0
Ben Kurtovic 12 years ago
parent
commit
9254158fc5
100 changed files with 10602 additions and 2340 deletions
  1. +5
    -9
      .gitignore
  2. +1
    -2
      LICENSE
  3. +0
    -20
      README
  4. +205
    -0
      README.rst
  5. +0
    -0
     
  6. +0
    -25
      config/irc.py
  7. +0
    -24
      config/main.py
  8. +0
    -28
      config/schedule.py
  9. +0
    -9
      config/secure.default.py
  10. +0
    -69
      config/watcher.py
  11. +0
    -0
     
  12. +0
    -122
      core/main.py
  13. +153
    -0
      docs/Makefile
  14. +9
    -0
      docs/api/earwigbot.commands.rst
  15. +46
    -0
      docs/api/earwigbot.config.rst
  16. +46
    -0
      docs/api/earwigbot.irc.rst
  17. +57
    -0
      docs/api/earwigbot.rst
  18. +16
    -0
      docs/api/earwigbot.tasks.rst
  19. +47
    -0
      docs/api/earwigbot.wiki.copyvios.rst
  20. +59
    -0
      docs/api/earwigbot.wiki.rst
  21. +7
    -0
      docs/api/modules.rst
  22. +242
    -0
      docs/conf.py
  23. +240
    -0
      docs/customizing.rst
  24. +48
    -0
      docs/index.rst
  25. +55
    -0
      docs/installation.rst
  26. +28
    -0
      docs/setup.rst
  27. +46
    -0
      docs/tips.rst
  28. +247
    -0
      docs/toolset.rst
  29. +0
    -22
      earwigbot.py
  30. +68
    -0
      earwigbot/__init__.py
  31. +222
    -0
      earwigbot/bot.py
  32. +122
    -0
      earwigbot/commands/__init__.py
  33. +142
    -0
      earwigbot/commands/access.py
  34. +83
    -0
      earwigbot/commands/calc.py
  35. +91
    -0
      earwigbot/commands/chanops.py
  36. +79
    -0
      earwigbot/commands/crypt.py
  37. +68
    -0
      earwigbot/commands/ctcp.py
  38. +181
    -0
      earwigbot/commands/dictionary.py
  39. +53
    -0
      earwigbot/commands/editcount.py
  40. +71
    -0
      earwigbot/commands/help.py
  41. +101
    -0
      earwigbot/commands/lag.py
  42. +62
    -0
      earwigbot/commands/langcode.py
  43. +79
    -0
      earwigbot/commands/link.py
  44. +319
    -0
      earwigbot/commands/notes.py
  45. +68
    -0
      earwigbot/commands/quit.py
  46. +72
    -0
      earwigbot/commands/registration.py
  47. +62
    -0
      earwigbot/commands/remind.py
  48. +52
    -0
      earwigbot/commands/rights.py
  49. +37
    -0
      earwigbot/commands/test.py
  50. +143
    -0
      earwigbot/commands/threads.py
  51. +65
    -0
      earwigbot/commands/time_command.py
  52. +48
    -0
      earwigbot/commands/trout.py
  53. +347
    -0
      earwigbot/config/__init__.py
  54. +51
    -0
      earwigbot/config/formatter.py
  55. +104
    -0
      earwigbot/config/node.py
  56. +106
    -0
      earwigbot/config/ordered_yaml.py
  57. +176
    -0
      earwigbot/config/permissions.py
  58. +446
    -0
      earwigbot/config/script.py
  59. +256
    -0
      earwigbot/exceptions.py
  60. +27
    -0
      earwigbot/irc/__init__.py
  61. +259
    -0
      earwigbot/irc/connection.py
  62. +212
    -0
      earwigbot/irc/data.py
  63. +86
    -0
      earwigbot/irc/frontend.py
  64. +96
    -0
      earwigbot/irc/rc.py
  65. +129
    -0
      earwigbot/irc/watcher.py
  66. +81
    -0
      earwigbot/lazy.py
  67. +269
    -0
      earwigbot/managers.py
  68. +143
    -0
      earwigbot/tasks/__init__.py
  69. +329
    -0
      earwigbot/tasks/wikiproject_tagger.py
  70. +157
    -0
      earwigbot/util.py
  71. +51
    -0
      earwigbot/wiki/__init__.py
  72. +205
    -0
      earwigbot/wiki/category.py
  73. +61
    -0
      earwigbot/wiki/constants.py
  74. +229
    -0
      earwigbot/wiki/copyvios/__init__.py
  75. +171
    -0
      earwigbot/wiki/copyvios/exclusions.py
  76. +87
    -0
      earwigbot/wiki/copyvios/markov.py
  77. +138
    -0
      earwigbot/wiki/copyvios/parsers.py
  78. +60
    -0
      earwigbot/wiki/copyvios/result.py
  79. +91
    -0
      earwigbot/wiki/copyvios/search.py
  80. +787
    -0
      earwigbot/wiki/page.py
  81. +849
    -0
      earwigbot/wiki/site.py
  82. +438
    -0
      earwigbot/wiki/sitesdb.py
  83. +316
    -0
      earwigbot/wiki/user.py
  84. +0
    -0
     
  85. +0
    -33
      irc/base_command.py
  86. +0
    -66
      irc/command_handler.py
  87. +0
    -0
     
  88. +0
    -979
      irc/commands/_old.py
  89. +0
    -138
      irc/commands/afc_status.py
  90. +0
    -71
      irc/commands/calc.py
  91. +0
    -31
      irc/commands/chanops.py
  92. +0
    -160
      irc/commands/git.py
  93. +0
    -55
      irc/commands/help.py
  94. +0
    -65
      irc/commands/link.py
  95. +0
    -124
      irc/commands/tasks.py
  96. +0
    -26
      irc/commands/test.py
  97. +0
    -75
      irc/connection.py
  98. +0
    -55
      irc/data.py
  99. +0
    -75
      irc/frontend.py
  100. +0
    -57
      irc/rc.py

+ 5
- 9
.gitignore View File

@@ -1,10 +1,6 @@
# Ignore python bytecode:
*.pyc

# Ignore secure config files:
config/secure.py

# Ignore pydev's nonsense:
.project
.pydevproject
.settings/
*.egg
*.egg-info
.DS_Store
build
docs/_build

+ 1
- 2
LICENSE View File

@@ -1,5 +1,4 @@
Copyright (c) 2009-2011 Ben Kurtovic (The Earwig)
<http://en.wikipedia.org/wiki/User:The_Earwig>
Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal


+ 0
- 20
README View File

@@ -1,20 +0,0 @@
EarwigBot[1] is a Python[2] robot that edits Wikipedia.

Development began, based on the Pywikipedia framework[3], in early 2009.
Approval for its first task, a copyright violation detector[4], was carried out
in May, and the bot has been running consistently ever since (with the
exception of Jan/Feb 2011). It currently handles several ongoing tasks[5],
ranging from statistics generation to category cleanup, and on-demand tasks
such as WikiProject template tagging. Since it started running, the bot has
made over 45,000 edits.

A project to rewrite it from scratch began in early April 2011, thus moving
away from the Pywikipedia framework and allowing for less overall code, better
integration between bot parts, and easier maintenance.

Links:
[1] http://toolserver.org/~earwig/earwigbot/
[2] http://python.org/
[3] http://pywikipediabot.sourceforge.net/
[4] http://en.wikipedia.org/wiki/Wikipedia:Bots/Requests_for_approval/EarwigBot_1
[5] http://en.wikipedia.org/wiki/User:EarwigBot#Tasks

+ 205
- 0
README.rst View File

@@ -0,0 +1,205 @@
EarwigBot
=========

EarwigBot_ is a Python_ robot that edits Wikipedia_ and interacts with people
over IRC_. This file provides a basic overview of how to install and set up the
bot; more detailed information is located in the ``docs/`` directory (available
online at PyPI_).

History
-------

Development began, based on the `Pywikipedia framework`_, in early 2009.
Approval for its first task, a `copyright violation detector`_, was carried out
in May, and the bot has been running consistently ever since (with the
exception of Jan/Feb 2011). It currently handles `several ongoing tasks`_
ranging from statistics generation to category cleanup, and on-demand tasks
such as WikiProject template tagging. Since it started running, the bot has
made over 50,000 edits.

A project to rewrite it from scratch began in early April 2011, thus moving
away from the Pywikipedia framework and allowing for less overall code, better
integration between bot parts, and easier maintenance.

Installation
------------

This package contains the core ``earwigbot``, abstracted enough that it should
be usable and customizable by anyone running a bot on a MediaWiki site. Since
it is component-based, the IRC components can be disabled if desired. IRC
commands and bot tasks specific to `my instance of EarwigBot`_ that I don't
feel the average user will need are available from the repository
`earwigbot-plugins`_.

It's recommended to run the bot's unit tests before installing. Run ``python
setup.py test`` from the project's root directory. Note that some
tests require an internet connection, and others may take a while to run.
Coverage is currently rather incomplete.

Latest release (v0.1)
~~~~~~~~~~~~~~~~~~~~~

EarwigBot is available from the `Python Package Index`_, so you can install the
latest release with ``pip install earwigbot`` (`get pip`_).

You can also install it from source [1]_ directly::

curl -Lo earwigbot.tgz https://github.com/earwig/earwigbot/tarball/v0.1
tar -xf earwigbot.tgz
cd earwig-earwigbot-*
python setup.py install
cd ..
rm -r earwigbot.tgz earwig-earwigbot-*

Development version
~~~~~~~~~~~~~~~~~~~

You can install the development version of the bot from ``git`` by using
setuptools/distribute's ``develop`` command [1]_, probably on the ``develop``
branch which contains (usually) working code. ``master`` contains the latest
release. EarwigBot uses `git flow`_, so you're free to
browse by tags or by new features (``feature/*`` branches)::

git clone git://github.com/earwig/earwigbot.git earwigbot
cd earwigbot
python setup.py develop

Setup
-----

The bot stores its data in a "working directory", including its config file and
databases. This is also the location where you will place custom IRC commands
and bot tasks, which will be explained later. It doesn't matter where this
directory is, as long as the bot can write to it.

Start the bot with ``earwigbot path/to/working/dir``, or just ``earwigbot`` if
the working directory is the current directory. It will notice that no
``config.yml`` file exists and take you through the setup process.

There is currently no way to edit the ``config.yml`` file from within the bot
after it has been created, but YAML is a very straightforward format, so you
should be able to make any necessary changes yourself. Check out the
`explanation of YAML`_ on Wikipedia for help.

After setup, the bot will start. This means it will connect to the IRC servers
it has been configured for, schedule bot tasks to run at specific times, and
then wait for instructions (as commands on IRC). For a list of commands, say
"``!help``" (commands are messages prefixed with an exclamation mark).

You can stop the bot at any time with Control+C, same as you stop a normal
Python program, and it will try to exit safely. You can also use the
"``!quit``" command on IRC.

Customizing
-----------

The bot's working directory contains a ``commands`` subdirectory and a
``tasks`` subdirectory. Custom IRC commands can be placed in the former,
whereas custom wiki bot tasks go into the latter. Developing custom modules is
explained below, and in more detail through the bot's documentation on PyPI_
(or in the ``docs/`` dir).

Note that custom commands and tasks will override built-in commands and tasks
with the same name.

``Bot`` and ``BotConfig``
~~~~~~~~~~~~~~~~~~~~~~~~~

`earwigbot.bot.Bot`_ is EarwigBot's main class. You don't have to instantiate
this yourself, but it's good to be familiar with its attributes and methods,
because it is the main way to communicate with other parts of the bot. A
``Bot`` object is accessible as an attribute of commands and tasks (i.e.,
``self.bot``).

`earwigbot.config.BotConfig`_ stores configuration information for the bot. Its
docstring explains what each attribute is used for, but essentially each "node"
(one of ``config.components``, ``wiki``, ``irc``, ``commands``, ``tasks``, and
``metadata``) maps to a section of the bot's ``config.yml`` file. For example,
if ``config.yml`` includes something like::

irc:
frontend:
nick: MyAwesomeBot
channels:
- "##earwigbot"
- "#channel"
- "#other-channel"

...then ``config.irc["frontend"]["nick"]`` will be ``"MyAwesomeBot"`` and
``config.irc["frontend"]["channels"]`` will be ``["##earwigbot", "#channel",
"#other-channel"]``.

Custom IRC commands
~~~~~~~~~~~~~~~~~~~

Custom commands are subclasses of `earwigbot.commands.Command`_ that override
``Command``'s ``process()`` (and optionally ``check()`` or ``setup()``)
methods.

The bot has a wide selection of built-in commands and plugins to act as sample
code and/or to give ideas. Start with test_, and then check out chanops_ and
afc_status_ for some more complicated scripts.

Custom bot tasks
~~~~~~~~~~~~~~~~

Custom tasks are subclasses of `earwigbot.tasks.Task`_ that override ``Task``'s
``run()`` (and optionally ``setup()``) methods.

See the built-in wikiproject_tagger_ task for a relatively straightforward
task, or the afc_statistics_ plugin for a more complicated one.

The Wiki Toolset
----------------

EarwigBot's answer to the `Pywikipedia framework`_ is the Wiki Toolset
(``earwigbot.wiki``), which you will mainly access through ``bot.wiki``.

``bot.wiki`` provides three methods for the management of Sites -
``get_site()``, ``add_site()``, and ``remove_site()``. Sites are objects that
simply represent a MediaWiki site. A single instance of EarwigBot (i.e. a
single *working directory*) is expected to relate to a single site or group of
sites using the same login info (like all WMF wikis with CentralAuth).

Load your default site (the one that you picked during setup) with
``site = bot.wiki.get_site()``.

Not all aspects of the toolset are covered in the docs. Explore `its code and
docstrings`_ to learn how to use it in a more hands-on fashion. For reference,
``bot.wiki`` is an instance of ``earwigbot.wiki.SitesDB`` tied to the
``sites.db`` file in the bot's working directory.

Footnotes
---------

- Questions, comments, or suggestions about the documentation? `Let me know`_
so I can improve it for other people.

.. [1] ``python setup.py install``/``develop`` may require root, or use the
``--user`` switch to install for the current user only.

.. _EarwigBot: http://en.wikipedia.org/wiki/User:EarwigBot
.. _Python: http://python.org/
.. _Wikipedia: http://en.wikipedia.org/
.. _IRC: http://en.wikipedia.org/wiki/Internet_Relay_Chat
.. _PyPI: http://packages.python.org/earwigbot
.. _Pywikipedia framework: http://pywikipediabot.sourceforge.net/
.. _copyright violation detector: http://en.wikipedia.org/wiki/Wikipedia:Bots/Requests_for_approval/EarwigBot_1
.. _several ongoing tasks: http://en.wikipedia.org/wiki/User:EarwigBot#Tasks
.. _my instance of EarwigBot: http://en.wikipedia.org/wiki/User:EarwigBot
.. _earwigbot-plugins: https://github.com/earwig/earwigbot-plugins
.. _Python Package Index: http://pypi.python.org
.. _get pip: http://pypi.python.org/pypi/pip
.. _git flow: http://nvie.com/posts/a-successful-git-branching-model/
.. _explanation of YAML: http://en.wikipedia.org/wiki/YAML
.. _earwigbot.bot.Bot: https://github.com/earwig/earwigbot/blob/develop/earwigbot/bot.py
.. _earwigbot.config.BotConfig: https://github.com/earwig/earwigbot/blob/develop/earwigbot/config/__init__.py
.. _earwigbot.commands.Command: https://github.com/earwig/earwigbot/blob/develop/earwigbot/commands/__init__.py
.. _test: https://github.com/earwig/earwigbot/blob/develop/earwigbot/commands/test.py
.. _chanops: https://github.com/earwig/earwigbot/blob/develop/earwigbot/commands/chanops.py
.. _afc_status: https://github.com/earwig/earwigbot-plugins/blob/develop/commands/afc_status.py
.. _earwigbot.tasks.Task: https://github.com/earwig/earwigbot/blob/develop/earwigbot/tasks/__init__.py
.. _wikiproject_tagger: https://github.com/earwig/earwigbot/blob/develop/earwigbot/tasks/wikiproject_tagger.py
.. _afc_statistics: https://github.com/earwig/earwigbot-plugins/blob/develop/tasks/afc_statistics.py
.. _its code and docstrings: https://github.com/earwig/earwigbot/tree/develop/earwigbot/wiki
.. _Let me know: ben.kurtovic@verizon.net

+ 0
- 0
View File


+ 0
- 25
config/irc.py View File

@@ -1,25 +0,0 @@
# -*- coding: utf-8 -*-

# EarwigBot Configuration File
# This file contains information that the bot uses to connect to IRC.

# our main (front-end) server's hostname and port
HOST = "irc.freenode.net"
PORT = 6667

# our watcher server's hostname, port, and RC channel
WATCHER_HOST = "irc.wikimedia.org"
WATCHER_PORT = 6667
WATCHER_CHAN = "#en.wikipedia"

# our nick, ident, and real name, used on both servers
NICK = "EarwigBot"
IDENT = "earwigbot"
REALNAME = "[[w:en:User:EarwigBot]]"

# channels to join on main server's startup
CHANS = ["##earwigbot", "##earwig", "#wikipedia-en-afc"]

# hardcoded hostnames of users with certain permissions
OWNERS = ["wikipedia/The-Earwig"] # can use owner-only commands (!restart and !git)
ADMINS = ["wikipedia/The-Earwig", "wikipedia/LeonardBloom"] # can use high-risk commands, e.g. !op

+ 0
- 24
config/main.py View File

@@ -1,24 +0,0 @@
# -*- coding: utf-8 -*-

# EarwigBot Configuration File
# This file tells the bot which of its components should be enabled.

# The IRC frontend (configured in config/irc.py) sits on a public IRC network,
# responds to commands given to it, and reports edits (if the IRC watcher
# component is enabled).
enable_irc_frontend = True

# The IRC watcher (connection details configured in config/irc.py as well) sits
# on an IRC network that gives a recent changes feed, usually irc.wikimedia.org.
# It looks for edits matching certain (often regex) patterns (rules configured
# in config/watcher.py), and either reports them to the IRC frontend (if
# enabled), or activates a task on the WikiBot (if configured to do so).
enable_irc_watcher = True

# EarwigBot doesn't have to edit a wiki, although this is its main purpose. If
# the wiki schedule is disabled, it will not be able to handle scheduled tasks
# that involve editing (such as creating a daily category every day at midnight
# UTC), but it can still edit through rules given in the watcher, and bot tasks
# can still be activated by the command line. The schedule is configured in
# config/schedule.py.
enable_wiki_schedule = True

+ 0
- 28
config/schedule.py View File

@@ -1,28 +0,0 @@
# -*- coding: utf-8 -*-

# EarwigBot Configuration File
# This file tells the bot when to run certain wiki-editing tasks.

def check(minute, hour, month_day, month, week_day):
    """Return the wiki-editing tasks that should be started at the given time.

    Each entry of the returned list is either a bare task name or a
    ``(task_name, kwargs)`` tuple. ``month_day`` and ``month`` are accepted
    but not consulted by the current rules.
    """
    # NOTE(review): the nesting below (hourly -> daily -> weekly) is inferred
    # from the "at midnight" comments of the original rules -- confirm.
    if minute != 0:
        # Nothing is scheduled off the hour.
        return []

    # Every hour on the hour: save statistics to [[Template:AFC_statistics]].
    scheduled = [("afc_statistics", {"action": "save"})]
    if hour != 0:
        return scheduled

    # Every day at midnight: create daily categories for WP:AFC and WP:FEED.
    scheduled.append("afc_dailycats")
    scheduled.append("feed_dailycats")

    # Once a week at midnight (the start of that day, not the end).
    weekly = {
        0: "afc_undated",    # Sunday: clear [[Category:Undated AfC submissions]]
        1: "afc_catdelink",  # Monday: delink mainspace categories in declined AfC submissions
        2: "wrongmime",      # Tuesday: tag files whose extensions do not agree with their MIME type
        3: "blptag",         # Wednesday: add |blp=yes to {{WPB}}/{{WPBS}} used with {{WP Biography}}
    }
    if week_day in weekly:
        scheduled.append(weekly[week_day])

    return scheduled

+ 0
- 9
config/secure.default.py View File

@@ -1,9 +0,0 @@
# -*- coding: utf-8 -*-

# EarwigBot Configuration File
# This file contains information that should be kept hidden, including passwords.

# IRC: identify ourselves to NickServ?
NS_AUTH = False
NS_USER = ""
NS_PASS = ""

+ 0
- 69
config/watcher.py View File

@@ -1,69 +0,0 @@
# -*- coding: utf-8 -*-

# EarwigBot Configuration File
# This file contains rules for the bot's watcher component.

import re

from wiki import task_manager

# Report channels on our front-end server. Every channel listed here /must/
# also be in CHANS in config/irc.py, or the bot will not be able to send
# messages to it (unless the channel has -n set).
AFC_CHANS = ["#wikipedia-en-afc"]  # report recent AfC changes/give AfC status messages upon join
BOT_CHANS = ["##earwigbot", "#wikipedia-en-afc"]  # report edits containing "!earwigbot"

# Commonly used pattern fragment: the (lowercased) AfC title prefix.
afc_prefix = "wikipedia( talk)?:(wikiproject )?articles for creation"

# Compiled regexps used when finding certain edits. Raw strings are used so
# the backslashes reach the regex engine unchanged.
r_page = re.compile(afc_prefix)
r_ffu = re.compile(r"wikipedia( talk)?:files for upload")
r_move1 = re.compile(r"moved \[\[" + afc_prefix)  # an AFC page was either moved locally or out
r_move2 = re.compile(r"moved \[\[(.*?)\]\] to \[\[" + afc_prefix)  # an outside page was moved into AFC
r_moved_pages = re.compile(r"^moved \[\[(.*?)\]\] to \[\[(.*?)\]\]")
r_delete = re.compile(r'deleted "\[\[' + afc_prefix)
r_deleted_page = re.compile(r'^deleted "\[\[(.*?)\]\]')
r_restore = re.compile(r'restored "\[\[' + afc_prefix)
r_restored_page = re.compile(r'^restored "\[\[(.*?)\]\]')
r_protect = re.compile(r'protected "\[\[' + afc_prefix)

def _start_afc_tasks(action, **kwargs):
    """Start the two AfC tasks with the same action and keyword arguments."""
    for task_name in ("afc_statistics", "afc_copyvios"):
        task_manager.start_task(task_name, action=action, **kwargs)


def process(rc):
    """Decide where a recent-changes event is reported and start related tasks.

    Reads ``rc.page``, ``rc.comment``, ``rc.msg`` and ``rc.flags``. Events
    that touch Articles for Creation pages start the ``afc_statistics`` and
    ``afc_copyvios`` tasks with an action describing the event.

    Returns the set of front-end channels the event should be reported to.
    """
    chans = set()  # channels to report this message to
    page_name = rc.page.lower()
    comment = rc.comment.lower()

    if "!earwigbot" in rc.msg.lower():
        chans.update(BOT_CHANS)

    if r_page.search(page_name):
        _start_afc_tasks("process_edit", page=rc.page)
        chans.update(AFC_CHANS)
    elif r_ffu.match(page_name):
        chans.update(AFC_CHANS)
    elif page_name.startswith("template:afc submission"):
        chans.update(AFC_CHANS)
    elif rc.flags == "move" and (r_move1.match(comment) or r_move2.match(comment)):
        # Two capture groups: findall() yields (old_title, new_title) tuples.
        p = r_moved_pages.findall(rc.comment)[0]
        _start_afc_tasks("process_move", pages=p)
        chans.update(AFC_CHANS)
    elif rc.flags == "delete" and r_delete.match(comment):
        # One capture group: findall() yields plain strings, so [0] is the
        # whole title. (Indexing again would keep only its first character.)
        p = r_deleted_page.findall(rc.comment)[0]
        _start_afc_tasks("process_delete", page=p)
        chans.update(AFC_CHANS)
    elif rc.flags == "restore" and r_restore.match(comment):
        # Same single-group pattern shape as the delete case above.
        p = r_restored_page.findall(rc.comment)[0]
        _start_afc_tasks("process_restore", page=p)
        chans.update(AFC_CHANS)
    elif rc.flags == "protect" and r_protect.match(comment):
        chans.update(AFC_CHANS)

    return chans

+ 0
- 0
View File


+ 0
- 122
core/main.py View File

@@ -1,122 +0,0 @@
# -*- coding: utf-8 -*-

## EarwigBot's Core

## EarwigBot has three components that can run independently of each other: an
## IRC front-end, an IRC watcher, and a wiki scheduler.
## * The IRC front-end runs on a normal IRC server and expects users to
## interact with it/give it commands.
## * The IRC watcher runs on a wiki recent-changes server and listens for
## edits. Users cannot interact with this part of the bot.
## * The wiki scheduler runs wiki-editing bot tasks in separate threads at
## user-defined times through a cron-like interface.

## There is a "priority" system here:
## 1. If the IRC frontend is enabled, it will run on the main thread, and the
## IRC watcher and wiki scheduler (if enabled) will run on separate threads.
## 2. If the wiki scheduler is enabled, it will run on the main thread, and the
## IRC watcher (if enabled) will run on a separate thread.
## 3. If the IRC watcher is enabled, it will run on the main (and only) thread.
## Else, the bot will stop, as no components are enabled.

import threading
import time
import traceback
import sys
import os

parent_dir = os.path.split(sys.path[0])[0]
sys.path.append(parent_dir) # make sure we look in the parent directory for modules

from config.main import *
from irc import frontend, watcher
from wiki import task_manager

f_conn = None
w_conn = None

def irc_watcher(f_conn):
    """Run the IRC watcher forever, reconnecting whenever it stops.

    Runs as a separate thread when the frontend and/or scheduler is enabled,
    otherwise on the main thread. ``f_conn`` is the frontend connection handed
    on to ``watcher.main()``. The current watcher connection is stored in the
    module-level ``w_conn``.
    """
    global w_conn
    # print(...) with one string argument behaves the same as the Python 2
    # print statement used elsewhere in this file.
    print("\nStarting IRC watcher...")
    while 1:  # restart the watcher component if (just) it breaks
        w_conn = watcher.get_connection()
        w_conn.connect()
        print("")  # a blank line signifies that the bot has finished starting up
        try:
            watcher.main(w_conn, f_conn)
        except Exception:
            # Only ordinary errors trigger a restart. KeyboardInterrupt and
            # SystemExit must propagate so Ctrl+C can stop the bot when the
            # watcher is running on the main thread.
            traceback.print_exc()
        # NOTE(review): the sleep and message are placed after the try block so
        # they also run when watcher.main() returns normally -- confirm.
        time.sleep(5)  # sleep a bit before restarting watcher
        print("\nWatcher has stopped; restarting component...")

def wiki_scheduler():
    """Start the scheduled wiki tasks once a minute, forever.

    Runs as a separate thread, or as the primary thread if the IRC frontend
    is not enabled. Each pass hands the current UTC time to
    ``task_manager.start_tasks()`` and then sleeps for whatever is left of
    the minute.
    """
    while 1:
        time_start = time.time()
        now = time.gmtime(time_start)
        task_manager.start_tasks(now)
        time_end = time.time()
        # Elapsed time is end minus start; the reverse is never positive and
        # would make every sleep last 60 seconds or more.
        time_diff = time_end - time_start
        if time_diff < 60:  # sleep until the next minute
            time.sleep(60 - time_diff)

def irc_frontend():
    """Run the IRC frontend on the primary thread.

    Starts the wiki scheduler and the IRC watcher on daemon threads if they
    are enabled, then blocks in ``frontend.main()``. When that returns, the
    watcher connection (if one exists) and the frontend connection are closed.
    """
    global f_conn
    print("\nStarting IRC frontend...")
    f_conn = frontend.get_connection()
    frontend.startup(f_conn)

    if enable_wiki_schedule:
        print("\nStarting wiki scheduler...")
        task_manager.load_tasks()
        t_scheduler = threading.Thread(target=wiki_scheduler)
        t_scheduler.name = "wiki-scheduler"
        t_scheduler.daemon = True
        t_scheduler.start()

    if enable_irc_watcher:
        t_watcher = threading.Thread(target=irc_watcher, args=(f_conn,))
        t_watcher.name = "irc-watcher"
        t_watcher.daemon = True
        t_watcher.start()

    frontend.main()

    # w_conn is assigned by the watcher thread; it is still None if that
    # thread has not created its connection yet, so check before closing.
    if enable_irc_watcher and w_conn is not None:
        w_conn.close()
    f_conn.close()

def run():
    """Start the enabled bot components following the priority order.

    The frontend, if enabled, takes the main thread and starts the other
    components itself. Otherwise the scheduler takes the main thread, with
    the watcher on a daemon thread if enabled. Otherwise the watcher runs
    alone on the main thread. If nothing is enabled, the bot exits.
    """
    if enable_irc_frontend:
        # The frontend starts any additional components itself.
        irc_frontend()
    elif enable_wiki_schedule:
        # Scheduler on the main thread; watcher (if enabled) on another one.
        print("\nStarting wiki scheduler...")
        task_manager.load_tasks()
        if enable_irc_watcher:
            t_watcher = threading.Thread(target=irc_watcher, args=(f_conn,))
            t_watcher.name = "irc-watcher"
            t_watcher.daemon = True
            t_watcher.start()
        wiki_scheduler()
    elif enable_irc_watcher:
        # The watcher is the only enabled component. irc_watcher() requires
        # the frontend connection argument; f_conn is None here because no
        # frontend was started.
        irc_watcher(f_conn)
    else:  # nothing is enabled!
        exit("\nNo bot parts are enabled; stopping...")

if __name__ == "__main__":
try:
run()
except KeyboardInterrupt:
exit("\nKeyboardInterrupt: stopping main bot loop.")

+ 153
- 0
docs/Makefile View File

@@ -0,0 +1,153 @@
# Makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = _build

# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .

# Every target below is a command, not a file; texinfo and info are included
# so a stray file or directory with either name cannot shadow the target.
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man texinfo info changes linkcheck doctest gettext

help:
@echo "Please use \`make <target>' where <target> is one of"
@echo " html to make standalone HTML files"
@echo " dirhtml to make HTML files named index.html in directories"
@echo " singlehtml to make a single large HTML file"
@echo " pickle to make pickle files"
@echo " json to make JSON files"
@echo " htmlhelp to make HTML files and a HTML help project"
@echo " qthelp to make HTML files and a qthelp project"
@echo " devhelp to make HTML files and a Devhelp project"
@echo " epub to make an epub"
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
@echo " latexpdf to make LaTeX files and run them through pdflatex"
@echo " text to make text files"
@echo " man to make manual pages"
@echo " texinfo to make Texinfo files"
@echo " info to make Texinfo files and run them through makeinfo"
@echo " gettext to make PO message catalogs"
@echo " changes to make an overview of all changed/added/deprecated items"
@echo " linkcheck to check all external links for integrity"
@echo " doctest to run all doctests embedded in the documentation (if enabled)"

clean:
-rm -rf $(BUILDDIR)/*

html:
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

singlehtml:
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
@echo
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

pickle:
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
@echo
@echo "Build finished; now you can process the pickle files."

json:
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
@echo
@echo "Build finished; now you can process the JSON files."

htmlhelp:
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
@echo
@echo "Build finished; now you can run HTML Help Workshop with the" \
".hhp project file in $(BUILDDIR)/htmlhelp."

qthelp:
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
@echo
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/EarwigBot.qhcp"
@echo "To view the help file:"
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/EarwigBot.qhc"

devhelp:
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
@echo
@echo "Build finished."
@echo "To view the help file:"
@echo "# mkdir -p $$HOME/.local/share/devhelp/EarwigBot"
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/EarwigBot"
@echo "# devhelp"

epub:
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
@echo
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

latex:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
@echo "Run \`make' in that directory to run these through (pdf)latex" \
"(use \`make latexpdf' here to do that automatically)."

latexpdf:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through pdflatex..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

text:
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
@echo
@echo "Build finished. The text files are in $(BUILDDIR)/text."

man:
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
@echo
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

texinfo:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
@echo "Run \`make' in that directory to run these through makeinfo" \
"(use \`make info' here to do that automatically)."

# Build the Texinfo sources, then run makeinfo on them via a sub-make.
# $(MAKE) (as in the latexpdf target) passes flags and the jobserver down to
# the sub-make, which a literal "make" does not.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

gettext:
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
@echo
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

changes:
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
@echo
@echo "The overview file is in $(BUILDDIR)/changes."

linkcheck:
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
@echo
@echo "Link check complete; look for any errors in the above output " \
"or in $(BUILDDIR)/linkcheck/output.txt."

doctest:
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
@echo "Testing of doctests in the sources finished, look at the " \
"results in $(BUILDDIR)/doctest/output.txt."

+ 9
- 0
docs/api/earwigbot.commands.rst View File

@@ -0,0 +1,9 @@
commands Package
================

:mod:`commands` Package
-----------------------

.. automodule:: earwigbot.commands
:members:
:undoc-members:

+ 46
- 0
docs/api/earwigbot.config.rst View File

@@ -0,0 +1,46 @@
config Package
==============

:mod:`config` Package
---------------------

.. automodule:: earwigbot.config
:members:
:undoc-members:

:mod:`formatter` Module
-----------------------

.. automodule:: earwigbot.config.formatter
:members:
:undoc-members:
:show-inheritance:

:mod:`node` Module
------------------

.. automodule:: earwigbot.config.node
:members:
:undoc-members:

:mod:`ordered_yaml` Module
--------------------------

.. automodule:: earwigbot.config.ordered_yaml
:members:
:undoc-members:
:show-inheritance:

:mod:`permissions` Module
-------------------------

.. automodule:: earwigbot.config.permissions
:members:
:undoc-members:

:mod:`script` Module
--------------------

.. automodule:: earwigbot.config.script
:members:
:undoc-members:

+ 46
- 0
docs/api/earwigbot.irc.rst View File

@@ -0,0 +1,46 @@
irc Package
===========

:mod:`irc` Package
------------------

.. automodule:: earwigbot.irc
:members:
:undoc-members:

:mod:`connection` Module
------------------------

.. automodule:: earwigbot.irc.connection
:members:
:undoc-members:

:mod:`data` Module
------------------

.. automodule:: earwigbot.irc.data
:members:
:undoc-members:

:mod:`frontend` Module
----------------------

.. automodule:: earwigbot.irc.frontend
:members:
:undoc-members:
:show-inheritance:

:mod:`rc` Module
----------------

.. automodule:: earwigbot.irc.rc
:members:
:undoc-members:

:mod:`watcher` Module
---------------------

.. automodule:: earwigbot.irc.watcher
:members:
:undoc-members:
:show-inheritance:

+ 57
- 0
docs/api/earwigbot.rst View File

@@ -0,0 +1,57 @@
earwigbot Package
=================

:mod:`earwigbot` Package
------------------------

.. automodule:: earwigbot
:members:
:undoc-members:

:mod:`bot` Module
-----------------

.. automodule:: earwigbot.bot
:members:
:undoc-members:

:mod:`exceptions` Module
------------------------

.. automodule:: earwigbot.exceptions
:members:
:undoc-members:
:show-inheritance:

:mod:`lazy` Module
------------------

.. automodule:: earwigbot.lazy
:members:
:undoc-members:

:mod:`managers` Module
----------------------

.. automodule:: earwigbot.managers
:members: _ResourceManager, CommandManager, TaskManager
:undoc-members:
:show-inheritance:

:mod:`util` Module
------------------

.. automodule:: earwigbot.util
:members:
:undoc-members:

Subpackages
-----------

.. toctree::

earwigbot.commands
earwigbot.config
earwigbot.irc
earwigbot.tasks
earwigbot.wiki

+ 16
- 0
docs/api/earwigbot.tasks.rst View File

@@ -0,0 +1,16 @@
tasks Package
=============

:mod:`tasks` Package
--------------------

.. automodule:: earwigbot.tasks
:members:
:undoc-members:

:mod:`wikiproject_tagger` Module
--------------------------------

.. automodule:: earwigbot.tasks.wikiproject_tagger
:members:
:show-inheritance:

+ 47
- 0
docs/api/earwigbot.wiki.copyvios.rst View File

@@ -0,0 +1,47 @@
copyvios Package
================

:mod:`copyvios` Package
-----------------------

.. automodule:: earwigbot.wiki.copyvios
:members:
:undoc-members:

:mod:`exclusions` Module
------------------------

.. automodule:: earwigbot.wiki.copyvios.exclusions
:members:
:undoc-members:

:mod:`markov` Module
--------------------

.. automodule:: earwigbot.wiki.copyvios.markov
:members:
:undoc-members:
:show-inheritance:

:mod:`parsers` Module
---------------------

.. automodule:: earwigbot.wiki.copyvios.parsers
:members:
:undoc-members:
:show-inheritance:

:mod:`result` Module
--------------------

.. automodule:: earwigbot.wiki.copyvios.result
:members:
:undoc-members:

:mod:`search` Module
--------------------

.. automodule:: earwigbot.wiki.copyvios.search
:members:
:undoc-members:
:show-inheritance:

+ 59
- 0
docs/api/earwigbot.wiki.rst View File

@@ -0,0 +1,59 @@
wiki Package
============

:mod:`wiki` Package
-------------------

.. automodule:: earwigbot.wiki
:members:
:undoc-members:

:mod:`category` Module
----------------------

.. automodule:: earwigbot.wiki.category
:members:
:undoc-members:

:mod:`constants` Module
-----------------------

.. automodule:: earwigbot.wiki.constants
:members:
:undoc-members:

:mod:`page` Module
------------------

.. automodule:: earwigbot.wiki.page
:members:
:undoc-members:
:show-inheritance:

:mod:`site` Module
------------------

.. automodule:: earwigbot.wiki.site
:members:
:undoc-members:

:mod:`sitesdb` Module
---------------------

.. automodule:: earwigbot.wiki.sitesdb
:members:
:undoc-members:

:mod:`user` Module
------------------

.. automodule:: earwigbot.wiki.user
:members:
:undoc-members:

Subpackages
-----------

.. toctree::

earwigbot.wiki.copyvios

+ 7
- 0
docs/api/modules.rst View File

@@ -0,0 +1,7 @@
earwigbot
=========

.. toctree::
:maxdepth: 6

earwigbot

+ 242
- 0
docs/conf.py View File

@@ -0,0 +1,242 @@
# -*- coding: utf-8 -*-
#
# EarwigBot documentation build configuration file, created by
# sphinx-quickstart on Sun Apr 29 01:42:25 2012.
#
# This file is execfile()d with the current directory set to its containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

import os
import sys

# Put the parent of the documentation directory at the front of sys.path so
# that autodoc can import the modules it documents. The relative '..' is
# resolved by os.path.abspath against the current directory, which is this
# file's own directory when Sphinx executes it.
sys.path.insert(0, os.path.abspath('..'))

# -- General configuration -----------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'

# Sphinx extensions to load, listed by module name. Entries may be extensions
# shipped with Sphinx ('sphinx.ext.*') or custom ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.coverage',
    'sphinx.ext.viewcode',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix of source filenames.
source_suffix = '.rst'

# The encoding of source files.
#source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = u'EarwigBot'
copyright = u'2009, 2010, 2011, 2012 Ben Kurtovic'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '0.1'
# The full version, including alpha/beta/rc tags.
release = '0.1'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']

# The reST default role (used for this markup: `text`) to use for all documents.
#default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []


# -- Options for HTML output ---------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'nature'

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []

# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None

# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}

# If false, no module index is generated.
#html_domain_indices = True

# If false, no index is generated.
#html_use_index = True

# If true, the index is split into individual pages for each letter.
#html_split_index = False

# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None

# Output file base name for HTML help builder.
htmlhelp_basename = 'EarwigBotdoc'


# -- Options for LaTeX output --------------------------------------------------

latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
#'preamble': '',
}

# LaTeX files to generate from the document tree. One tuple per output file:
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
    (
        'index',
        'EarwigBot.tex',
        u'EarwigBot Documentation',
        u'Ben Kurtovic',
        'manual',
    ),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False

# If true, show page references after internal links.
#latex_show_pagerefs = False

# If true, show URL addresses after external links.
#latex_show_urls = False

# Documents to append as an appendix to all manuals.
#latex_appendices = []

# If false, no module index is generated.
#latex_domain_indices = True


# -- Options for manual page output --------------------------------------------

# Manual pages to generate. One tuple per page:
# (source start file, name, description, authors, manual section).
man_pages = [
    (
        'index',
        'earwigbot',
        u'EarwigBot Documentation',
        [u'Ben Kurtovic'],
        1,
    ),
]

# If true, show URL addresses after external links.
#man_show_urls = False


# -- Options for Texinfo output ------------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
# The description replaces the placeholder text left by sphinx-quickstart; it
# is the text shown next to the entry in the Info directory menu.
texinfo_documents = [
    ('index', 'EarwigBot', u'EarwigBot Documentation',
     u'Ben Kurtovic', 'EarwigBot',
     'A Python robot that edits Wikipedia and interacts with people over IRC.',
     'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
#texinfo_appendices = []

# If false, no module index is generated.
#texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'

+ 240
- 0
docs/customizing.rst View File

@@ -0,0 +1,240 @@
Customizing
===========

The bot's working directory contains a :file:`commands` subdirectory and a
:file:`tasks` subdirectory. Custom IRC commands can be placed in the former,
whereas custom wiki bot tasks go into the latter. Developing custom modules is
explained in detail in this documentation.

Note that custom commands and tasks will override built-in commands and tasks
with the same name.

:py:class:`~earwigbot.bot.Bot` and :py:class:`~earwigbot.config.BotConfig`
--------------------------------------------------------------------------

:py:class:`earwigbot.bot.Bot` is EarwigBot's main class. You don't have to
instantiate this yourself, but it's good to be familiar with its attributes and
methods, because it is the main way to communicate with other parts of the bot.
A :py:class:`~earwigbot.bot.Bot` object is accessible as an attribute of
commands and tasks (i.e., :py:attr:`self.bot`).

The most useful attributes are:

- :py:attr:`~earwigbot.bot.Bot.config`: an instance of
:py:class:`~earwigbot.config.BotConfig`, for accessing the bot's
configuration data (see below).

- :py:attr:`~earwigbot.bot.Bot.commands`: the bot's
:py:class:`~earwigbot.managers.CommandManager`, which is used internally to
run IRC commands (through
:py:meth:`commands.call() <earwigbot.managers.CommandManager.call>`, which
you shouldn't have to use); you can safely reload all commands with
:py:meth:`commands.load() <earwigbot.managers._ResourceManager.load>`.

- :py:attr:`~earwigbot.bot.Bot.tasks`: the bot's
:py:class:`~earwigbot.managers.TaskManager`, which can be used to start tasks
with :py:meth:`tasks.start(task_name, **kwargs)
<earwigbot.managers.TaskManager.start>`. :py:meth:`tasks.load()
<earwigbot.managers._ResourceManager.load>` can be used to safely reload all
tasks.

- :py:attr:`~earwigbot.bot.Bot.frontend` /
:py:attr:`~earwigbot.bot.Bot.watcher`: instances of
:py:class:`earwigbot.irc.Frontend <earwigbot.irc.frontend.Frontend>` and
:py:class:`earwigbot.irc.Watcher <earwigbot.irc.watcher.Watcher>`,
respectively, which represent the bot's connections to these two servers; you
can, for example, send a message to the frontend with
:py:meth:`frontend.say(chan, msg)
<earwigbot.irc.connection.IRCConnection.say>` (more on communicating with IRC
below).

- :py:attr:`~earwigbot.bot.Bot.wiki`: interface with the
:doc:`Wiki Toolset <toolset>`.

- Finally, :py:meth:`~earwigbot.bot.Bot.restart` (restarts IRC components and
reloads config, commands, and tasks) and :py:meth:`~earwigbot.bot.Bot.stop`
can be used almost anywhere. Both take an optional "reason" that will be
logged and used as the quit message when disconnecting from IRC.

:py:class:`earwigbot.config.BotConfig` stores configuration information for the
bot. Its docstrings explain what each attribute is used for, but essentially
each "node" (one of :py:attr:`config.components
<earwigbot.config.BotConfig.components>`,
:py:attr:`~earwigbot.config.BotConfig.wiki`,
:py:attr:`~earwigbot.config.BotConfig.irc`,
:py:attr:`~earwigbot.config.BotConfig.commands`,
:py:attr:`~earwigbot.config.BotConfig.tasks`, or
:py:attr:`~earwigbot.config.BotConfig.metadata`) maps to a section
of the bot's :file:`config.yml` file. For example, if :file:`config.yml`
includes something like::

irc:
frontend:
nick: MyAwesomeBot
channels:
- "##earwigbot"
- "#channel"
- "#other-channel"

...then :py:attr:`config.irc["frontend"]["nick"]` will be ``"MyAwesomeBot"``
and :py:attr:`config.irc["frontend"]["channels"]` will be
``["##earwigbot", "#channel", "#other-channel"]``.

Custom IRC commands
-------------------

Custom commands are subclasses of :py:class:`earwigbot.commands.Command` that
override :py:class:`~earwigbot.commands.Command`'s
:py:meth:`~earwigbot.commands.Command.process` (and optionally
:py:meth:`~earwigbot.commands.Command.check` or
:py:meth:`~earwigbot.commands.Command.setup`) methods.

:py:class:`~earwigbot.commands.Command`'s docstrings should explain what each
attribute and method is for and what they should be overridden with, but these
are the basics:

- Class attribute :py:attr:`~earwigbot.commands.Command.name` is the name of
the command. This must be specified.

- Class attribute :py:attr:`~earwigbot.commands.Command.commands` is a list of
names that will trigger this command. It defaults to the command's
:py:attr:`~earwigbot.commands.Command.name`, but you can override it with
multiple names to serve as aliases. This is handled by the default
:py:meth:`~earwigbot.commands.Command.check` implementation (see below), so
if :py:meth:`~earwigbot.commands.Command.check` is overridden, this is
ignored by everything except the help_ command (so ``!help alias`` will
trigger help for the actual command).

- Class attribute :py:attr:`~earwigbot.commands.Command.hooks` is a list of the
"IRC events" that this command might respond to. It defaults to ``["msg"]``,
but options include ``"msg_private"`` (for private messages only),
``"msg_public"`` (for channel messages only), and ``"join"`` (for when a user
joins a channel). See the afc_status_ plugin for a command that responds to
other hook types.

- Method :py:meth:`~earwigbot.commands.Command.setup` is called *once* with no
arguments immediately after the command is first loaded. Does nothing by
default; treat it like an :py:meth:`__init__` if you want
(:py:meth:`~earwigbot.commands.Command.__init__` does things by default and a
dedicated setup method is often easier than overriding
:py:meth:`~earwigbot.commands.Command.__init__` and using :py:obj:`super`).

- Method :py:meth:`~earwigbot.commands.Command.check` is passed a
:py:class:`~earwigbot.irc.data.Data` object, and should return ``True`` if
you want to respond to this message, or ``False`` otherwise. The default
behavior is to return ``True`` only if :py:attr:`data.is_command` is ``True``
and :py:attr:`data.command` ``==``
:py:attr:`~earwigbot.commands.Command.name` (or :py:attr:`data.command
<earwigbot.irc.data.Data.command>` is in
:py:attr:`~earwigbot.commands.Command.commands` if that list is overridden;
see above), which is suitable for most cases. A possible reason for
overriding is if you want to do something in response to events from a
specific channel only. Note that by returning ``True``, you prevent any other
commands from responding to this message.

- Method :py:meth:`~earwigbot.commands.Command.process` is passed the same
:py:class:`~earwigbot.irc.data.Data` object as
:py:meth:`~earwigbot.commands.Command.check`, but only if
:py:meth:`~earwigbot.commands.Command.check` returned ``True``. This is where
the bulk of your command goes. To respond to IRC messages, there are a number
of methods of :py:class:`~earwigbot.commands.Command` at your disposal. See
the test_ command for a simple example, or look in
:py:class:`~earwigbot.commands.Command`'s
:py:meth:`~earwigbot.commands.Command.__init__` method for the full list.

The most common ones are :py:meth:`say(chan_or_user, msg)
<earwigbot.irc.connection.IRCConnection.say>`, :py:meth:`reply(data, msg)
<earwigbot.irc.connection.IRCConnection.reply>` (convenience function; sends
a reply to the issuer of the command in the channel it was received),
:py:meth:`action(chan_or_user, msg)
<earwigbot.irc.connection.IRCConnection.action>`,
:py:meth:`notice(chan_or_user, msg)
<earwigbot.irc.connection.IRCConnection.notice>`, :py:meth:`join(chan)
<earwigbot.irc.connection.IRCConnection.join>`, and
:py:meth:`part(chan) <earwigbot.irc.connection.IRCConnection.part>`.

Commands have access to :py:attr:`config.commands[command_name]` for config
information, which is a node in :file:`config.yml` like every other attribute
of :py:attr:`bot.config`. This can be used to store, for example, API keys or
SQL connection info, so that these can be easily changed without modifying the
command itself.

The command *class* doesn't need a specific name, but it should logically
follow the command's name. The filename doesn't matter, but it is recommended
to match the command name for readability. Multiple command classes are allowed
in one file.

The bot has a wide selection of built-in commands and plugins to act as sample
code and/or to give ideas. Start with test_, and then check out chanops_ and
afc_status_ for some more complicated scripts.

Custom bot tasks
----------------

Custom tasks are subclasses of :py:class:`earwigbot.tasks.Task` that
override :py:class:`~earwigbot.tasks.Task`'s
:py:meth:`~earwigbot.tasks.Task.run` (and optionally
:py:meth:`~earwigbot.tasks.Task.setup`) methods.

:py:class:`~earwigbot.tasks.Task`'s docstrings should explain what each
attribute and method is for and what they should be overridden with, but these
are the basics:

- Class attribute :py:attr:`~earwigbot.tasks.Task.name` is the name of the
task. This must be specified.

- Class attribute :py:attr:`~earwigbot.tasks.Task.number` can be used to store
an optional "task number", possibly for use in edit summaries (to be
generated with :py:meth:`~earwigbot.tasks.Task.make_summary`). For
example, EarwigBot's :py:attr:`config.wiki["summary"]` is
``"([[WP:BOT|Bot]]; [[User:EarwigBot#Task $1|Task $1]]): $2"``, which the
task class's :py:meth:`make_summary(comment)
<earwigbot.tasks.Task.make_summary>` method will take and replace
``$1`` with the task number and ``$2`` with the details of the edit.

Additionally, :py:meth:`~earwigbot.tasks.Task.shutoff_enabled` (which checks
whether the bot has been told to stop on-wiki by checking the content of a
particular page) can check a different page for each task using similar
variables. EarwigBot's :py:attr:`config.wiki["shutoff"]["page"]` is
``"User:$1/Shutoff/Task $2"``; ``$1`` is substituted with the bot's username,
and ``$2`` is substituted with the task number, so, e.g., task #14 checks the
page ``[[User:EarwigBot/Shutoff/Task 14]]``. If the page's content does *not*
match :py:attr:`config.wiki["shutoff"]["disabled"]` (``"run"`` by default),
then shutoff is considered to be *enabled* and
:py:meth:`~earwigbot.tasks.Task.shutoff_enabled` will return ``True``,
indicating the task should not run. If you don't intend to use either of
these methods, feel free to leave this attribute blank.

- Method :py:meth:`~earwigbot.tasks.Task.setup` is called *once* with no
arguments immediately after the task is first loaded. Does nothing by
default; treat it like an :py:meth:`__init__` if you want
(:py:meth:`~earwigbot.tasks.Task.__init__` does things by default and a
dedicated setup method is often easier than overriding
:py:meth:`~earwigbot.tasks.Task.__init__` and using :py:obj:`super`).

- Method :py:meth:`~earwigbot.tasks.Task.run` is called with any number of
keyword arguments every time the task is executed (by
:py:meth:`tasks.start(task_name, **kwargs)
<earwigbot.managers.TaskManager.start>`, usually). This is where the bulk of
the task's code goes. For interfacing with MediaWiki sites, read up on the
:doc:`Wiki Toolset <toolset>`.

Tasks have access to :py:attr:`config.tasks[task_name]` for config information,
which is a node in :file:`config.yml` like every other attribute of
:py:attr:`bot.config`. This can be used to store, for example, edit summaries
or templates to append to user talk pages, so that these can be easily changed
without modifying the task itself.

The task *class* doesn't need a specific name, but it should logically follow
the task's name. The filename doesn't matter, but it is recommended to match
the task name for readability. Multiple task classes are allowed in one file.

See the built-in wikiproject_tagger_ task for a relatively straightforward
task, or the afc_statistics_ plugin for a more complicated one.

.. _help: https://github.com/earwig/earwigbot/blob/develop/earwigbot/commands/help.py
.. _afc_status: https://github.com/earwig/earwigbot-plugins/blob/develop/commands/afc_status.py
.. _test: https://github.com/earwig/earwigbot/blob/develop/earwigbot/commands/test.py
.. _chanops: https://github.com/earwig/earwigbot/blob/develop/earwigbot/commands/chanops.py
.. _wikiproject_tagger: https://github.com/earwig/earwigbot/blob/develop/earwigbot/tasks/wikiproject_tagger.py
.. _afc_statistics: https://github.com/earwig/earwigbot-plugins/blob/develop/tasks/afc_statistics.py

+ 48
- 0
docs/index.rst View File

@@ -0,0 +1,48 @@
EarwigBot v0.1 Documentation
============================

EarwigBot_ is a Python_ robot that edits Wikipedia_ and interacts with people
over IRC_.

History
-------

Development began, based on the `Pywikipedia framework`_, in early 2009.
Approval for its first task, a `copyright violation detector`_, was carried out
in May, and the bot has been running consistently ever since (with the
exception of Jan/Feb 2011). It currently handles `several ongoing tasks`_
ranging from statistics generation to category cleanup, and on-demand tasks
such as WikiProject template tagging. Since it started running, the bot has
made over 50,000 edits.

A project to rewrite it from scratch began in early April 2011, thus moving
away from the Pywikipedia framework and allowing for less overall code, better
integration between bot parts, and easier maintenance.

.. _EarwigBot: http://en.wikipedia.org/wiki/User:EarwigBot
.. _Python: http://python.org/
.. _Wikipedia: http://en.wikipedia.org/
.. _IRC: http://en.wikipedia.org/wiki/Internet_Relay_Chat
.. _Pywikipedia framework: http://pywikipediabot.sourceforge.net/
.. _copyright violation detector: http://en.wikipedia.org/wiki/Wikipedia:Bots/Requests_for_approval/EarwigBot_1
.. _several ongoing tasks: http://en.wikipedia.org/wiki/User:EarwigBot#Tasks

Contents
--------

.. toctree::
:maxdepth: 2

installation
setup
customizing
toolset
tips
API Reference <api/modules>

Indices and tables
------------------

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

+ 55
- 0
docs/installation.rst View File

@@ -0,0 +1,55 @@
Installation
============

This package contains the core :py:mod:`earwigbot`, abstracted enough that it
should be usable and customizable by anyone running a bot on a MediaWiki site.
Since it is component-based, the IRC components can be disabled if desired. IRC
commands and bot tasks specific to `my instance of EarwigBot`_ that I don't
feel the average user will need are available from the repository
`earwigbot-plugins`_.

It's recommended to run the bot's unit tests before installing. Run
:command:`python setup.py test` from the project's root directory. Note that
some tests require an internet connection, and others may take a while to run.
Coverage is currently rather incomplete.

Latest release (v0.1)
---------------------

EarwigBot is available from the `Python Package Index`_, so you can install the
latest release with :command:`pip install earwigbot` (`get pip`_).

You can also install it from source [1]_ directly::

curl -Lo earwigbot.tgz https://github.com/earwig/earwigbot/tarball/v0.1
tar -xf earwigbot.tgz
cd earwig-earwigbot-*
python setup.py install
cd ..
rm -r earwigbot.tgz earwig-earwigbot-*

Development version
-------------------

You can install the development version of the bot from :command:`git` by using
setuptools/`distribute`_'s :command:`develop` command [1]_, probably on the
``develop`` branch which contains (usually) working code. ``master`` contains
the latest release. EarwigBot uses `git flow`_, so you're free to browse by
tags or by new features (``feature/*`` branches)::

git clone git://github.com/earwig/earwigbot.git earwigbot
cd earwigbot
python setup.py develop

.. rubric:: Footnotes

.. [1] :command:`python setup.py install`/:command:`develop` may require root,
or use the :command:`--user` switch to install for the current user
only.

.. _my instance of EarwigBot: http://en.wikipedia.org/wiki/User:EarwigBot
.. _earwigbot-plugins: https://github.com/earwig/earwigbot-plugins
.. _Python Package Index: http://pypi.python.org
.. _get pip: http://pypi.python.org/pypi/pip
.. _distribute: http://pypi.python.org/pypi/distribute
.. _git flow: http://nvie.com/posts/a-successful-git-branching-model/

+ 28
- 0
docs/setup.rst View File

@@ -0,0 +1,28 @@
Setup
=====

The bot stores its data in a "working directory", including its config file and
databases. This is also the location where you will place custom IRC commands
and bot tasks, which will be explained later. It doesn't matter where this
directory is, as long as the bot can write to it.

Start the bot with :command:`earwigbot path/to/working/dir`, or just
:command:`earwigbot` if the working directory is the current directory. It will
notice that no :file:`config.yml` file exists and take you through the setup
process.

There is currently no way to edit the :file:`config.yml` file from within the
bot after it has been created, but YAML is a very straightforward format, so
you should be able to make any necessary changes yourself. Check out the
`explanation of YAML`_ on Wikipedia for help.

After setup, the bot will start. This means it will connect to the IRC servers
it has been configured for, schedule bot tasks to run at specific times, and
then wait for instructions (as commands on IRC). For a list of commands, say
"``!help``" (commands are messages prefixed with an exclamation mark).

You can stop the bot at any time with :kbd:`Control-c`, same as you stop a
normal Python program, and it will try to exit safely. You can also use the
"``!quit``" command on IRC.

.. _explanation of YAML: http://en.wikipedia.org/wiki/YAML

+ 46
- 0
docs/tips.rst View File

@@ -0,0 +1,46 @@
Tips
====

- Logging_ is a fantastic way to monitor the bot's progress as it runs. It has
a slew of built-in loggers, and enabling log retention (so logs are saved to
:file:`logs/` in the working directory) is highly recommended. In the normal
setup, there are three log files, each of which "rotate" at a specific time
(:file:`filename.log` becomes :file:`filename.log.2012-04-10`, for example).
The :file:`debug.log` file rotates every hour, and maintains six hours of
logs of every level (``DEBUG`` and up). :file:`bot.log` rotates every day at
midnight, and maintains seven days of non-debug logs (``INFO`` and up).
Finally, :file:`error.log` rotates every Sunday night, and maintains four
weeks of logs indicating unexpected events (``WARNING`` and up).

To use logging in your commands or tasks (recommended),
:py:class:`~earwigbot.commands.Command` and
:py:class:`~earwigbot.tasks.Task` provide :py:attr:`logger` attributes
configured for the specific command or task. If you're working with other
classes, :py:attr:`bot.logger` is the root logger
(:py:obj:`logging.getLogger("earwigbot")` by default), so you can use
:py:func:`~logging.Logger.getChild` to make your logger. For example, task
loggers are essentially
:py:attr:`bot.logger.getChild("tasks").getChild(task.name) <bot.logger>`.

- A very useful IRC command is "``!reload``", which reloads all commands and
tasks without restarting the bot. [1]_ Combined with using the `!git plugin`_
for pulling repositories from IRC, this can provide a seamless command/task
development workflow if the bot runs on an external server and you set up
its working directory as a git repo.

- You can run a task by itself instead of the entire bot with
:command:`earwigbot path/to/working/dir --task task_name`.

- Questions, comments, or suggestions about the documentation? `Let me know`_,
or `create an issue`_ so I can improve it for other people.

.. rubric:: Footnotes

.. [1] In reality, all this does is call :py:meth:`bot.commands.load()
<earwigbot.managers._ResourceManager.load>` and
:py:meth:`bot.tasks.load() <earwigbot.managers._ResourceManager.load>`!

.. _logging: http://docs.python.org/library/logging.html
.. _!git plugin: https://github.com/earwig/earwigbot-plugins/blob/develop/commands/git.py
.. _Let me know: ben.kurtovic@verizon.net
.. _create an issue: https://github.com/earwig/earwigbot/issues

+ 247
- 0
docs/toolset.rst View File

@@ -0,0 +1,247 @@
The Wiki Toolset
================

EarwigBot's answer to the `Pywikipedia framework`_ is the Wiki Toolset
(:py:mod:`earwigbot.wiki`), which you will mainly access through
:py:attr:`bot.wiki <earwigbot.bot.Bot.wiki>`.

:py:attr:`bot.wiki <earwigbot.bot.Bot.wiki>` provides three methods for the
management of Sites - :py:meth:`~earwigbot.wiki.sitesdb.SitesDB.get_site`,
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.add_site`, and
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.remove_site`. Sites are objects that
simply represent a MediaWiki site. A single instance of EarwigBot (i.e. a
single *working directory*) is expected to relate to a single site or group of
sites using the same login info (like all WMF wikis with `CentralAuth`_).

Load your default site (the one that you picked during setup) with
``site = bot.wiki.get_site()``.

Dealing with other sites
~~~~~~~~~~~~~~~~~~~~~~~~

*Skip this section if you're only working with one site.*

If a site is *already known to the bot* (meaning that it is stored in the
:file:`sites.db` file, which includes just your default wiki at first), you can
load a site with ``site = bot.wiki.get_site(name)``, where ``name`` might be
``"enwiki"`` or ``"frwiktionary"`` (you can also do
``site = bot.wiki.get_site(project="wikipedia", lang="en")``). Recall that not
giving any arguments to ``get_site()`` will return the default site.

:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.add_site` is used to add new sites to
the sites database. It may be called with similar arguments as
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.get_site`, but the difference is
important. :py:meth:`~earwigbot.wiki.sitesdb.SitesDB.get_site` only needs
enough information to identify the site in its database, which is usually just
its name; the database stores all other necessary connection info. With
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.add_site`, you need to provide enough
connection info so the toolset can successfully access the site's API/SQL
databases and store that information for later. That might not be much; for WMF
wikis, you can usually use code like this::

project, lang = "wikipedia", "es"
try:
site = bot.wiki.get_site(project=project, lang=lang)
except earwigbot.SiteNotFoundError:
# Load site info from http://es.wikipedia.org/w/api.php:
site = bot.wiki.add_site(project=project, lang=lang)

This works because EarwigBot assumes that the URL for the site is
``"//{lang}.{project}.org"``, the API is at ``/w/api.php``, and the SQL
connection info (if any) is stored as ``config.wiki["sql"]``. This might change
if you're dealing with non-WMF wikis, where the code might look something more
like::

project, lang = "mywiki", "it"
try:
site = bot.wiki.get_site(project=project, lang=lang)
except earwigbot.SiteNotFoundError:
# Load site info from http://mysite.net/mywiki/it/s/api.php:
base_url = "http://mysite.net/" + project + "/" + lang
db_name = lang + project + "_p"
sql = {"host": "sql.mysite.net", "db": db_name}
site = bot.wiki.add_site(base_url=base_url, script_path="/s", sql=sql)

:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.remove_site` does the opposite of
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.add_site`: give it a site's name or a
project/lang pair like :py:meth:`~earwigbot.wiki.sitesdb.SitesDB.get_site`
takes, and it'll remove that site from the sites database.

Sites
~~~~~

:py:class:`earwigbot.wiki.Site <earwigbot.wiki.site.Site>` objects provide the
following attributes:

- :py:attr:`~earwigbot.wiki.site.Site.name`: the site's name (or "wikiid"),
like ``"enwiki"``
- :py:attr:`~earwigbot.wiki.site.Site.project`: the site's project name, like
``"wikipedia"``
- :py:attr:`~earwigbot.wiki.site.Site.lang`: the site's language code, like
``"en"``
- :py:attr:`~earwigbot.wiki.site.Site.domain`: the site's web domain, like
``"en.wikipedia.org"``
- :py:attr:`~earwigbot.wiki.site.Site.url`: the site's full base URL, like
``"https://en.wikipedia.org"``

and the following methods:

- :py:meth:`api_query(**kwargs) <earwigbot.wiki.site.Site.api_query>`: does an
API query with the given keyword arguments as params
- :py:meth:`sql_query(query, params=(), ...)
<earwigbot.wiki.site.Site.sql_query>`: does an SQL query and yields its
results (as a generator)
- :py:meth:`~earwigbot.wiki.site.Site.get_replag`: returns the estimated
database replication lag (if we have the site's SQL connection info)
- :py:meth:`namespace_id_to_name(id, all=False)
<earwigbot.wiki.site.Site.namespace_id_to_name>`: given a namespace ID,
returns the primary associated namespace name (or a list of all names when
``all`` is ``True``)
- :py:meth:`namespace_name_to_id(name)
<earwigbot.wiki.site.Site.namespace_name_to_id>`: given a namespace name,
returns the associated namespace ID
- :py:meth:`get_page(title, follow_redirects=False, ...)
<earwigbot.wiki.site.Site.get_page>`: returns a ``Page`` object for the given
title (or a :py:class:`~earwigbot.wiki.category.Category` object if the
page's namespace is "``Category:``")
- :py:meth:`get_category(catname, follow_redirects=False, ...)
<earwigbot.wiki.site.Site.get_category>`: returns a ``Category`` object for
the given title (sans namespace)
- :py:meth:`get_user(username) <earwigbot.wiki.site.Site.get_user>`: returns a
:py:class:`~earwigbot.wiki.user.User` object for the given username
- :py:meth:`delegate(services, ...) <earwigbot.wiki.site.Site.delegate>`:
delegates a task to either the API or SQL depending on various conditions,
such as server lag

Pages and categories
~~~~~~~~~~~~~~~~~~~~

Create :py:class:`earwigbot.wiki.Page <earwigbot.wiki.page.Page>` objects with
:py:meth:`site.get_page(title) <earwigbot.wiki.site.Site.get_page>`,
:py:meth:`page.toggle_talk() <earwigbot.wiki.page.Page.toggle_talk>`,
:py:meth:`user.get_userpage() <earwigbot.wiki.user.User.get_userpage>`, or
:py:meth:`user.get_talkpage() <earwigbot.wiki.user.User.get_talkpage>`. They
provide the following attributes:

- :py:attr:`~earwigbot.wiki.page.Page.site`: the page's corresponding
:py:class:`~earwigbot.wiki.site.Site` object
- :py:attr:`~earwigbot.wiki.page.Page.title`: the page's title, or pagename
- :py:attr:`~earwigbot.wiki.page.Page.exists`: whether or not the page exists
- :py:attr:`~earwigbot.wiki.page.Page.pageid`: an integer ID representing the
page
- :py:attr:`~earwigbot.wiki.page.Page.url`: the page's URL
- :py:attr:`~earwigbot.wiki.page.Page.namespace`: the page's namespace as an
integer
- :py:attr:`~earwigbot.wiki.page.Page.protection`: the page's current
protection status
- :py:attr:`~earwigbot.wiki.page.Page.is_talkpage`: ``True`` if the page is a
talkpage, else ``False``
- :py:attr:`~earwigbot.wiki.page.Page.is_redirect`: ``True`` if the page is a
redirect, else ``False``

and the following methods:

- :py:meth:`~earwigbot.wiki.page.Page.reload`: forcibly reloads the page's
attributes (emphasis on *reload* - this is only necessary if there is reason
to believe they have changed)
- :py:meth:`toggle_talk(...) <earwigbot.wiki.page.Page.toggle_talk>`: returns a
content page's talk page, or vice versa
- :py:meth:`~earwigbot.wiki.page.Page.get`: returns page content
- :py:meth:`~earwigbot.wiki.page.Page.get_redirect_target`: if the page is a
redirect, returns its destination
- :py:meth:`~earwigbot.wiki.page.Page.get_creator`: returns a
:py:class:`~earwigbot.wiki.user.User` object representing the first user to
edit the page
- :py:meth:`edit(text, summary, minor=False, bot=True, force=False)
<earwigbot.wiki.page.Page.edit>`: replaces the page's content with ``text``
or creates a new page
- :py:meth:`add_section(text, title, minor=False, bot=True, force=False)
<earwigbot.wiki.page.Page.add_section>`: adds a new section named ``title``
at the bottom of the page
- :py:meth:`copyvio_check(...)
<earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check>`: checks the page for
copyright violations
- :py:meth:`copyvio_compare(url, ...)
<earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_compare>`: checks the page like
:py:meth:`~earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check`, but
against a specific URL
- :py:meth:`check_exclusion(username=None, optouts=None)
<earwigbot.wiki.page.Page.check_exclusion>`: checks whether or not we are
allowed to edit the page per ``{{bots}}``/``{{nobots}}``

Additionally, :py:class:`~earwigbot.wiki.category.Category` objects (created
with :py:meth:`site.get_category(name) <earwigbot.wiki.site.Site.get_category>`
or :py:meth:`site.get_page(title) <earwigbot.wiki.site.Site.get_page>` where
``title`` is in the ``Category:`` namespace) provide the following additional
attributes:

- :py:attr:`~earwigbot.wiki.category.Category.size`: the total number of
members in the category
- :py:attr:`~earwigbot.wiki.category.Category.pages`: the number of pages in
the category
- :py:attr:`~earwigbot.wiki.category.Category.files`: the number of files in
the category
- :py:attr:`~earwigbot.wiki.category.Category.subcats`: the number of
subcategories in the category

And the following additional method:

- :py:meth:`get_members(limit=None, ...)
<earwigbot.wiki.category.Category.get_members>`: iterates over
:py:class:`~earwigbot.wiki.page.Page`\ s in the category, until either the
category is exhausted or (if given) ``limit`` is reached

Users
~~~~~

Create :py:class:`earwigbot.wiki.User <earwigbot.wiki.user.User>` objects with
:py:meth:`site.get_user(name) <earwigbot.wiki.site.Site.get_user>` or
:py:meth:`page.get_creator() <earwigbot.wiki.page.Page.get_creator>`. They
provide the following attributes:

- :py:attr:`~earwigbot.wiki.user.User.site`: the user's corresponding
:py:class:`~earwigbot.wiki.site.Site` object
- :py:attr:`~earwigbot.wiki.user.User.name`: the user's username
- :py:attr:`~earwigbot.wiki.user.User.exists`: ``True`` if the user exists, or
``False`` if they do not
- :py:attr:`~earwigbot.wiki.user.User.userid`: an integer ID representing the
user
- :py:attr:`~earwigbot.wiki.user.User.blockinfo`: information about any current
blocks on the user (``False`` if no block, or a dict of
``{"by": blocking_user, "reason": block_reason,
"expiry": block_expire_time}``)
- :py:attr:`~earwigbot.wiki.user.User.groups`: a list of the user's groups
- :py:attr:`~earwigbot.wiki.user.User.rights`: a list of the user's rights
- :py:attr:`~earwigbot.wiki.user.User.editcount`: the number of edits made by
the user
- :py:attr:`~earwigbot.wiki.user.User.registration`: the time the user
registered as a :py:obj:`time.struct_time`
- :py:attr:`~earwigbot.wiki.user.User.emailable`: ``True`` if you can email the
user, ``False`` if you cannot
- :py:attr:`~earwigbot.wiki.user.User.gender`: the user's gender (``"male"``,
``"female"``, or ``"unknown"``)
- :py:attr:`~earwigbot.wiki.user.User.is_ip`: ``True`` if the user is an IP
address, IPv4 or IPv6, otherwise ``False``

and the following methods:

- :py:meth:`~earwigbot.wiki.user.User.reload`: forcibly reloads the user's
attributes (emphasis on *reload* - this is only necessary if there is reason
to believe they have changed)
- :py:meth:`~earwigbot.wiki.user.User.get_userpage`: returns a
:py:class:`~earwigbot.wiki.page.Page` object representing the user's userpage
- :py:meth:`~earwigbot.wiki.user.User.get_talkpage`: returns a
:py:class:`~earwigbot.wiki.page.Page` object representing the user's talkpage

Additional features
~~~~~~~~~~~~~~~~~~~

Not all aspects of the toolset are covered here. Explore `its code and
docstrings`_ to learn how to use it in a more hands-on fashion. For reference,
:py:attr:`bot.wiki <earwigbot.bot.Bot.wiki>` is an instance of
:py:class:`earwigbot.wiki.SitesDB <earwigbot.wiki.sitesdb.SitesDB>` tied to the
:file:`sites.db` file in the bot's working directory.

.. _Pywikipedia framework: http://pywikipediabot.sourceforge.net/
.. _CentralAuth: http://www.mediawiki.org/wiki/Extension:CentralAuth
.. _its code and docstrings: https://github.com/earwig/earwigbot/tree/develop/earwigbot/wiki

+ 0
- 22
earwigbot.py View File

@@ -1,22 +0,0 @@
# -*- coding: utf-8 -*-

import time
from subprocess import *

# Refuse to start without a complete configuration. The imported modules are
# not used by this wrapper (``main`` is even rebound below by ``def main``);
# importing them only checks that every config file is present.
try:
    from config import irc, main, schedule, secure, watcher
except ImportError:
    missing_config_help = """Missing a config file! Make sure you have configured the bot. All *.py.default files in config/
should have their .default extension removed, and the info inside should be corrected."""
    print(missing_config_help)
    exit()

def main():
    """Run the bot forever, relaunching it five seconds after each exit."""
    while True:
        # call() blocks until the child interpreter running the bot exits.
        call(['python', 'core/main.py'])
        time.sleep(5)  # pause for five seconds between bot runs


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C stops the wrapper itself, not just the current bot run.
        exit("\nKeyboardInterrupt: stopping bot wrapper.")

+ 68
- 0
earwigbot/__init__.py View File

@@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""
`EarwigBot <https://github.com/earwig/earwigbot>`_ is a Python robot that edits
Wikipedia and interacts with people over IRC.

See :file:`README.rst` for an overview, or the :file:`docs/` directory for
details. This documentation is also available `online
<http://packages.python.org/earwigbot>`_.
"""

# Package metadata.
__author__ = "Ben Kurtovic"
__copyright__ = "Copyright (C) 2009, 2010, 2011, 2012 Ben Kurtovic"
__license__ = "MIT License"
# Base version string; non-release builds get a ".git+<commit>" suffix below.
__version__ = "0.1"
__email__ = "ben.kurtovic@verizon.net"
# When this is False, the git HEAD commit ID is appended to __version__.
__release__ = True

if not __release__:
    def _get_git_commit_id():
        """Return the first eight characters of the git HEAD commit hash."""
        from git import Repo
        from os.path import dirname
        # The repository is looked up in the parent of the package directory.
        repo_root = dirname(dirname(__file__))
        return Repo(repo_root).head.object.hexsha[:8]
    try:
        __version__ += ".git+" + _get_git_commit_id()
    except Exception:
        # Best effort only: on any failure (for instance, the git module
        # cannot be imported) __version__ is simply left without a suffix.
        pass
    finally:
        # The helper is temporary; keep it out of the package namespace.
        del _get_git_commit_id

from earwigbot import lazy

# Expose each submodule/subpackage as a package-level attribute through
# lazy.LazyImporter (presumably so the real import is deferred until the
# attribute is first used -- see earwigbot/lazy.py to confirm).
importer = lazy.LazyImporter()

bot = importer.new("earwigbot.bot")
commands = importer.new("earwigbot.commands")
config = importer.new("earwigbot.config")
exceptions = importer.new("earwigbot.exceptions")
irc = importer.new("earwigbot.irc")
managers = importer.new("earwigbot.managers")
tasks = importer.new("earwigbot.tasks")
util = importer.new("earwigbot.util")
wiki = importer.new("earwigbot.wiki")

# The importer object itself is not part of the package's public namespace.
del importer

+ 222
- 0
earwigbot/bot.py View File

@@ -0,0 +1,222 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import logging
from threading import Lock, Thread, enumerate as enumerate_threads
from time import sleep, time

from earwigbot import __version__
from earwigbot.config import BotConfig
from earwigbot.irc import Frontend, Watcher
from earwigbot.managers import CommandManager, TaskManager
from earwigbot.wiki import SitesDB

__all__ = ["Bot"]

class Bot(object):
    """
    **EarwigBot: Main Bot Class**

    The :py:class:`Bot` class is the core of EarwigBot, essentially responsible
    for starting the various bot components and making sure they are all happy.

    EarwigBot has three components that can run independently of each other: an
    IRC front-end, an IRC watcher, and a wiki scheduler.

    - The IRC front-end runs on a normal IRC server and expects users to
      interact with it/give it commands.
    - The IRC watcher runs on a wiki recent-changes server and listens for
      edits. Users cannot interact with this part of the bot.
    - The wiki scheduler runs wiki-editing bot tasks in separate threads at
      user-defined times through a cron-like interface.

    The :py:class:`Bot` object is accessible from within commands and tasks as
    :py:attr:`self.bot`. This is the primary way to access data from other
    components of the bot. For example, our
    :py:class:`~earwigbot.config.BotConfig` object is accessible from
    :py:attr:`bot.config`, tasks can be started with
    :py:meth:`bot.tasks.start() <earwigbot.managers.TaskManager.start>`, and
    sites can be loaded from the wiki toolset with
    :py:meth:`bot.wiki.get_site() <earwigbot.wiki.sitesdb.SitesDB.get_site>`.
    """

    def __init__(self, root_dir, level=logging.INFO):
        """Create the bot's components and load config, commands and tasks.

        *root_dir* and *level* are passed straight through to
        :py:class:`~earwigbot.config.BotConfig`.
        """
        self.config = BotConfig(self, root_dir, level)
        self.logger = logging.getLogger("earwigbot")
        self.commands = CommandManager(self)
        self.tasks = TaskManager(self)
        self.wiki = SitesDB(self)
        # The IRC components are only created once run() starts them:
        self.frontend = None
        self.watcher = None

        # Held whenever IRC components are checked, stopped or restarted:
        self.component_lock = Lock()
        self._keep_looping = True

        self.config.load()
        self.commands.load()
        self.tasks.load()

    def __repr__(self):
        """Return the canonical string representation of the Bot."""
        return "Bot(config={0!r})".format(self.config)

    def __str__(self):
        """Return a nice string representation of the Bot."""
        return "<Bot at {0}>".format(self.config.root_dir)

    @staticmethod
    def _scheduler_delay(time_start, time_end, interval=60):
        """Return how many seconds the scheduler should sleep after a pass.

        *time_start* and *time_end* are the timestamps (in seconds) taken
        immediately before and after one scheduling pass. The result is what
        is left of *interval* once the duration of the pass is subtracted, or
        ``0`` if the pass took *interval* seconds or longer.
        """
        # Treat a clock that stepped backwards as a pass that took no time.
        elapsed = max(time_end - time_start, 0)
        return max(interval - elapsed, 0)

    def _dispatch_irc_component(self, name, klass):
        """Create a new IRC component, record it internally, and start it."""
        component = klass(self)
        setattr(self, name, component)
        Thread(name="irc_" + name, target=component.loop).start()

    def _start_irc_components(self):
        """Start the IRC frontend/watcher in separate threads if enabled."""
        if self.config.components.get("irc_frontend"):
            self.logger.info("Starting IRC frontend")
            self._dispatch_irc_component("frontend", Frontend)
        if self.config.components.get("irc_watcher"):
            self.logger.info("Starting IRC watcher")
            self._dispatch_irc_component("watcher", Watcher)

    def _start_wiki_scheduler(self):
        """Start the wiki scheduler in a separate thread if enabled."""
        def wiki_scheduler():
            while self._keep_looping:
                time_start = time()
                self.tasks.schedule()
                # Sleep for whatever is left of the minute after this pass:
                delay = self._scheduler_delay(time_start, time())
                if delay > 0:
                    sleep(delay)

        if self.config.components.get("wiki_scheduler"):
            self.logger.info("Starting wiki scheduler")
            thread = Thread(name="wiki_scheduler", target=wiki_scheduler)
            thread.daemon = True  # Stop if other threads stop
            thread.start()

    def _keep_irc_component_alive(self, name, klass):
        """Ensure that IRC components stay connected, else restart them."""
        component = getattr(self, name)
        if component:
            component.keep_alive()
            if component.is_stopped():
                log = "IRC {0} has stopped; restarting".format(name)
                self.logger.warning(log)
                self._dispatch_irc_component(name, klass)

    def _stop_irc_components(self, msg):
        """Request the IRC frontend and watcher to stop if enabled."""
        if self.frontend:
            self.frontend.stop(msg)
        if self.watcher:
            self.watcher.stop(msg)

    def _stop_daemon_threads(self):
        """Notify the user of which threads are going to be killed.

        Unfortunately, there is no method right now of stopping command and
        task threads safely. This is because there is no way to tell them to
        stop like the IRC components can be told; furthermore, they are run as
        daemons, and daemon threads automatically stop without calling any
        __exit__ or try/finally code when all non-daemon threads stop. They
        were originally implemented as regular non-daemon threads, but this
        meant there was no way to completely stop the bot if tasks were
        running, because all other threads would exit and threading would
        absorb KeyboardInterrupts.

        The advantage of this is that stopping the bot is truly guaranteed to
        *stop* the bot, while the disadvantage is that the threads are given no
        advance warning of their forced shutdown.
        """
        # Threads named after a bot component, plus these fixed names, are not
        # reported. Iterating the dict yields its keys on Python 2 and 3.
        skips = set(self.config.components)
        skips.update(["MainThread", "reminder", "irc:quit"])
        tasks = [thread.name for thread in enumerate_threads()
                 if thread.name not in skips and thread.is_alive()]
        if tasks:
            log = "The following commands or tasks will be killed: {0}"
            self.logger.warning(log.format(" ".join(tasks)))

    @property
    def is_running(self):
        """Whether or not the bot is currently running.

        This may return ``False`` even if the bot is still technically active,
        but in the process of shutting down.
        """
        return self._keep_looping

    def run(self):
        """Main entry point into running the bot.

        Starts all config-enabled components and then enters an idle loop,
        ensuring that all components remain online and restarting components
        that get disconnected from their servers.
        """
        self.logger.info("Starting bot (EarwigBot {0})".format(__version__))
        self._start_irc_components()
        self._start_wiki_scheduler()
        while self._keep_looping:
            with self.component_lock:
                self._keep_irc_component_alive("frontend", Frontend)
                self._keep_irc_component_alive("watcher", Watcher)
            sleep(2)

    def restart(self, msg=None):
        """Reload config, commands, tasks, and safely restart IRC components.

        This is thread-safe, and it will gracefully stop IRC components before
        reloading anything. Note that you can safely reload commands or tasks
        without restarting the bot with :py:meth:`bot.commands.load()
        <earwigbot.managers._ResourceManager.load>` or
        :py:meth:`bot.tasks.load() <earwigbot.managers._ResourceManager.load>`.
        These should not interfere with running components or tasks.

        If given, *msg* will be used as our quit message.
        """
        if msg:
            self.logger.info('Restarting bot ("{0}")'.format(msg))
        else:
            self.logger.info("Restarting bot")
        with self.component_lock:
            self._stop_irc_components(msg)
            self.config.load()
            self.commands.load()
            self.tasks.load()
            self._start_irc_components()

    def stop(self, msg=None):
        """Gracefully stop all bot components.

        If given, *msg* will be used as our quit message.
        """
        if msg:
            self.logger.info('Stopping bot ("{0}")'.format(msg))
        else:
            self.logger.info("Stopping bot")
        with self.component_lock:
            self._stop_irc_components(msg)
        self._keep_looping = False
        self._stop_daemon_threads()

+ 122
- 0
earwigbot/commands/__init__.py View File

@@ -0,0 +1,122 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

__all__ = ["Command"]

class Command(object):
    """
    **EarwigBot: Base IRC Command**

    This package provides built-in IRC "commands" used by the bot's front-end
    component. Additional commands can be installed as plugins in the bot's
    working directory.

    This class (import with ``from earwigbot.commands import Command``), can be
    subclassed to create custom IRC commands.

    This docstring is reported to the user when they type ``"!help
    <command>"``.
    """
    # Name of the command, shown to users by !help:
    name = None

    # Every name that triggers this command. When empty, only the command's
    # own name triggers it:
    commands = []

    # Available hooks are "msg", "msg_private", "msg_public", and "join".
    # "msg" is the default; set a different list in your subclass to change
    # when the command is considered:
    hooks = ["msg"]

    def __init__(self, bot):
        """Set up a freshly loaded command.

        Called once when the command is loaded (from
        :py:meth:`commands.load() <earwigbot.managers._ResourceManager.load>`),
        with *bot* being our base :py:class:`~earwigbot.bot.Bot` object. Avoid
        overriding this directly; if you must, call the parent constructor
        (``super(YourCommand, self).__init__(bot)``) first. Typical
        command-init/setup work belongs in :py:meth:`setup`.
        """
        self.bot = bot
        self.config = bot.config
        self.logger = bot.commands.logger.getChild(self.name)

        def frontend():
            # Looked up on every call rather than captured once here.
            return self.bot.frontend

        # Shortcuts that forward to the IRC frontend's methods of the same
        # name, always passing arguments positionally:
        self.say = lambda target, msg, hidelog=False: (
            frontend().say(target, msg, hidelog))
        self.reply = lambda data, msg, hidelog=False: (
            frontend().reply(data, msg, hidelog))
        self.action = lambda target, msg, hidelog=False: (
            frontend().action(target, msg, hidelog))
        self.notice = lambda target, msg, hidelog=False: (
            frontend().notice(target, msg, hidelog))
        self.join = lambda chan, hidelog=False: (
            frontend().join(chan, hidelog))
        self.part = lambda chan, msg=None, hidelog=False: (
            frontend().part(chan, msg, hidelog))
        self.mode = lambda t, level, msg, hidelog=False: (
            frontend().mode(t, level, msg, hidelog))
        self.ping = lambda target, hidelog=False: (
            frontend().ping(target, hidelog))
        self.pong = lambda target, hidelog=False: (
            frontend().pong(target, hidelog))

        self.setup()

    def __repr__(self):
        """Return the canonical string representation of the Command."""
        template = "Command(name={0!r}, commands={1!r}, hooks={2!r}, bot={3!r})"
        fields = (self.name, self.commands, self.hooks, self.bot)
        return template.format(*fields)

    def __str__(self):
        """Return a nice string representation of the Command."""
        return "<Command {0} of {1}>".format(self.name, self.bot)

    def setup(self):
        """Hook run at the end of :py:meth:`__init__`, once per loaded command.

        The default implementation does nothing; override it freely.
        """

    def check(self, data):
        """Return whether this command should be called in response to *data*.

        *data* is a :py:class:`~earwigbot.irc.data.Data` instance. Return
        ``True`` to respond to this activity, or ``False`` to ignore it and
        move on. This is called for each message sent on IRC, so keep it cheap
        and unlikely to raise exceptions.

        By default, a command responds when *data* is a command whose
        :py:attr:`data.command <earwigbot.irc.data.Data.command>` is in
        :py:attr:`self.commands <commands>`, or (when that list is left empty)
        equals :py:attr:`self.name <name>`. Only override this if you need
        different behavior.
        """
        triggers = self.commands or [self.name]
        return data.is_command and data.command in triggers

    def process(self, data):
        """Main entry point for doing a command.

        Handle an activity (usually a message) on IRC. :py:meth:`check` is
        called automatically by the command handler beforehand, so by now we
        know this is something to respond to. Put your command's body here.
        """

+ 142
- 0
earwigbot/commands/access.py View File

@@ -0,0 +1,142 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import re

from earwigbot.commands import Command

class Access(Command):
"""Control and get info on who can access the bot."""
name = "access"
commands = ["access", "permission", "permissions", "perm", "perms"]

def process(self, data):
if not data.args:
self.reply(data, "Subcommands are self, list, add, remove.")
return
permdb = self.config.irc["permissions"]
if data.args[0] == "self":
self.do_self(data, permdb)
elif data.args[0] == "list":
self.do_list(data, permdb)
elif data.args[0] == "add":
self.do_add(data, permdb)
elif data.args[0] == "remove":
self.do_remove(data, permdb)
else:
msg = "Unknown subcommand \x0303{0}\x0F.".format(data.args[0])
self.reply(data, msg)

def do_self(self, data, permdb):
if permdb.is_owner(data):
msg = "You are a bot owner (matching rule \x0302{0}\x0F)."
self.reply(data, msg.format(permdb.is_owner(data)))
elif permdb.is_admin(data):
msg = "You are a bot admin (matching rule \x0302{0}\x0F)."
self.reply(data, msg.format(permdb.is_admin(data)))
else:
self.reply(data, "You do not match any bot access rules.")

def do_list(self, data, permdb):
if len(data.args) > 1:
if data.args[1] in ["owner", "owners"]:
name, rules = "owners", permdb.data.get(permdb.OWNER)
elif data.args[1] in ["admin", "admins"]:
name, rules = "admins", permdb.data.get(permdb.ADMIN)
else:
msg = "Unknown access level \x0302{0}\x0F."
self.reply(data, msg.format(data.args[1]))
return
if rules:
msg = "Bot {0}: {1}.".format(name, ", ".join(map(str, rules)))
else:
msg = "No bot {0}.".format(name)
self.reply(data, msg)
else:
owners = len(permdb.data.get(permdb.OWNER, []))
admins = len(permdb.data.get(permdb.ADMIN, []))
msg = "There are {0} bot owners and {1} bot admins. Use '!{2} list owners' or '!{2} list admins' for details."
self.reply(data, msg.format(owners, admins, data.command))

def do_add(self, data, permdb):
user = self.get_user_from_args(data, permdb)
if user:
nick, ident, host = user
if data.args[1] in ["owner", "owners"]:
name, level, adder = "owner", permdb.OWNER, permdb.add_owner
else:
name, level, adder = "admin", permdb.ADMIN, permdb.add_admin
if permdb.has_exact(level, nick, ident, host):
rule = "{0}!{1}@{2}".format(nick, ident, host)
msg = "\x0302{0}\x0F is already a bot {1}.".format(rule, name)
self.reply(data, msg)
else:
rule = adder(nick, ident, host)
msg = "Added bot {0} \x0302{1}\x0F.".format(name, rule)
self.reply(data, msg)

def do_remove(self, data, permdb):
    """Remove the user named in the command from the owner or admin list.

    Nothing happens if ``get_user_from_args`` returns a falsy value (it has
    already replied to the user in that case). The remover's return value
    decides whether a removal or a "no match" message is sent.
    """
    parsed = self.get_user_from_args(data, permdb)
    if not parsed:
        return
    nick, ident, host = parsed

    if data.args[1] in ("owner", "owners"):
        name, remover = "owner", permdb.remove_owner
    else:
        name, remover = "admin", permdb.remove_admin

    removed = remover(nick, ident, host)
    if removed:
        reply = "Removed bot {0} \x0302{1}\x0F.".format(name, removed)
    else:
        wanted = "{0}!{1}@{2}".format(nick, ident, host)
        reply = "No bot {0} matching \x0302{1}\x0F.".format(name, wanted)
    self.reply(data, reply)

def get_user_from_args(self, data, permdb):
    """Validate an add/remove request and extract the targeted user.

    Returns a ``(nick, ident, host)`` tuple on success. On any failure an
    explanatory reply is sent and ``None`` is returned, so callers only need
    to test the result for truthiness.

    The user may be given either through ``nick=``/``ident=``/``host=``
    keyword arguments (missing parts default to ``*``) or as a single
    ``nick!ident@host`` string in ``data.args[2]``.
    """
    if not permdb.is_owner(data):
        # This helper guards both the "add" and "remove" subcommands, so the
        # message must not mention adding only.
        msg = "You must be a bot owner to add or remove users from the access list."
        self.reply(data, msg)
        return
    levels = ["owner", "owners", "admin", "admins"]
    if len(data.args) == 1 or data.args[1] not in levels:
        msg = "Please specify an access level ('owners' or 'admins')."
        self.reply(data, msg)
        return
    if len(data.args) == 2:
        self.no_arg_error(data)
        return
    kwargs = data.kwargs
    if "nick" in kwargs or "ident" in kwargs or "host" in kwargs:
        nick = kwargs.get("nick", "*")
        ident = kwargs.get("ident", "*")
        host = kwargs.get("host", "*")
        return nick, ident, host
    user = re.match(r"(.*?)!(.*?)@(.*?)$", data.args[2])
    if not user:
        self.no_arg_error(data)
        return
    return user.group(1), user.group(2), user.group(3)

def no_arg_error(self, data):
    """Reply with a reminder of the two accepted ways to specify a user."""
    pieces = (
        'Please specify a user, either as "\x0302nick\x0F!\x0302ident\x0F@\x0302host\x0F"',
        ' or "nick=\x0302nick\x0F, ident=\x0302ident\x0F, host=\x0302host\x0F".',
    )
    self.reply(data, "".join(pieces))

+ 83
- 0
earwigbot/commands/calc.py View File

@@ -0,0 +1,83 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import re
import urllib

from earwigbot.commands import Command

class Calc(Command):
    """A somewhat advanced calculator: see http://futureboy.us/fsp/frink.fsp
    for details."""
    name = "calc"

    def process(self, data):
        """Send the user's expression to the Frink web form and reply.

        The arguments are joined into one query, normalised by ``cleanup``,
        URL-quoted, and fetched from futureboy.us. The answer is scraped
        from the ``<A NAME=results>`` element of the returned HTML.
        """
        if not data.args:
            self.reply(data, "What do you want me to calculate?")
            return

        # Keep the text as typed: ``cleanup`` rewrites " in " to " -> ", so
        # the target unit can only be recovered from the raw query.
        raw = ' '.join(data.args)
        query = self.cleanup(raw)

        url = "http://futureboy.us/fsp/frink.fsp?fromVal={0}"
        url = url.format(urllib.quote(query))
        result = urllib.urlopen(url).read()

        r_result = re.compile(r'(?i)<A NAME=results>(.*?)</A>')
        r_tag = re.compile(r'<\S+.*?>')

        match = r_result.search(result)
        if not match:
            self.reply(data, "Calculation error.")
            return

        result = match.group(1)
        result = r_tag.sub("", result)  # strip span.warning tags
        result = result.replace("&gt;", ">")
        result = result.replace("(undefined symbol)", "(?) ")
        result = result.strip()

        if not result:
            result = '?'
        elif " in " in raw:
            # "X in unit": append the requested unit to the bare result.
            result += " " + raw.split(" in ", 1)[1]

        res = "%s = %s" % (query, result)
        self.reply(data, res)

    def cleanup(self, query):
        """Rewrite common shorthand in *query* into a form sent to Frink.

        Each ``(pattern, replacement)`` pair is applied in order with
        ``re.sub``; the stripped result is returned.
        """
        fixes = [
            (' in ', ' -> '),
            (' over ', ' / '),
            (u'£', 'GBP '),
            (u'€', 'EUR '),
            (r'\$', 'USD '),
            (r'\bKB\b', 'kilobytes'),
            (r'\bMB\b', 'megabytes'),
            (r'\bGB\b', 'gigabytes'),
            ('kbps', '(kilobits / second)'),
            ('mbps', '(megabits / second)'),
        ]

        for original, fix in fixes:
            query = re.sub(original, fix, query)
        return query.strip()

+ 91
- 0
earwigbot/commands/chanops.py View File

@@ -0,0 +1,91 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from earwigbot.commands import Command

class ChanOps(Command):
    """Voice, devoice, op, or deop users in the channel, or join or part from
    other channels."""
    name = "chanops"
    commands = ["chanops", "voice", "devoice", "op", "deop", "join", "part"]

    def process(self, data):
        """Dispatch a channel-operation command after a permission check.

        Anyone may devoice or deop themselves; every other action requires
        the sender to pass the ``is_admin`` check of the permissions object
        stored in the IRC config.
        """
        if data.command == "chanops":
            msg = "Available commands are !voice, !devoice, !op, !deop, !join, and !part."
            self.reply(data, msg)
            return

        lowering = data.command in ("devoice", "deop")
        on_self = lowering and (not data.args or data.args[0] == data.nick)
        if not on_self and not self.config.irc["permissions"].is_admin(data):
            self.reply(data, "You must be a bot admin to use this command.")
            return

        if data.command == "join":
            self.do_join(data)
            return
        if data.command == "part":
            self.do_part(data)
            return

        # A bare !op/!devoice/etc. applies to the person who sent it.
        target = data.args[0] if data.args else data.nick
        command = data.command.upper()
        self.say("ChanServ", " ".join((command, data.chan, target)))
        log = "{0} requested {1} on {2} in {3}"
        self.logger.info(log.format(data.nick, command, target, data.chan))

    def do_join(self, data):
        """Join the channel named in the first argument, adding '#' if absent."""
        if not data.args:
            msg = "You must specify a channel to join or part from."
            self.reply(data, msg)
            return

        channel = data.args[0]
        if not channel.startswith("#"):
            channel = "#" + channel

        self.join(channel)
        self.logger.info("{0} requested JOIN to {1}".format(data.nick, channel))

    def do_part(self, data):
        """Part from a channel, optionally giving a reason.

        A first argument starting with '#' names the channel and the rest
        is the reason; otherwise every argument is the reason and the
        current channel is parted.
        """
        channel, reason = data.chan, None
        if data.args:
            if data.args[0].startswith("#"):
                # "!part #channel reason for parting"
                channel = data.args[0]
                remainder = data.args[1:]
                if remainder:
                    reason = " ".join(remainder)
            else:
                # "!part reason for parting"; assume current channel
                reason = " ".join(data.args)

        msg = "Requested by {0}".format(data.nick)
        log = "{0} requested PART from {1}".format(data.nick, channel)
        if reason:
            msg = msg + ": {0}".format(reason)
            log = log + ' ("{0}")'.format(reason)
        self.part(channel, msg)
        self.logger.info(log)

+ 79
- 0
earwigbot/commands/crypt.py View File

@@ -0,0 +1,79 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import hashlib

from Crypto.Cipher import Blowfish

from earwigbot.commands import Command

class Crypt(Command):
    """Provides hash functions with !hash (!hash list for supported algorithms)
    and Blowfish encryption with !encrypt and !decrypt."""
    name = "crypt"
    commands = ["crypt", "hash", "encrypt", "decrypt"]

    def process(self, data):
        """Handle !crypt, !hash, !encrypt and !decrypt.

        ``!hash <algorithm> <text>`` replies with the hex digest of the
        text; ``!encrypt <key> <text>`` and ``!decrypt <key> <hex>`` use
        Blowfish keyed with the SHA-256 digest of the key. Ciphertext is
        exchanged hex-encoded.
        """
        if data.command == "crypt":
            msg = "Available commands are !hash, !encrypt, and !decrypt."
            self.reply(data, msg)
            return

        if not data.args:
            msg = "What do you want me to {0}?".format(data.command)
            self.reply(data, msg)
            return

        if data.command == "hash":
            algo = data.args[0]
            if algo == "list":
                algos = ', '.join(hashlib.algorithms)
                msg = algos.join(("Supported algorithms: ", "."))
                self.reply(data, msg)
            elif algo in hashlib.algorithms:
                string = ' '.join(data.args[1:])
                result = getattr(hashlib, algo)(string).hexdigest()
                self.reply(data, result)
            else:
                msg = "Unknown algorithm: '{0}'.".format(algo)
                self.reply(data, msg)

        else:
            key = data.args[0]
            text = " ".join(data.args[1:])

            if not text:
                msg = "A key was provided, but text to {0} was not."
                self.reply(data, msg.format(data.command))
                return

            cipher = Blowfish.new(hashlib.sha256(key).digest())
            try:
                if data.command == "encrypt":
                    # Pad with NUL bytes up to a multiple of 8 characters.
                    if len(text) % 8:
                        pad = 8 - len(text) % 8
                        text = text.ljust(len(text) + pad, "\x00")
                    self.reply(data, cipher.encrypt(text).encode("hex"))
                else:
                    plain = cipher.decrypt(text.decode("hex"))
                    # Drop the NUL padding that !encrypt appended.
                    self.reply(data, plain.rstrip("\x00"))
            except (TypeError, ValueError) as error:
                # Python 2's hex codec reports malformed input (odd length,
                # non-hex digit) with TypeError rather than ValueError, so
                # both must be caught to give the user a message.
                self.reply(data, str(error))

+ 68
- 0
earwigbot/commands/ctcp.py View File

@@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import platform
import time

from earwigbot import __version__
from earwigbot.commands import Command

class CTCP(Command):
    """Not an actual command; this module implements responses to the CTCP
    requests PING, TIME, and VERSION."""
    name = "ctcp"
    hooks = ["msg_private"]

    def check(self, data):
        """Return True for "!ctcp" or for a CTCP PING/TIME/VERSION request.

        A CTCP request is recognised by the fourth token of the line
        starting with ":\\x01" followed by one of the supported names.
        """
        if data.is_command and data.command == "ctcp":
            return True

        commands = ["PING", "TIME", "VERSION"]
        msg = data.line[3]
        if msg[:2] == ":\x01" and msg[2:].rstrip("\x01") in commands:
            return True
        return False

    def process(self, data):
        """Answer a CTCP request with a NOTICE to the sender.

        Invocations as a regular command are ignored.
        """
        if data.is_command:
            return

        target = data.nick
        command = data.line[3][1:].strip("\x01")

        if command == "PING":
            # The request's closing \x01 rides on the last token; strip it
            # so the reply does not end in a doubled delimiter.
            msg = " ".join(data.line[4:]).rstrip("\x01")
            if msg:
                self.notice(target, "\x01PING {0}\x01".format(msg))
            else:
                self.notice(target, "\x01PING\x01")

        elif command == "TIME":
            ts = time.strftime("%a, %d %b %Y %H:%M:%S %Z", time.localtime())
            self.notice(target, "\x01TIME {0}\x01".format(ts))

        elif command == "VERSION":
            # "$1" and "$2" in the configured string are placeholders for
            # the bot version and the Python version respectively.
            default = "EarwigBot - $1 - Python/$2 https://github.com/earwig/earwigbot"
            vers = self.config.irc.get("version", default)
            vers = vers.replace("$1", __version__)
            vers = vers.replace("$2", platform.python_version())
            self.notice(target, "\x01VERSION {0}\x01".format(vers))

+ 181
- 0
earwigbot/commands/dictionary.py View File

@@ -0,0 +1,181 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import re

from earwigbot import exceptions
from earwigbot.commands import Command

class Dictionary(Command):
    """Define words and stuff."""
    name = "dictionary"
    commands = ["dict", "dictionary", "define"]

    def process(self, data):
        """Reply with a definition of the term given in the arguments.

        The Wiktionary consulted is the one whose language matches the
        bot's default site.
        """
        if not data.args:
            self.reply(data, "What do you want me to define?")
            return

        term = " ".join(data.args)
        lang = self.bot.wiki.get_site().lang
        try:
            defined = self.define(term, lang)
        except exceptions.APIError:
            msg = "Cannot find a {0}-language Wiktionary."
            self.reply(data, msg.format(lang))
        else:
            self.reply(data, defined.encode("utf8"))

    def define(self, term, lang, tries=2):
        """Return a one-line definition of *term* from the *lang* Wiktionary.

        If the page cannot be fetched, the lowercase and then the
        capitalised spelling are tried, up to *tries* further attempts.
        """
        try:
            site = self.bot.wiki.get_site(project="wiktionary", lang=lang)
        except exceptions.SiteNotFoundError:
            site = self.bot.wiki.add_site(project="wiktionary", lang=lang)

        page = site.get_page(term, follow_redirects=True)
        try:
            entry = page.get()
        except (exceptions.PageNotFoundError, exceptions.InvalidPageError):
            if term.lower() != term and tries:
                return self.define(term.lower(), lang, tries - 1)
            if term.capitalize() != term and tries:
                return self.define(term.capitalize(), lang, tries - 1)
            return "No definition found."

        level, languages = self.get_languages(entry)
        if not languages:
            return u"Couldn't parse {0}!".format(page.url)

        result = []
        for language, section in sorted(languages.items()):
            definition = self.get_definition(section, level)
            result.append(u"({0}) {1}".format(language, definition))
        return u"; ".join(result)

    def get_languages(self, entry, level=2):
        """Split *entry* into sections keyed by heading text.

        Headings of depth *level* (``==Name==`` for 2) are looked for
        first; if there are none and *level* is 2, depth 3 is tried.
        Returns ``(level, {heading: section_text})``, or ``(level, None)``
        when no headings are found at either depth.
        """
        marker = "=" * level
        regex = r"(?:\A|\n){0}\s*([a-zA-Z0-9_ ]*?)\s*{0}(?:\Z|\n)".format(marker)
        # With one capturing group, re.split() returns
        # [preamble, heading1, body1, heading2, body2, ...]; fewer than
        # three items therefore means that no heading matched.
        split = re.split(regex, entry)
        if len(split) < 3:
            if level == 2:
                return self.get_languages(entry, level=3)
            return level, None

        split.pop(0)  # discard the text before the first heading
        languages = {}
        for i in xrange(0, len(split), 2):
            languages[split[i]] = split[i + 1]
        return level, languages

    def get_definition(self, section, level):
        """Return the definitions found in one language *section*.

        Each known part-of-speech heading (one level below *level*, either
        plain or wrapped in ``{{...}}``) is searched for, and every body
        that ``parse_body`` can make sense of is added to the result,
        prefixed with the bolded abbreviation.
        """
        parts_of_speech = {
            "v.": "Verb",
            "n.": "Noun",
            "pron.": "Pronoun",
            "adj.": "Adjective",
            "adv.": "Adverb",
            "prep.": "Preposition",
            "conj.": "Conjunction",
            "inter.": "Interjection",
            "symbol": "Symbol",
            "suffix": "Suffix",
            "initialism": "Initialism",
            "phrase": "Phrase",
            "proverb": "Proverb",
            "prop. n.": "Proper noun",
            "abbr.": "Abbreviation",
            "punct.": "Punctuation mark",
        }
        blocks = "=" * (level + 1)
        defs = []
        for part, basename in parts_of_speech.iteritems():
            fullnames = [basename, r"\{\{" + basename + r"\}\}",
                         r"\{\{" + basename.lower() + r"\}\}"]
            for fullname in fullnames:
                # Capture everything from the heading up to the next
                # heading marker of the same depth, or the end of section.
                regex = blocks + r"\s*" + fullname
                regex += r"\s*{0}(.*?)(?:(?:{0})|\Z)".format(blocks)
                for body in re.findall(regex, section, re.DOTALL):
                    definition = self.parse_body(body)
                    if definition:
                        msg = u"\x02{0}\x0F {1}"
                        defs.append(msg.format(part, definition))

        return "; ".join(defs)

    def parse_body(self, body):
        """Turn the wikitext under a part-of-speech heading into plain text.

        Only lines matching "#" followed by something other than ":", "*"
        or "#" are treated as senses. Returns None when there are none,
        the lone sense when there is one, and otherwise the senses numbered
        from 1 and joined by spaces.
        """
        substitutions = [
            (r"<!--(.*?)-->", ""),
            (r"<ref>(.*?)</ref>", ""),
            (r"\[\[[^\]|]*?\|([^\]|]*?)\]\]", r"\1"),
            (r"\{\{unsupported\|(.*?)\}\}", r"\1"),
            (r"\{\{(.*?) of\|([^}|]*?)(\|(.*?))?\}\}", r"\1 of \2."),
            (r"\{\{w\|(.*?)\}\}", r"\1"),
            (r"\{\{surname(.*?)\}\}", r"A surname."),
            (r"\{\{given name\|([^}|]*?)(\|(.*?))?\}\}", r"A \1 given name."),
        ]

        senses = []
        for line in body.splitlines():
            line = line.strip()
            if re.match(r"#\s*[^:*#]", line):
                for regex, repl in substitutions:
                    line = re.sub(regex, repl, line)
                line = self.strip_templates(line)
                # Drop the leading "#" and the bold/italic/link markup.
                line = line[1:].replace("'''", "").replace("''", "")
                line = line.replace("[[", "").replace("]]", "")
                if line.strip():
                    senses.append(line.strip()[0].upper() + line.strip()[1:])

        if not senses:
            return None
        if len(senses) == 1:
            return senses[0]

        result = []  # Number the senses incrementally
        for i, sense in enumerate(senses):
            result.append(u"{0}. {1}".format(i + 1, sense))
        return " ".join(result)

    def strip_templates(self, line):
        """Return *line* with every ``{{...}}`` template removed.

        Nesting is tracked with a depth counter; characters are kept only
        while the counter is exactly zero.
        """
        kept = []
        depth = 0
        pos, end = 0, len(line)
        while pos < end:
            pair = line[pos:pos + 2]
            if pair == "{{":
                depth += 1
                pos += 2
            elif pair == "}}":
                depth -= 1
                pos += 2
            else:
                if depth == 0:
                    kept.append(line[pos])
                pos += 1
        return "".join(kept)

+ 53
- 0
earwigbot/commands/editcount.py View File

@@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from urllib import quote_plus

from earwigbot import exceptions
from earwigbot.commands import Command

class Editcount(Command):
    """Return a user's edit count."""
    name = "editcount"
    commands = ["ec", "editcount"]

    def process(self, data):
        """Reply with the edit count of the named user and a counter link.

        The user defaults to the sender's nick when no arguments are
        given; otherwise all arguments are joined into the username.
        """
        name = ' '.join(data.args) if data.args else data.nick

        site = self.bot.wiki.get_site()
        user = site.get_user(name)

        try:
            count = user.editcount
        except exceptions.UserNotFoundError:
            not_found = "The user \x0302{0}\x0F does not exist."
            self.reply(data, not_found.format(name))
            return

        # URL-encode the username for the link to the edit counter tool.
        encoded = quote_plus(user.name.encode("utf8"))
        template = "http://toolserver.org/~tparis/pcount/index.php?name={0}&lang={1}&wiki={2}"
        link = template.format(encoded, site.lang, site.project)
        self.reply(data, "\x0302{0}\x0F has {1} edits ({2}).".format(name, count, link))

+ 71
- 0
earwigbot/commands/help.py View File

@@ -0,0 +1,71 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import re

from earwigbot.commands import Command

class Help(Command):
    """Displays help information."""
    name = "help"

    def check(self, data):
        """Accept "!help" and messages that address the bot by nick only."""
        if not data.is_command:
            return False
        if data.command == "help":
            return True
        if data.command:
            return False
        return bool(data.trigger == data.my_nick)

    def process(self, data):
        """Route to the greeting, per-command help, or the command list."""
        if not data.command:
            handler = self.do_hello
        elif data.args:
            handler = self.do_command_help
        else:
            handler = self.do_main_help
        handler(data)

    def do_main_help(self, data):
        """Reply with a general greeting that lists every loaded command."""
        template = "Hi, I'm a bot! I have {0} commands loaded: {1}. You can get help for any command with '!help <command>'."
        names = [cmnd.name for cmnd in self.bot.commands]
        names.sort()
        self.reply(data, template.format(len(names), ', '.join(names)))

    def do_command_help(self, data):
        """Reply with the docstring of the command named in the first arg.

        A command matches by its name or by any of its trigger words. If
        no matching command with a docstring exists, an apology is sent.
        """
        target = data.args[0]

        for command in self.bot.commands:
            if command.name != target and target not in command.commands:
                continue
            if not command.__doc__:
                continue
            # Collapse the docstring onto a single line.
            doc = re.sub("\s\s+", " ", command.__doc__.replace("\n", ""))
            template = 'Help for command \x0303{0}\x0F: "{1}"'
            self.reply(data, template.format(target, doc))
            return

        self.reply(data, "Sorry, no help for \x0303{0}\x0F.".format(target))

    def do_hello(self, data):
        """Answer a message that merely addressed the bot."""
        greeting = "Yes, {0}?".format(data.nick)
        self.say(data.chan, greeting)

+ 101
- 0
earwigbot/commands/lag.py View File

@@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from earwigbot import exceptions
from earwigbot.commands import Command

class Lag(Command):
    """Return the replag for a specific database on the Toolserver."""
    name = "lag"
    commands = ["lag", "replag", "maxlag"]

    def process(self, data):
        """Reply with replag, maxlag, or both, depending on the trigger."""
        site = self.get_site(data)
        if not site:
            return

        if data.command == "replag":
            msg = "\x0302{0}\x0F: {1}.".format(site.name, self.get_replag(site))
        elif data.command == "maxlag":
            maxlag = self.get_maxlag(site).capitalize()
            msg = "\x0302{0}\x0F: {1}.".format(site.name, maxlag)
        else:
            msg = "\x0302{0}\x0F: {1}; {2}.".format(
                site.name, self.get_replag(site), self.get_maxlag(site))
        self.reply(data, msg)

    def get_replag(self, site):
        """Return a sentence fragment describing the site's replag."""
        lag = self.time(site.get_replag())
        return "Toolserver replag is {0}".format(lag)

    def get_maxlag(self, site):
        """Return a sentence fragment describing the site's maxlag."""
        lag = self.time(site.get_maxlag())
        return "database maxlag is {0}".format(lag)

    def get_site(self, data):
        """Work out which site the user asked about.

        Accepts ``project=``/``lang=`` keyword arguments, no arguments
        (the default site), or a single argument of the form
        ``lang.project``, ``project:lang``, or a site name. On failure a
        reply is sent and None is returned.
        """
        kwargs = data.kwargs
        if kwargs and "project" in kwargs and "lang" in kwargs:
            return self.get_site_from_proj_and_lang(
                data, kwargs["project"], kwargs["lang"])

        if not data.args:
            return self.bot.wiki.get_site()

        if len(data.args) > 1:
            joined = " ".join(data.args)
            self.reply(data, "Unknown site: \x0302{0}\x0F.".format(joined))
            return

        name = data.args[0]
        if "." in name:
            lang, project = name.split(".")[:2]
            return self.get_site_from_proj_and_lang(data, project, lang)
        if ":" in name:
            project, lang = name.split(":")[:2]
            return self.get_site_from_proj_and_lang(data, project, lang)

        try:
            return self.bot.wiki.get_site(name)
        except exceptions.SiteNotFoundError:
            self.reply(data, "Unknown site: \x0302{0}\x0F.".format(name))
            return

    def get_site_from_proj_and_lang(self, data, project, lang):
        """Return the site for *project*/*lang*, adding it if it is unknown.

        Replies and returns None if the site cannot be added.
        """
        wiki = self.bot.wiki
        try:
            return wiki.get_site(project=project, lang=lang)
        except exceptions.SiteNotFoundError:
            pass

        try:
            return wiki.add_site(project=project, lang=lang)
        except exceptions.APIError:
            not_found = "Site \x0302{0}:{1}\x0F not found."
            self.reply(data, not_found.format(project, lang))
            return

    def time(self, seconds):
        """Format a number of seconds as "N years, N days, ...".

        Units whose count is zero are omitted; a zero duration is
        rendered as "0 seconds".
        """
        units = (("year", 31536000), ("day", 86400), ("hour", 3600),
                 ("minute", 60), ("second", 1))
        pieces = []
        remaining = seconds
        for unit, length in units:
            count = remaining / length
            remaining -= count * length
            if not count:
                continue
            label = unit if count == 1 else unit + "s"
            pieces.append("{0} {1}".format(count, label))
        if not pieces:
            return "0 seconds"
        return ", ".join(pieces)

+ 62
- 0
earwigbot/commands/langcode.py View File

@@ -0,0 +1,62 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from earwigbot.commands import Command

class Langcode(Command):
    """Convert a language code into its name and a list of WMF sites in that
    language, or a name into its code."""
    name = "langcode"
    commands = ["langcode", "lang", "language"]

    def process(self, data):
        """Look the first argument up in the site matrix and reply.

        The argument is compared, lowercased, against each entry's code
        and against the lowercased name and local name.
        """
        if not data.args:
            self.reply(data, "Please specify a language code.")
            return

        code = data.args[0]
        lcase = code.lower()
        site = self.bot.wiki.get_site()
        matrix = site.api_query(action="sitematrix")["sitematrix"]
        # Remove the two keys that are not per-language entries.
        del matrix["count"]
        del matrix["specials"]

        for entry in matrix.itervalues():
            if not entry["name"]:
                continue
            name = entry["name"].encode("utf8")
            localname = entry["localname"].encode("utf8")

            by_code = entry["code"] == lcase
            if not by_code and not (name.lower() == lcase or
                                    localname.lower() == lcase):
                continue

            if name != localname:
                name = name + " ({0})".format(localname)
            sites = ", ".join([s["url"] for s in entry["site"]])
            if by_code:
                msg = "\x0302{0}\x0F is {1} ({2})".format(code, name, sites)
            else:
                template = "{0} is \x0302{1}\x0F ({2})"
                msg = template.format(name, entry["code"], sites)
            self.reply(data, msg)
            return

        self.reply(data, "Language \x0302{0}\x0F not found.".format(code))

+ 79
- 0
earwigbot/commands/link.py View File

@@ -0,0 +1,79 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import re

from earwigbot.commands import Command

class Link(Command):
    """Convert a Wikipedia page name into a URL."""
    name = "link"

    def setup(self):
        """Create the per-channel store of the last message with a link."""
        self.last = {}

    def check(self, data):
        """Record link-bearing messages; trigger only on the command itself."""
        has_markup = re.search("(\[\[(.*?)\]\])|(\{\{(.*?)\}\})", data.msg)
        if has_markup:
            self.last[data.chan] = data.msg  # Store most recent link
        return data.is_command and data.command == self.name

    def process(self, data):
        """Reply with URLs for the links in the message or the named page.

        Markup in the message itself takes priority; otherwise "!link"
        with arguments links the named page, and a bare "!link" expands
        the last link-bearing message stored for the channel.
        """
        self.site = self.bot.wiki.get_site()

        if re.search("(\[\[(.*?)\]\])|(\{\{(.*?)\}\})", data.msg):
            text = data.msg
        elif data.command != "link":
            return
        elif data.args:
            pagename = " ".join(data.args)
            url = self.site.get_page(pagename).url.encode("utf8")
            self.reply(data, url)
            return
        elif data.chan in self.last:
            text = self.last[data.chan]
        else:
            self.reply(data, "What do you want me to link to?")
            return

        joined = u" , ".join(self.parse_line(text))
        self.reply(data, joined.encode("utf8"))

    def parse_line(self, line):
        """Return the URLs of every [[link]] and {{template}} in *line*."""
        # Destroy {{{template parameters}}} so they are not read as templates:
        line = re.sub("\{\{\{(.*?)\}\}\}", "", line)

        # re.findall() yields one tuple per match; the page title is the
        # second item of each tuple.
        urls = [self.site.get_page(found[1]).url
                for found in re.findall("(\[\[(.*?)(\||\]\]))", line)]

        for found in re.findall("(\{\{(.*?)(\||\}\}))", line):
            urls.append(self.site.get_page("Template:" + found[1]).url)

        return urls

+ 319
- 0
earwigbot/commands/notes.py View File

@@ -0,0 +1,319 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from datetime import datetime
from os import path
import re
import sqlite3 as sqlite
from threading import Lock

from earwigbot.commands import Command

class Notes(Command):
"""A mini IRC-based wiki for storing notes, tips, and reminders."""
name = "notes"
commands = ["notes", "note", "about"]
version = 2

def setup(self):
self._dbfile = path.join(self.config.root_dir, "notes.db")
self._db_access_lock = Lock()

def process(self, data):
    """Dispatch the invocation to the subcommand named by its first argument.

    Without arguments, reply with the version and the available
    subcommands. The subcommand name is matched case-insensitively; an
    unrecognized name produces an error reply.
    """
    handlers = {
        "help": self.do_help,
        "list": self.do_list,
        "read": self.do_read,
        "edit": self.do_edit,
        "info": self.do_info,
        "rename": self.do_rename,
        "delete": self.do_delete,
    }

    if not data.args:
        template = "\x0302The Earwig Mini-Wiki\x0F: running v{0}. Subcommands are: {1}. You can get help on any with '!{2} help subcommand'."
        listing = ", ".join(handlers)
        self.reply(data, template.format(self.version, listing, data.command))
        return

    subcommand = data.args[0].lower()
    handler = handlers.get(subcommand)
    if handler is None:
        self.reply(data, "Unknown subcommand: \x0303{0}\x0F.".format(subcommand))
        return
    handler(data)

def do_help(self, data):
    """Reply with a short description of the subcommand in ``data.args[1]``.

    The name is matched case-insensitively, the same way ``process()``
    matches the subcommand it dispatches on. If no name is given, or the
    name is unknown, reply with an error message instead.
    """
    info = {
        "help": "Get help on other subcommands.",
        "list": "List existing entries.",
        "read": "Read an existing entry ('!notes read [name]').",
        "edit": """Modify or create a new entry ('!notes edit name
                   [entry content]...'). If modifying, you must be the
                   entry author or a bot admin.""",
        "info": """Get information on an existing entry ('!notes info
                   [name]').""",
        "rename": """Rename an existing entry ('!notes rename [old_name]
                     [new_name]'). You must be the entry author or a bot
                     admin.""",
        "delete": """Delete an existing entry ('!notes delete [name]'). You
                     must be the entry author or a bot admin.""",
    }

    try:
        command = data.args[1].lower()
    except IndexError:
        self.reply(data, "Please specify a subcommand to get help on.")
        return

    # Test membership explicitly rather than catching KeyError around the
    # reply, so an error raised while replying is never mistaken for an
    # unknown subcommand.
    if command not in info:
        msg = "Unknown subcommand: \x0303{0}\x0F.".format(command)
        self.reply(data, msg)
        return

    # The longer help texts are wrapped across source lines: drop the
    # newlines, then collapse the indentation that followed them.
    help_ = re.sub(r"\s\s+", " ", info[command].replace("\n", ""))
    self.reply(data, "\x0303{0}\x0F: ".format(command) + help_)

def do_list(self, data):
    """Reply with the titles of all entries in the notes database."""
    with sqlite.connect(self._dbfile) as conn:
        with self._db_access_lock:
            try:
                rows = conn.execute("SELECT entry_title FROM entries").fetchall()
            except sqlite.OperationalError:
                # The query fails e.g. when the tables have not been created
                # yet; report that the same way as an empty database.
                rows = []

    if not rows:
        self.reply(data, "No entries in the database.")
        return
    titles = ", ".join(row[0] for row in rows)
    self.reply(data, "Entries: {0}".format(titles))

def do_read(self, data):
    """Reply with the current content of the entry named by ``data.args[1]``.

    The entry is looked up by its slug. If it does not exist (or has empty
    content), reply that it was not found.
    """
    try:
        name = data.args[1]
    except IndexError:
        self.reply(data, "Please specify an entry to read from.")
        return
    slug = self.slugify(name)

    lookup = """SELECT entry_title, rev_content FROM entries
                INNER JOIN revisions ON entry_revision = rev_id
                WHERE entry_slug = ?"""
    with sqlite.connect(self._dbfile) as conn:
        with self._db_access_lock:
            try:
                row = conn.execute(lookup, (slug,)).fetchone()
            except sqlite.OperationalError:
                row = None

    # Without a matching row, fall back to the slug for the error message.
    title, content = (slug, None) if row is None else row
    if not content:
        self.reply(data, "Entry \x0302{0}\x0F not found.".format(title))
        return
    self.reply(data, "\x0302{0}\x0F: {1}".format(title, content))

def do_edit(self, data):
    """Create an entry, or store a new revision of an existing one.

    ``data.args[1]`` names the entry and the remaining arguments, joined by
    spaces, form the new content. An existing entry may only be changed by
    the host that wrote its current revision or by a bot admin. The
    database tables are created on first use.
    """
    find_entry = """SELECT entry_id, entry_title, user_host FROM entries
                    INNER JOIN revisions ON entry_revision = rev_id
                    INNER JOIN users ON rev_user = user_id
                    WHERE entry_slug = ?"""
    add_revision = "INSERT INTO revisions VALUES (?, ?, ?, ?, ?)"
    add_entry = "INSERT INTO entries VALUES (?, ?, ?, ?)"
    set_revision = "UPDATE entries SET entry_revision = ? WHERE entry_id = ?"

    try:
        name = data.args[1]
    except IndexError:
        self.reply(data, "Please specify an entry to edit.")
        return
    slug = self.slugify(name)
    content = " ".join(data.args[2:]).strip()
    if not content:
        self.reply(data, "Please give some content to put in the entry.")
        return

    with sqlite.connect(self._dbfile) as conn:
        with self._db_access_lock:
            exists = False
            try:
                row = conn.execute(find_entry, (slug,)).fetchone()
            except sqlite.OperationalError:
                # The lookup failed, which is taken to mean the tables do
                # not exist yet: create them and start numbering at 1.
                self.create_db(conn)
                id_, title, author = 1, name, data.host
            else:
                if row is None:
                    # No such entry yet; the caller becomes its author.
                    id_ = self.get_next_entry(conn)
                    title, author = name, data.host
                else:
                    id_, title, author = row
                    exists = True

            permdb = self.config.irc["permissions"]
            if not (author == data.host or permdb.is_admin(data)):
                msg = "You must be an author or a bot admin to edit this entry."
                self.reply(data, msg)
                return

            revid = self.get_next_revision(conn)
            userid = self.get_user(conn, data.host)
            now = datetime.utcnow().strftime("%b %d, %Y %H:%M:%S")
            conn.execute(add_revision, (revid, id_, userid, now, content))
            if exists:
                conn.execute(set_revision, (revid, id_))
            else:
                conn.execute(add_entry, (id_, slug, title, revid))

    self.reply(data, "Entry \x0302{0}\x0F updated.".format(title))

def do_info(self, data):
    """Reply with the edit count, edit dates and authors of an entry.

    ``data.args[1]`` names the entry. The reply gives the number of
    revisions, the time of the first one, the time of the last one (when
    there is more than one) and the hosts that wrote them, in sorted order.
    If the entry does not exist, reply that it was not found.
    """
    query = """SELECT entry_title, rev_timestamp, user_host FROM entries
               INNER JOIN revisions ON entry_id = rev_entry
               INNER JOIN users ON rev_user = user_id
               WHERE entry_slug = ?"""
    stamp_format = "%b %d, %Y %H:%M:%S"

    def moment(stamp):
        # Timestamps are stored as text starting with the month name (the
        # format do_edit() writes), so comparing the raw strings would
        # order them alphabetically by month. Compare real datetimes
        # instead; a stamp that cannot be parsed sorts before all others
        # rather than breaking the command.
        try:
            return datetime.strptime(stamp, stamp_format)
        except (TypeError, ValueError):
            return datetime.min

    try:
        slug = self.slugify(data.args[1])
    except IndexError:
        self.reply(data, "Please specify an entry to get info on.")
        return

    with sqlite.connect(self._dbfile) as conn, self._db_access_lock:
        try:
            info = conn.execute(query, (slug,)).fetchall()
        except sqlite.OperationalError:
            info = []

    if not info:
        title = data.args[1]
        self.reply(data, "Entry \x0302{0}\x0F not found.".format(title))
        return

    title = info[0][0]
    times = [datum[1] for datum in info]
    earliest = min(times, key=moment)
    msg = "\x0302{0}\x0F: {1} edits since {2}"
    msg = msg.format(title, len(info), earliest)
    if len(times) > 1:
        latest = max(times, key=moment)
        msg += "; last edit on {0}".format(latest)
    # One row per revision, so de-duplicate the authors; sorting keeps the
    # reply stable from one call to the next.
    names = sorted(set(datum[2] for datum in info))
    msg += "; authors: {0}.".format(", ".join(names))
    self.reply(data, msg)

def do_rename(self, data):
    """Give an existing entry a new title, and the slug derived from it.

    ``data.args[1]`` is the current name and ``data.args[2]`` the new one.
    Only the host that wrote the entry's current revision or a bot admin
    may rename it.
    """
    lookup = """SELECT entry_id, user_host FROM entries
                INNER JOIN revisions ON entry_revision = rev_id
                INNER JOIN users ON rev_user = user_id
                WHERE entry_slug = ?"""
    update = """UPDATE entries SET entry_slug = ?, entry_title = ?
                WHERE entry_id = ?"""

    try:
        oldtitle = data.args[1]
    except IndexError:
        self.reply(data, "Please specify an entry to rename.")
        return
    slug = self.slugify(oldtitle)
    try:
        newtitle = data.args[2]
    except IndexError:
        self.reply(data, "Please specify a new name for the entry.")
        return
    if newtitle == oldtitle:
        self.reply(data, "The old and new names are identical.")
        return

    with sqlite.connect(self._dbfile) as conn:
        with self._db_access_lock:
            try:
                row = conn.execute(lookup, (slug,)).fetchone()
            except sqlite.OperationalError:
                row = None
            if row is None:
                msg = "Entry \x0302{0}\x0F not found.".format(oldtitle)
                self.reply(data, msg)
                return

            id_, author = row
            permdb = self.config.irc["permissions"]
            if not (author == data.host or permdb.is_admin(data)):
                msg = "You must be an author or a bot admin to rename this entry."
                self.reply(data, msg)
                return
            conn.execute(update, (self.slugify(newtitle), newtitle, id_))

    done = "Entry \x0302{0}\x0F renamed to \x0302{1}\x0F."
    self.reply(data, done.format(oldtitle, newtitle))

def do_delete(self, data):
    """Remove an entry and all of its revisions from the notes database.

    ``data.args[1]`` names the entry. Only the host that wrote the entry's
    current revision or a bot admin may delete it.
    """
    lookup = """SELECT entry_id, user_host FROM entries
                INNER JOIN revisions ON entry_revision = rev_id
                INNER JOIN users ON rev_user = user_id
                WHERE entry_slug = ?"""
    drop_entry = "DELETE FROM entries WHERE entry_id = ?"
    drop_revisions = "DELETE FROM revisions WHERE rev_entry = ?"

    try:
        name = data.args[1]
    except IndexError:
        self.reply(data, "Please specify an entry to delete.")
        return
    slug = self.slugify(name)

    with sqlite.connect(self._dbfile) as conn:
        with self._db_access_lock:
            try:
                row = conn.execute(lookup, (slug,)).fetchone()
            except sqlite.OperationalError:
                row = None
            if row is None:
                msg = "Entry \x0302{0}\x0F not found.".format(name)
                self.reply(data, msg)
                return

            id_, author = row
            permdb = self.config.irc["permissions"]
            if not (author == data.host or permdb.is_admin(data)):
                msg = "You must be an author or a bot admin to delete this entry."
                self.reply(data, msg)
                return
            for statement in (drop_entry, drop_revisions):
                conn.execute(statement, (id_,))

    self.reply(data, "Entry \x0302{0}\x0F deleted.".format(name))

def slugify(self, name):
    """Return the database key for *name*: lowercased, without "_" or "-"."""
    slug = name.lower()
    for separator in ("_", "-"):
        slug = slug.replace(separator, "")
    return slug

def create_db(self, conn):
    """Create the entries, users and revisions tables on *conn*."""
    schema = (
        "CREATE TABLE entries (entry_id, entry_slug, entry_title,"
        " entry_revision);\n"
        "CREATE TABLE users (user_id, user_host);\n"
        "CREATE TABLE revisions (rev_id, rev_entry, rev_user, rev_timestamp,"
        " rev_content);\n"
    )
    conn.executescript(schema)

def get_next_entry(self, conn):
    """Return the ID for a new entry: one more than the highest in use."""
    (highest,) = conn.execute("SELECT MAX(entry_id) FROM entries").fetchone()
    if not highest:
        # MAX() yields NULL on an empty table; numbering then starts at 1.
        return 1
    return highest + 1

def get_next_revision(self, conn):
    """Return the ID for a new revision: one more than the highest in use."""
    (highest,) = conn.execute("SELECT MAX(rev_id) FROM revisions").fetchone()
    if not highest:
        # MAX() yields NULL on an empty table; numbering then starts at 1.
        return 1
    return highest + 1

def get_user(self, conn, host):
    """Return the user ID stored for *host*, registering the host if new."""
    known = conn.execute("SELECT user_id FROM users WHERE user_host = ?",
                         (host,)).fetchone()
    if known:
        return known[0]

    # Unknown host: give it the next free ID (1 when the table is empty).
    (highest,) = conn.execute("SELECT MAX(user_id) FROM users").fetchone()
    new_id = highest + 1 if highest else 1
    conn.execute("INSERT INTO users VALUES (?, ?)", (new_id, host))
    return new_id

+ 68
- 0
earwigbot/commands/quit.py View File

@@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from earwigbot.commands import Command

class Quit(Command):
"""Quit, restart, or reload components from the bot. Only the owners can
run this command."""
name = "quit"
commands = ["quit", "restart", "reload"]

def process(self, data):
    """Run the action named by ``data.command``; bot owners only.

    "quit" and "restart" map to their handlers; any other command this
    method is invoked with is treated as a reload.
    """
    permissions = self.config.irc["permissions"]
    if not permissions.is_owner(data):
        self.reply(data, "You must be a bot owner to use this command.")
        return

    command = data.command
    if command == "quit":
        action = self.do_quit
    elif command == "restart":
        action = self.do_restart
    else:
        action = self.do_reload
    action(data)

def do_quit(self, data):
    """Stop the bot, passing along an optional reason.

    If the command was triggered with something other than the bot's own
    nick, the first argument must be that nick (compared in lowercase) as
    a confirmation; it is then left out of the reason.
    """
    words = data.args
    if data.trigger != data.my_nick:
        confirmed = bool(words) and words[0].lower() == data.my_nick
        if not confirmed:
            self.reply(data, "To confirm this action, the first argument must be my name.")
            return
        words = words[1:]

    reason = " ".join(words)
    if not reason:
        self.bot.stop("Stopped by {0}".format(data.nick))
        return
    self.bot.stop("Stopped by {0}: {1}".format(data.nick, reason))

def do_restart(self, data):
    """Restart the bot, passing along an optional reason from the arguments."""
    if not data.args:
        self.bot.restart("Restarted by {0}".format(data.nick))
        return
    reason = " ".join(data.args)
    self.bot.restart("Restarted by {0}: {1}".format(data.nick, reason))

def do_reload(self, data):
    """Reload the bot's IRC commands and tasks, then confirm to the user."""
    notice = "{0} requested command/task reload".format(data.nick)
    self.logger.info(notice)
    # Commands are reloaded before tasks.
    for manager in (self.bot.commands, self.bot.tasks):
        manager.load()
    self.reply(data, "IRC commands and bot tasks reloaded.")

+ 72
- 0
earwigbot/commands/registration.py View File

@@ -0,0 +1,72 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import time

from earwigbot import exceptions
from earwigbot.commands import Command

class Registration(Command):
"""Return when a user registered."""
name = "registration"
commands = ["registration", "reg", "age"]

def process(self, data):
    """Reply with the registration date and account age of a wiki user.

    The user name is the arguments joined by spaces, or the caller's nick
    when no arguments are given. If the user does not exist, say so.
    """
    name = ' '.join(data.args) if data.args else data.nick
    user = self.bot.wiki.get_site().get_user(name)

    try:
        reg = user.registration
    except exceptions.UserNotFoundError:
        self.reply(data, "The user \x0302{0}\x0F does not exist.".format(name))
        return

    date = time.strftime("%b %d, %Y at %H:%M:%S UTC", reg)
    registered = time.mktime(reg)
    now = time.mktime(time.gmtime())
    age = self.get_diff(registered, now)

    # Any gender other than "male" or "female" gets the singular they.
    pronouns = {"male": "He's", "female": "She's"}
    gender = pronouns.get(user.gender, "They're")

    template = "\x0302{0}\x0F registered on {1}. {2} {3} old."
    self.reply(data, template.format(name, date, gender, age))

def get_diff(self, t1, t2):
    """Describe the time from *t1* to *t2* (both in seconds) in words.

    The difference is broken down into years (of 365 days), days, hours,
    minutes and seconds; units with a zero count are left out, e.g.
    ``"1 hour, 1 minute, 1 second"``. Fractions of a second are dropped.
    Returns ``"0 seconds"`` when nothing is left to report.
    """
    parts = [("year", 31536000), ("day", 86400), ("hour", 3600),
             ("minute", 60), ("second", 1)]
    msg = []
    for name, size in parts:
        # Explicit floor division: a plain "/" only yields whole units
        # where int / int truncates, and gives fractional counts otherwise.
        num = int(t2 - t1) // size
        # Move t1 forward by what was just accounted for, so the smaller
        # units only see the remainder.
        t1 += num * size
        if num:
            chunk = "{0} {1}".format(num, name if num == 1 else name + "s")
            msg.append(chunk)
    return ", ".join(msg) if msg else "0 seconds"

+ 62
- 0
earwigbot/commands/remind.py View File

@@ -0,0 +1,62 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from threading import Timer
import time

from earwigbot.commands import Command

class Remind(Command):
"""Set a message to be repeated to you in a certain amount of time."""
name = "remind"
commands = ["remind", "reminder"]

def process(self, data):
    """Repeat a message back to the user after a given number of seconds.

    ``data.args[0]`` must be an integer number of seconds; the remaining
    arguments form the message. After a confirmation reply, a daemon
    timer thread delivers the message through ``self.reply``.
    """
    if not data.args:
        self.reply(data, "Please specify a time (in seconds) and a message in the following format: !remind <time> <msg>.")
        return

    try:
        wait = int(data.args[0])
    except ValueError:
        self.reply(data, "The time must be given as an integer, in seconds.")
        return

    message = ' '.join(data.args[1:])
    if not message:
        self.reply(data, "What message do you want me to give you when time is up?")
        return

    stamp = "%b %d %H:%M:%S"
    end = time.localtime(time.time() + wait)
    end_time = time.strftime(stamp, end)
    end_time_with_timezone = time.strftime(stamp + " %Z", end)

    confirmation = 'Set reminder for "{0}" in {1} seconds (ends {2}).'
    self.reply(data, confirmation.format(message, wait, end_time_with_timezone))

    t_reminder = Timer(wait, self.reply, args=(data, message))
    # The thread's name carries the reminder's end time (without timezone).
    t_reminder.name = "reminder " + end_time
    t_reminder.daemon = True
    t_reminder.start()

+ 52
- 0
earwigbot/commands/rights.py View File

@@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from earwigbot import exceptions
from earwigbot.commands import Command

class Rights(Command):
"""Retrieve a list of rights for a given username."""
name = "rights"
commands = ["rights", "groups", "permissions", "privileges"]

def process(self, data):
    """Reply with the groups a wiki user belongs to.

    The user name is the arguments joined by spaces, or the caller's nick
    when no arguments are given. If the user does not exist, say so.
    """
    name = ' '.join(data.args) if data.args else data.nick
    user = self.bot.wiki.get_site().get_user(name)

    try:
        rights = user.groups
    except exceptions.UserNotFoundError:
        self.reply(data, "The user \x0302{0}\x0F does not exist.".format(name))
        return

    # Remove the '*' group given to everyone
    if "*" in rights:
        rights.remove("*")
    template = "The rights for \x0302{0}\x0F are {1}."
    self.reply(data, template.format(name, ', '.join(rights)))

+ 37
- 0
earwigbot/commands/test.py View File

@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import random

from earwigbot.commands import Command

class Test(Command):
"""Test the bot!"""
name = "test"

def process(self, data):
    """Greet the calling user in the channel with one of two greetings."""
    user = "\x02" + data.nick + "\x0F"  # Wrap nick in bold
    # A coin flip picks the greeting.
    greeting = "Hey {0}!" if random.randint(0, 1) else "'Sup {0}?"
    self.say(data.chan, greeting.format(user))

+ 143
- 0
earwigbot/commands/threads.py View File

@@ -0,0 +1,143 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import threading
import re

from earwigbot.commands import Command

class Threads(Command):
"""Manage wiki tasks from IRC, and check on thread status."""
name = "threads"
commands = ["tasks", "task", "threads", "tasklist"]

def process(self, data):
    """Route the command to the subcommand named by its first argument.

    Only bot owners may use it. Without arguments, ``tasklist`` behaves
    like ``list`` and every other alias gets a usage hint. *data* is kept
    on ``self.data`` for the ``do_*`` methods, which take no arguments.
    """
    self.data = data
    permissions = self.config.irc["permissions"]
    if not permissions.is_owner(data):
        self.reply(data, "You must be a bot owner to use this command.")
        return

    if not data.args:
        if data.command == "tasklist":
            self.do_list()
            return
        hint = "No arguments provided. Maybe you wanted '!{0} list', '!{0} start', or '!{0} listall'?"
        self.reply(data, hint.format(data.command))
        return

    action = data.args[0]
    if action == "list":
        self.do_list()
    elif action == "start":
        self.do_start()
    elif action in ("listall", "all"):
        self.do_listall()
    else:
        # They asked us to do something we don't know
        self.reply(data, "Unknown argument: \x0303{0}\x0F.".format(action))

def do_list(self):
    """With !tasks list (or abbreviation !tasklist), list all running
    threads. This includes the main threads, like the irc frontend and the
    watcher, and task threads.

    Threads are classified by name: ``MainThread``, the names found in
    ``self.config.components`` and ``reminder ...`` threads are reported
    as normal threads; names of the form ``"<name> (<start time>)"`` are
    reported as command/task threads. A thread with any other name is
    listed among the normal threads by name and id.
    """
    threads = threading.enumerate()
    task_name = re.compile(r"^(.*?) \((.*?)\)$")

    normal_threads = []
    daemon_threads = []

    for thread in threads:
        tname = thread.name
        if tname == "MainThread":
            t = "\x0302MainThread\x0F (id {0})"
            normal_threads.append(t.format(thread.ident))
        elif tname in self.config.components:
            t = "\x0302{0}\x0F (id {1})"
            normal_threads.append(t.format(tname, thread.ident))
        elif tname.startswith("reminder"):
            tname = tname.replace("reminder ", "")
            t = "\x0302reminder\x0F (until {0})"
            normal_threads.append(t.format(tname))
        else:
            match = task_name.match(tname)
            if match:
                t = "\x0302{0}\x0F (id {1}, since {2})"
                daemon_threads.append(t.format(match.group(1), thread.ident,
                                               match.group(2)))
            else:
                # Not every thread follows the "<name> (<start time>)"
                # convention (e.g. default "Thread-N" names); list those
                # as they are instead of failing on the missing match.
                t = "\x0302{0}\x0F (id {1})"
                normal_threads.append(t.format(tname, thread.ident))

    if daemon_threads:
        if len(daemon_threads) > 1:
            msg = "\x02{0}\x0F threads active: {1}, and \x02{2}\x0F command/task threads: {3}."
        else:
            msg = "\x02{0}\x0F threads active: {1}, and \x02{2}\x0F command/task thread: {3}."
        msg = msg.format(len(threads), ', '.join(normal_threads),
                         len(daemon_threads), ', '.join(daemon_threads))
    else:
        msg = "\x02{0}\x0F threads active: {1}, and \x020\x0F command/task threads."
        msg = msg.format(len(threads), ', '.join(normal_threads))

    self.reply(self.data, msg)

def do_listall(self):
    """With !tasks listall or !tasks all, list all loaded tasks, and report
    whether they are currently running or idle.

    A task counts as running when at least one live thread has a name that
    starts with the task's name.
    """
    live = threading.enumerate()
    report = []
    for task in sorted(task.name for task in self.bot.tasks):
        ids = [str(thread.ident) for thread in live
               if thread.name.startswith(task)]
        if not ids:
            entry = "\x0302{0}\x0F (idle)".format(task)
        elif len(ids) == 1:
            entry = "\x0302{0}\x0F (\x02active\x0F as id {1})".format(task, ids[0])
        else:
            entry = "\x0302{0}\x0F (\x02active\x0F as ids {1})".format(task, ', '.join(ids))
        report.append(entry)

    summary = "\x02{0}\x0F tasks loaded: {1}.".format(len(report), ", ".join(report))
    self.reply(self.data, summary)

def do_start(self):
    """With !tasks start, start any loaded task by name with or without
    kwargs.

    The task name is ``data.args[1]``. The keyword arguments in
    ``data.kwargs`` are passed on to the task together with
    ``fromIRC=True``.
    """
    data = self.data

    try:
        task_name = data.args[1]
    except IndexError:  # No task name given
        self.reply(data, "What task do you want me to start?")
        return

    loaded = any(task.name == task_name for task in self.bot.tasks)
    if not loaded:
        # This task does not exist or hasn't been loaded:
        self.reply(data, "Task could not be found; either it doesn't exist, or it wasn't loaded correctly.")
        return

    data.kwargs["fromIRC"] = True
    self.bot.tasks.start(task_name, **data.kwargs)
    self.reply(data, "Task \x0302{0}\x0F started.".format(task_name))

+ 65
- 0
earwigbot/commands/time_command.py View File

@@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from datetime import datetime
from math import floor
from time import time

import pytz

from earwigbot.commands import Command

class Time(Command):
"""Report the current time in any timezone (UTC default), or in beats."""
name = "time"
commands = ["time", "beats", "swatch"]

def process(self, data):
    """Report the time in beats or in the timezone given as first argument.

    Beats are reported when the command itself is ``beats``/``swatch`` or
    when one of those words is given in place of a timezone. Without
    arguments the timezone defaults to UTC.
    """
    beat_words = ("beats", "swatch")
    if data.command in beat_words:
        self.do_beats(data)
        return

    timezone = data.args[0] if data.args else "UTC"
    if timezone in beat_words:
        self.do_beats(data)
        return
    self.do_time(data, timezone)

def do_beats(self, data):
    """Reply with the current time in beats, as ``@`` plus three digits.

    The day is counted from midnight one hour ahead of the epoch clock and
    divided into beats of 86.4 seconds, i.e. 1000 beats per day.
    """
    seconds_into_day = (time() + 3600) % 86400
    beats = int(floor(seconds_into_day / 86.4))
    self.reply(data, "@{0:0>3}".format(beats))

def do_time(self, data, timezone):
    """Reply with the current date and time in the named *timezone*.

    *timezone* is looked up with ``pytz.timezone()``; an unknown name is
    reported back to the user.
    """
    if not pytz:
        self.reply(data, "This command requires the 'pytz' module: http://pytz.sourceforge.net/")
        return

    try:
        tzinfo = pytz.timezone(timezone)
    except pytz.exceptions.UnknownTimeZoneError:
        self.reply(data, "Unknown timezone: {0}.".format(timezone))
        return

    # Tag the naive UTC "now" as UTC, then convert it to the target zone.
    utc_now = pytz.utc.localize(datetime.utcnow())
    local_now = utc_now.astimezone(tzinfo)
    self.reply(data, local_now.strftime("%Y-%m-%d %H:%M:%S %Z"))

+ 48
- 0
earwigbot/commands/trout.py View File

@@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from unicodedata import normalize

from earwigbot.commands import Command

class Trout(Command):
"""Slap someone with a trout, or related fish."""
name = "trout"
commands = ["trout", "whale"]

def setup(self):
try:
self.exceptions = self.config.commands[self.name]["exceptions"]
except KeyError:
self.exceptions = {}

def process(self, data):
    """Slap the target with the animal named by the command.

    The target is the arguments joined by spaces, or the caller's nick when
    there are none; "yourself" becomes "himself". If the target, after
    NFKD normalization and lowercasing, is a key of ``self.exceptions``,
    reply with the configured text instead of slapping.
    """
    animal = data.command
    target = " ".join(data.args) or data.nick
    target = "himself" if target == "yourself" else target

    # Only byte strings need decoding. Calling .decode() on text that is
    # already unicode would first encode it as ASCII on Python 2, failing
    # for non-ASCII names, and does not exist at all on Python 3.
    text = target.decode("utf8") if isinstance(target, bytes) else target
    normal = normalize("NFKD", text).lower()
    if normal in self.exceptions:
        self.reply(data, self.exceptions[normal])
    else:
        msg = "slaps \x02{0}\x0F around a bit with a large {1}."
        self.action(data.chan, msg.format(target, animal))

+ 347
- 0
earwigbot/config/__init__.py View File

@@ -0,0 +1,347 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from collections import OrderedDict
from getpass import getpass
from hashlib import sha256
import logging
import logging.handlers
from os import mkdir, path
import stat

from Crypto.Cipher import Blowfish
import bcrypt
import yaml

from earwigbot.config.formatter import BotFormatter
from earwigbot.config.node import ConfigNode
from earwigbot.config.ordered_yaml import OrderedLoader
from earwigbot.config.permissions import PermissionsDB
from earwigbot.config.script import ConfigScript
from earwigbot.exceptions import NoConfigError

__all__ = ["BotConfig"]

class BotConfig(object):
    """
    **EarwigBot: YAML Config File Manager**

    This handles all tasks involving reading and writing to our config file,
    including encrypting and decrypting passwords and making a new config file
    from scratch at the initial bot run.

    BotConfig has a few attributes and methods, including the following:

    - :py:attr:`root_dir`:   bot's working directory; contains
      :file:`config.yml`, :file:`logs/`
    - :py:attr:`path`:       path to the bot's config file
    - :py:attr:`components`: enabled components
    - :py:attr:`wiki`:       information about wiki-editing
    - :py:attr:`irc`:        information about IRC
    - :py:attr:`commands`:   information about IRC commands
    - :py:attr:`tasks`:      information for bot tasks
    - :py:attr:`metadata`:   miscellaneous information
    - :py:meth:`schedule`:   tasks scheduled to run at a given time

    BotConfig also has some methods used in config loading:

    - :py:meth:`load`:    loads (or reloads) and parses our config file
    - :py:meth:`decrypt`: decrypts an object in the config tree
    """

    def __init__(self, bot, root_dir, level):
        self._bot = bot
        self._root_dir = root_dir
        self._logging_level = level
        self._config_path = path.join(self.root_dir, "config.yml")
        self._log_dir = path.join(self.root_dir, "logs")
        perms_file = path.join(self.root_dir, "permissions.db")
        self._permissions = PermissionsDB(perms_file)
        self._decryption_cipher = None
        self._data = None
        # Created by _setup_logging(); None until load() has run once.
        self._stream_handler = None

        self._components = ConfigNode()
        self._wiki = ConfigNode()
        self._irc = ConfigNode()
        self._commands = ConfigNode()
        self._tasks = ConfigNode()
        self._metadata = ConfigNode()

        self._nodes = [self._components, self._wiki, self._irc, self._commands,
                       self._tasks, self._metadata]

        self._decryptable_nodes = [  # Default nodes to decrypt
            (self._wiki, ("password",)),
            (self._wiki, ("search", "credentials", "key")),
            (self._wiki, ("search", "credentials", "secret")),
            (self._irc, ("frontend", "nickservPassword")),
            (self._irc, ("watcher", "nickservPassword")),
        ]

    def __repr__(self):
        """Return the canonical string representation of the BotConfig."""
        res = "BotConfig(root_dir={0!r}, level={1!r})"
        return res.format(self.root_dir, self.logging_level)

    def __str__(self):
        """Return a nice string representation of the BotConfig."""
        return "<BotConfig at {0}>".format(self.root_dir)

    def _handle_missing_config(self):
        """Offer to create a config file interactively.

        Raises :py:exc:`NoConfigError` if the user declines or interrupts the
        setup script with Ctrl-C.
        """
        print("Config file missing or empty: {0}".format(self._config_path))
        msg = "Would you like to create a config file now? [Y/n] "
        choice = raw_input(msg)
        if choice.lower().startswith("n"):
            raise NoConfigError()
        try:
            ConfigScript(self).make_new()
        except KeyboardInterrupt:
            raise NoConfigError()

    def _load(self):
        """Load data from our YAML config file (config.yml) into self._data."""
        filename = self._config_path
        with open(filename, 'r') as fp:
            try:
                # NOTE(review): OrderedLoader derives from yaml.Loader, which
                # can construct arbitrary Python objects. Acceptable for a
                # local, user-owned config file; never feed it untrusted data.
                self._data = yaml.load(fp, OrderedLoader)
            except yaml.YAMLError:
                print("Error parsing config file {0}:".format(filename))
                raise

    def _setup_logging(self):
        """Configures the logging module so it works the way we want it to.

        File handlers are only attached when ``enableLogging`` is set in the
        metadata *and* the log directory is usable. The colored stdout
        handler is always attached, even when file logging has to be skipped.
        """
        log_dir = self._log_dir
        logger = logging.getLogger("earwigbot")
        logger.handlers = []  # Remove any handlers already attached to us
        logger.setLevel(logging.DEBUG)
        color_formatter = BotFormatter(color=True)
        formatter = BotFormatter()

        file_logging = bool(self.metadata.get("enableLogging"))
        if file_logging and not path.isdir(log_dir):
            if path.exists(log_dir):
                msg = "log_dir ({0}) exists but is not a directory!"
                print(msg.format(log_dir))
                # Previously we returned here, which also skipped the stdout
                # handler below; degrade to stdout-only logging instead.
                file_logging = False
            else:
                mkdir(log_dir, stat.S_IWUSR | stat.S_IRUSR | stat.S_IXUSR)

        if file_logging:
            hand = logging.handlers.TimedRotatingFileHandler
            logfile = lambda f: path.join(log_dir, f)

            main_handler = hand(logfile("bot.log"), "midnight", 1, 7)
            error_handler = hand(logfile("error.log"), "W6", 1, 4)
            debug_handler = hand(logfile("debug.log"), "H", 1, 6)

            main_handler.setLevel(logging.INFO)
            error_handler.setLevel(logging.WARNING)
            debug_handler.setLevel(logging.DEBUG)

            for h in (main_handler, error_handler, debug_handler):
                h.setFormatter(formatter)
                logger.addHandler(h)

        self._stream_handler = stream = logging.StreamHandler()
        stream.setLevel(self._logging_level)
        stream.setFormatter(color_formatter)
        logger.addHandler(stream)

    def _decrypt(self, node, nodes):
        """Try to decrypt the contents of a config node. Use self.decrypt()."""
        try:
            node._decrypt(self._decryption_cipher, nodes[:-1], nodes[-1])
        except ValueError:
            print("Error decrypting passwords:")
            raise

    @property
    def bot(self):
        """The config's Bot object."""
        return self._bot

    @property
    def root_dir(self):
        """The bot's root directory containing its config file and more."""
        return self._root_dir

    @property
    def logging_level(self):
        """The minimum logging level for messages logged via stdout."""
        return self._logging_level

    @logging_level.setter
    def logging_level(self, level):
        self._logging_level = level
        # The handler does not exist until load() has set up logging; the new
        # level is picked up from self._logging_level when it is created.
        if self._stream_handler is not None:
            self._stream_handler.setLevel(level)

    @property
    def path(self):
        """The path to the bot's config file."""
        return self._config_path

    @property
    def log_dir(self):
        """The directory containing the bot's logs."""
        return self._log_dir

    @property
    def data(self):
        """The entire config file as a decoded YAML object."""
        return self._data

    @property
    def components(self):
        """A dict of enabled components."""
        return self._components

    @property
    def wiki(self):
        """A dict of information about wiki-editing."""
        return self._wiki

    @property
    def irc(self):
        """A dict of information about IRC."""
        return self._irc

    @property
    def commands(self):
        """A dict of information for IRC commands."""
        return self._commands

    @property
    def tasks(self):
        """A dict of information for bot tasks."""
        return self._tasks

    @property
    def metadata(self):
        """A dict of miscellaneous information."""
        return self._metadata

    def is_loaded(self):
        """Return ``True`` if our config file has been loaded, or ``False``."""
        return self._data is not None

    def is_encrypted(self):
        """Return ``True`` if passwords are encrypted, otherwise ``False``."""
        return self.metadata.get("encryptPasswords", False)

    def load(self):
        """Load, or reload, our config file.

        First, check if we have a valid config file, and if not, notify the
        user. If there is no config file at all, offer to make one, otherwise
        exit.

        Data from the config file is stored in six
        :py:class:`~earwigbot.config.ConfigNode` objects
        (:py:attr:`components`, :py:attr:`wiki`, :py:attr:`irc`,
        :py:attr:`commands`, :py:attr:`tasks`, :py:attr:`metadata`) for easy
        access (as well as the lower-level :py:attr:`data` attribute). If
        passwords are encrypted, we'll use :py:func:`~getpass.getpass` for the
        key and then decrypt them. If the config is being reloaded, encrypted
        items will be automatically decrypted if they were decrypted earlier.

        Raises :py:exc:`RuntimeError` if the entered key does not match the
        signature stored in the config's metadata.
        """
        if not path.exists(self._config_path):
            self._handle_missing_config()
        self._load()
        if not self._data:
            self._handle_missing_config()
            self._load()

        self.components._load(self._data.get("components", OrderedDict()))
        self.wiki._load(self._data.get("wiki", OrderedDict()))
        self.irc._load(self._data.get("irc", OrderedDict()))
        self.commands._load(self._data.get("commands", OrderedDict()))
        self.tasks._load(self._data.get("tasks", OrderedDict()))
        self.metadata._load(self._data.get("metadata", OrderedDict()))

        self._setup_logging()
        if self.is_encrypted():
            if not self._decryption_cipher:
                key = getpass("Enter key to decrypt bot passwords: ")
                signature = self.metadata["signature"]
                # Verify the key *before* caching a cipher built from it, so a
                # wrong key can never be reused silently by a later reload.
                if bcrypt.hashpw(key, signature) != signature:
                    raise RuntimeError("Incorrect password.")
                self._decryption_cipher = Blowfish.new(sha256(key).digest())
            for node, nodes in self._decryptable_nodes:
                self._decrypt(node, nodes)

        if self.irc:
            self.irc["permissions"] = self._permissions
            self._permissions.load()

    def decrypt(self, node, *nodes):
        """Decrypt an object in our config tree.

        :py:attr:`_decryption_cipher` is used as our key, retrieved using
        :py:func:`~getpass.getpass` in :py:meth:`load` if it wasn't already
        specified. If this is called when passwords are not encrypted (check
        with :py:meth:`is_encrypted`), nothing will happen. We'll also keep
        track of this node if :py:meth:`load` is called again (i.e. to reload)
        and automatically decrypt it.

        Example usage::

            >>> config.decrypt(config.irc, "frontend", "nickservPassword")
            # decrypts config.irc["frontend"]["nickservPassword"]
        """
        signature = (node, nodes)
        if signature in self._decryptable_nodes:
            return  # Already decrypted
        self._decryptable_nodes.append(signature)
        if self.is_encrypted():
            self._decrypt(node, nodes)

    def schedule(self, minute, hour, month_day, month, week_day):
        """Return a list of tasks scheduled to run at the specified time.

        The schedule data comes from our config file's ``schedule`` field,
        which is stored as :py:attr:`self.data["schedule"] <data>`.

        An event matches when every time field it specifies equals the
        corresponding argument; fields it omits match anything.
        """
        # Tasks to run this turn, each as a list of either [task_name, kwargs],
        # or just the task_name:
        tasks = []

        now = {"minute": minute, "hour": hour, "month_day": month_day,
               "month": month, "week_day": week_day}

        for event in self._data.get("schedule", []):
            due = all(event[key] == value for key, value in now.items()
                      if key in event)
            if due:
                try:
                    tasks.extend(event["tasks"])
                except KeyError:
                    pass  # Event without a task list: nothing to run

        return tasks

+ 51
- 0
earwigbot/config/formatter.py View File

@@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import logging

__all__ = ["BotFormatter"]

class BotFormatter(logging.Formatter):
    """Log formatter used by the bot, optionally with ANSI-colored levels.

    With ``color=True`` the level name is padded to eight characters and
    wrapped in an ANSI color escape chosen by severity; otherwise the plain
    padded level name is used.
    """

    # ANSI escape prefix for each standard logging level.
    _COLORS = {
        logging.DEBUG: "\x1b[34m",             # Blue
        logging.INFO: "\x1b[32m",              # Green
        logging.WARNING: "\x1b[33m",           # Yellow
        logging.ERROR: "\x1b[31m",             # Red
        logging.CRITICAL: "\x1b[1m\x1b[31m",   # Bold red
    }
    _RESET = "\x1b[0m"

    def __init__(self, color=False):
        self._color = color
        if color:
            fmt = "[%(asctime)s %(lvl)s] %(name)s: %(message)s"
        else:
            fmt = "[%(asctime)s %(levelname)-8s] %(name)s: %(message)s"
        datefmt = "%Y-%m-%d %H:%M:%S"
        super(BotFormatter, self).__init__(fmt=fmt, datefmt=datefmt)

    def format(self, record):
        """Format *record*, adding the colored ``lvl`` field first if needed."""
        if self._color:
            record = self.format_color(record)
        return super(BotFormatter, self).format(record)

    def format_color(self, record):
        """Set ``record.lvl`` to the padded, colored level name; return record.

        Levels outside the five standard ones get the padded name without any
        color, so that formatting never fails on a missing ``lvl`` field.
        """
        label = record.levelname.ljust(8)
        color = self._COLORS.get(record.levelno)
        if color is None:
            record.lvl = label
        else:
            record.lvl = color + label + self._RESET
        return record

+ 104
- 0
earwigbot/config/node.py View File

@@ -0,0 +1,104 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from collections import OrderedDict

__all__ = ["ConfigNode"]

class ConfigNode(object):
    """A thin wrapper around an ordered dict holding one config section.

    Items can be read and written either by subscript (``node["key"]``) or
    by attribute (``node.key``); both go to the same underlying mapping.
    """

    def __init__(self):
        self._data = OrderedDict()

    def __repr__(self):
        # __repr__ must return a string; returning the dict itself makes
        # repr(node) raise TypeError.
        return repr(self._data)

    def __nonzero__(self):
        return bool(self._data)

    __bool__ = __nonzero__  # Python 3 name for the truth-value hook

    def __len__(self):
        return len(self._data)

    def __getitem__(self, key):
        return self._data[key]

    def __setitem__(self, key, item):
        self._data[key] = item

    def __getattr__(self, key):
        # Only called when normal attribute lookup fails. If "_data" itself
        # is missing (e.g. on a half-constructed instance), fail with a plain
        # AttributeError instead of recursing through self._data.
        if key == "_data":
            raise AttributeError(key)
        # Unknown config keys raise KeyError, exactly like subscript access.
        return self._data[key]

    def __setattr__(self, key, item):
        if key == "_data":
            super(ConfigNode, self).__setattr__(key, item)
        else:
            self._data[key] = item

    def __iter__(self):
        for key in self._data:
            yield key

    def __contains__(self, item):
        return item in self._data

    def _dump(self):
        """Return a copy of our data with nested ConfigNodes dumped too."""
        data = self._data.copy()
        # Iterate over a snapshot since values are replaced while looping.
        for key, val in list(data.items()):
            if isinstance(val, ConfigNode):
                data[key] = val._dump()
        return data

    def _load(self, data):
        """Replace our contents with a shallow copy of the mapping *data*."""
        self._data = data.copy()

    def _decrypt(self, cipher, intermediates, item):
        """Decrypt, in place, the value at ``intermediates... -> item``.

        Walks down through the keys in *intermediates*; if any of them is
        missing, or *item* is not present at the end, nothing happens. The
        stored value is hex-decoded, passed to ``cipher.decrypt()``, and
        stripped of trailing NUL bytes.
        """
        base = self._data
        for inter in intermediates:
            try:
                base = base[inter]
            except KeyError:
                return
        if item in base:
            ciphertext = base[item].decode("hex")
            base[item] = cipher.decrypt(ciphertext).rstrip("\x00")

    def get(self, *args, **kwargs):
        return self._data.get(*args, **kwargs)

    def keys(self):
        return self._data.keys()

    def values(self):
        return self._data.values()

    def items(self):
        return self._data.items()

    def iterkeys(self):
        return self._data.iterkeys()

    def itervalues(self):
        return self._data.itervalues()

    def iteritems(self):
        return self._data.iteritems()

+ 106
- 0
earwigbot/config/ordered_yaml.py View File

@@ -0,0 +1,106 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""
Based on:

* https://gist.github.com/844388
* http://pyyaml.org/attachment/ticket/161/use_ordered_dict.py

with modifications.
"""

from collections import OrderedDict

import yaml

__all__ = ["OrderedLoader", "OrderedDumper"]

class OrderedLoader(yaml.Loader):
    """A YAML loader that loads mappings into ordered dictionaries."""

    def __init__(self, *args, **kwargs):
        super(OrderedLoader, self).__init__(*args, **kwargs)
        # Route both plain and ordered YAML maps through our constructor.
        constructor = type(self).construct_yaml_map
        self.add_constructor(u"tag:yaml.org,2002:map", constructor)
        self.add_constructor(u"tag:yaml.org,2002:omap", constructor)

    def construct_yaml_map(self, node):
        """Construct an OrderedDict for a mapping node.

        Written as a generator: the empty dict is yielded first and filled in
        afterwards.
        """
        data = OrderedDict()
        yield data
        value = self.construct_mapping(node)
        data.update(value)

    def construct_mapping(self, node, deep=False):
        """Build an OrderedDict from *node*, preserving key order.

        Raises :py:exc:`yaml.constructor.ConstructorError` if *node* is not a
        mapping node or if one of its keys is unhashable.
        """
        if isinstance(node, yaml.MappingNode):
            self.flatten_mapping(node)
        else:
            raise yaml.constructor.ConstructorError(None, None,
                "expected a mapping node, but found {0}".format(node.id),
                node.start_mark)

        mapping = OrderedDict()
        for key_node, value_node in node.value:
            key = self.construct_object(key_node, deep=deep)
            try:
                hash(key)
            except TypeError as exc:
                raise yaml.constructor.ConstructorError(
                    "while constructing a mapping", node.start_mark,
                    "found unacceptable key ({0})".format(exc),
                    key_node.start_mark)
            value = self.construct_object(value_node, deep=deep)
            mapping[key] = value
        return mapping


class OrderedDumper(yaml.SafeDumper):
    """A YAML dumper that dumps ordered dictionaries into mappings."""

    def __init__(self, *args, **kwargs):
        super(OrderedDumper, self).__init__(*args, **kwargs)
        self.add_representer(OrderedDict, type(self).represent_dict)

    def represent_mapping(self, tag, mapping, flow_style=None):
        """Represent *mapping* as a MappingNode, keeping its iteration order.

        When *flow_style* is not given, the dumper's ``default_flow_style``
        is used if set; otherwise flow style is chosen only when every key
        and value is a scalar node with no explicit style.
        """
        pairs = []
        node = yaml.MappingNode(tag, pairs, flow_style=flow_style)
        if self.alias_key is not None:
            self.represented_objects[self.alias_key] = node

        if hasattr(mapping, "items"):
            mapping = list(mapping.items())

        all_plain = True
        for raw_key, raw_value in mapping:
            key_node = self.represent_data(raw_key)
            value_node = self.represent_data(raw_value)
            for child in (key_node, value_node):
                if not isinstance(child, yaml.ScalarNode) or child.style:
                    all_plain = False
            pairs.append((key_node, value_node))

        if flow_style is None:
            if self.default_flow_style is None:
                node.flow_style = all_plain
            else:
                node.flow_style = self.default_flow_style
        return node

+ 176
- 0
earwigbot/config/permissions.py View File

@@ -0,0 +1,176 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from fnmatch import fnmatch
import sqlite3 as sqlite
from threading import Lock

__all__ = ["PermissionsDB"]

class PermissionsDB(object):
    """
    **EarwigBot: Permissions Database Manager**

    Controls the :file:`permissions.db` file, which stores the bot's owners and
    admins for the purposes of using certain dangerous IRC commands.
    """
    ADMIN = 1
    OWNER = 2

    def __init__(self, dbfile):
        self._dbfile = dbfile
        self._db_access_lock = Lock()
        self._data = {}

    def __repr__(self):
        """Return the canonical string representation of the PermissionsDB."""
        res = "PermissionsDB(dbfile={0!r})"
        return res.format(self._dbfile)

    def __str__(self):
        """Return a nice string representation of the PermissionsDB."""
        return "<PermissionsDB at {0}>".format(self._dbfile)

    def _create(self, conn):
        """Initialize the permissions database with its necessary tables."""
        query = """CREATE TABLE users (user_nick, user_ident, user_host,
                                       user_rank)"""
        conn.execute(query)

    def _is_rank(self, user, rank):
        """Return the first rule of *rank* matching *user*, else ``False``."""
        for rule in self._data.get(rank, []):
            if user in rule:
                return rule
        return False

    def _set_rank(self, user, rank):
        """Add a User to the database under a given rank."""
        query = "INSERT INTO users VALUES (?, ?, ?, ?)"
        with self._db_access_lock:
            with sqlite.connect(self._dbfile) as conn:
                conn.execute(query, (user.nick, user.ident, user.host, rank))
            self._data.setdefault(rank, []).append(user)
        return user

    def _del_rank(self, user, rank):
        """Remove the first rule of *rank* that matches *user*.

        Returns the removed rule, or ``None`` if nothing matched.
        """
        query = """DELETE FROM users WHERE user_nick = ? AND user_ident = ? AND
                   user_host = ? AND user_rank = ?"""
        with self._db_access_lock:
            for rule in self._data.get(rank, []):
                if user in rule:
                    # Matching is wildcard-based, so the row to delete is the
                    # *rule's* own nick/ident/host, not the caller's values;
                    # otherwise the rule would vanish from memory only and
                    # reappear on the next load().
                    with sqlite.connect(self._dbfile) as conn:
                        args = (rule.nick, rule.ident, rule.host, rank)
                        conn.execute(query, args)
                    self._data[rank].remove(rule)
                    return rule
        return None

    @property
    def data(self):
        """A dict of all entries in the permissions database."""
        return self._data

    def load(self):
        """Load permissions from an existing database, or create a new one."""
        query = "SELECT user_nick, user_ident, user_host, user_rank FROM users"
        entries = {}
        with sqlite.connect(self._dbfile) as conn, self._db_access_lock:
            try:
                for nick, ident, host, rank in conn.execute(query):
                    entries.setdefault(rank, []).append(
                        _User(nick, ident, host))
            except sqlite.OperationalError:
                # Treated as "table does not exist yet": create it.
                self._create(conn)
            # Swap in the fresh data while still holding the lock.
            self._data = entries

    def has_exact(self, rank, nick="*", ident="*", host="*"):
        """Return the rule exactly equal to nick/ident/host, else ``False``."""
        wanted = (nick, ident, host)
        for usr in self._data.get(rank, []):
            if (usr.nick, usr.ident, usr.host) == wanted:
                return usr
        return False

    def is_admin(self, data):
        """Return the matching rule if the user is a bot admin, else ``False``.

        *data* must provide ``nick``, ``ident`` and ``host`` attributes.
        """
        user = _User(data.nick, data.ident, data.host)
        return self._is_rank(user, rank=self.ADMIN)

    def is_owner(self, data):
        """Return the matching rule if the user is a bot owner, else ``False``.

        *data* must provide ``nick``, ``ident`` and ``host`` attributes.
        """
        user = _User(data.nick, data.ident, data.host)
        return self._is_rank(user, rank=self.OWNER)

    def add_admin(self, nick="*", ident="*", host="*"):
        """Add a nick/ident/host combo to the bot admins list."""
        return self._set_rank(_User(nick, ident, host), rank=self.ADMIN)

    def add_owner(self, nick="*", ident="*", host="*"):
        """Add a nick/ident/host combo to the bot owners list."""
        return self._set_rank(_User(nick, ident, host), rank=self.OWNER)

    def remove_admin(self, nick="*", ident="*", host="*"):
        """Remove a nick/ident/host combo from the bot admins list."""
        return self._del_rank(_User(nick, ident, host), rank=self.ADMIN)

    def remove_owner(self, nick="*", ident="*", host="*"):
        """Remove a nick/ident/host combo from the bot owners list."""
        return self._del_rank(_User(nick, ident, host), rank=self.OWNER)


class _User(object):
    """A class that represents an IRC user for the purpose of testing rules."""
    def __init__(self, nick, ident, host):
        self.nick = nick
        self.ident = ident
        self.host = host

    def __repr__(self):
        """Return the canonical string representation of the User."""
        res = "_User(nick={0!r}, ident={1!r}, host={2!r})"
        return res.format(self.nick, self.ident, self.host)

    def __str__(self):
        """Return a nice string representation of the User."""
        return "{0}!{1}@{2}".format(self.nick, self.ident, self.host)

    def __contains__(self, user):
        """Return whether *user* matches this rule's fnmatch-style patterns."""
        return (fnmatch(user.nick, self.nick) and
                fnmatch(user.ident, self.ident) and
                fnmatch(user.host, self.host))

+ 446
- 0
earwigbot/config/script.py View File

@@ -0,0 +1,446 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from collections import OrderedDict
from getpass import getpass
from hashlib import sha256
from os import chmod, mkdir, path
import re
import stat
import sys
from textwrap import fill, wrap

from Crypto.Cipher import Blowfish
import bcrypt
import yaml

from earwigbot import exceptions
from earwigbot.config.ordered_yaml import OrderedDumper

__all__ = ["ConfigScript"]

# Source text of a stub Python module defining ``process(bot, rc)``.
# NOTE(review): presumably written out as the starter rules file for the IRC
# watcher; the code that uses this constant is outside this chunk -- confirm.
RULES_TEMPLATE = """# -*- coding: utf-8 -*-

def process(bot, rc):
\"\"\"Given a Bot() object and an RC() object, return a list of channels
to report this event to. Also, start any wiki bot tasks within this
function if necessary.\"\"\"
pass
"""

class ConfigScript(object):
"""A script to guide a user through the creation of a new config file."""
WIDTH = 79
PROMPT = "\x1b[32m> \x1b[0m"
BCRYPT_ROUNDS = 12

def __init__(self, config):
self.config = config
self.data = OrderedDict([
("metadata", OrderedDict()),
("components", OrderedDict()),
("wiki", OrderedDict()),
("irc", OrderedDict()),
("commands", OrderedDict()),
("tasks", OrderedDict()),
("schedule", [])
])

self._cipher = None
self._wmf = False
self._proj = None
self._lang = None

def _print(self, text):
print fill(re.sub("\s\s+", " ", text), self.WIDTH)

def _print_no_nl(self, text):
sys.stdout.write(fill(re.sub("\s\s+", " ", text), self.WIDTH))
sys.stdout.flush()

def _pause(self):
    """Block until the user presses enter."""
    prompt = self.PROMPT + "Press enter to continue: "
    raw_input(prompt)

def _ask(self, text, default=None, require=True):
    """Ask a free-form question and return the answer.

    An empty answer falls back to *default*. While *require* is true the
    question is repeated until a non-empty answer (or default) is obtained.
    """
    prompt = self.PROMPT + text
    if default:
        prompt += " \x1b[33m[{0}]\x1b[0m".format(default)

    wrapped = wrap(re.sub(r"\s\s+", " ", prompt), self.WIDTH)
    leading = wrapped[:-1]
    if leading:
        # Everything but the last line is printed; the last line is the
        # actual input prompt.
        print("\n".join(leading))

    while True:
        answer = raw_input(wrapped[-1] + " ") or default
        if answer or not require:
            return answer

def _ask_bool(self, text, default=True):
    """Ask a yes/no question; an empty answer returns *default*.

    Any answer starting with "y" or "n" (case-insensitive) is accepted;
    anything else repeats the question.
    """
    if default:
        hint = " \x1b[33m[Y/n]\x1b[0m"
    else:
        hint = " \x1b[33m[y/N]\x1b[0m"
    prompt = self.PROMPT + text + hint

    wrapped = wrap(re.sub(r"\s\s+", " ", prompt), self.WIDTH)
    leading = wrapped[:-1]
    if leading:
        print("\n".join(leading))

    while True:
        reply = raw_input(wrapped[-1] + " ").lower()
        if not reply:
            return default
        initial = reply[0]
        if initial == "y":
            return True
        if initial == "n":
            return False

def _ask_pass(self, text, encrypt=True):
    """Prompt for a password without echo; run it through _encrypt() unless
    *encrypt* is false."""
    secret = getpass(self.PROMPT + text + " ")
    return self._encrypt(secret) if encrypt else secret

def _encrypt(self, password):
if self._cipher:
mod = len(password) % 8
if mod:
password = password.ljust(len(password) + (8 - mod), "\x00")
return self._cipher.encrypt(password).encode("hex")
else:
return password

def _ask_list(self, text):
    """Ask for a list of items, one per line, ended by a blank line."""
    heading = re.sub(r"\s\s+", " ", self.PROMPT + text)
    print(fill(heading, self.WIDTH))
    print("[one item per line; blank line to end]:")

    items = []
    entry = raw_input(self.PROMPT)
    while entry:
        items.append(entry)
        entry = raw_input(self.PROMPT)
    return items

def _set_metadata(self):
print
self.data["metadata"] = OrderedDict([("version", 1)])
self._print("""I can encrypt passwords stored in your config file in
addition to preventing other users on your system from
reading the file. Encryption is recommended if the bot
is to run on a public computer like the Toolserver, but
otherwise the need to enter a key everytime you start
the bot may be annoying.""")
if self._ask_bool("Encrypt stored passwords?"):
self.data["metadata"]["encryptPasswords"] = True
key = getpass(self.PROMPT + "Enter an encryption key: ")
msg = "Running {0} rounds of bcrypt...".format(self.BCRYPT_ROUNDS)
self._print_no_nl(msg)
signature = bcrypt.hashpw(key, bcrypt.gensalt(self.BCRYPT_ROUNDS))
self.data["metadata"]["signature"] = signature
self._cipher = Blowfish.new(sha256(key).digest())
print " done."
else:
self.data["metadata"]["encryptPasswords"] = False

print
self._print("""The bot can temporarily store its logs in the logs/
subdirectory. Error logs are kept for a month whereas
normal logs are kept for a week. If you disable this,
the bot will still print logs to stdout.""")
logging = self._ask_bool("Enable logging?")
self.data["metadata"]["enableLogging"] = logging

def _set_components(self):
print
self._print("""The bot contains three separate components that can run
independently of each other.""")
self._print("""- The IRC front-end runs on a normal IRC server, like
freenode, and expects users to interact with it through
commands.""")
self._print("""- The IRC watcher runs on a wiki recent-changes server,
like irc.wikimedia.org, and listens for edits. Users
cannot interact with this component. It can detect
specific events and report them to "feed" channels on
the front-end or start bot tasks.""")
self._print("""- The wiki task scheduler runs wiki-editing bot tasks in
separate threads at user-defined times through a
cron-like interface. Tasks which are not scheduled can
be started by the IRC watcher manually through the IRC
front-end.""")
frontend = self._ask_bool("Enable the IRC front-end?")
watcher = self._ask_bool("Enable the IRC watcher?")
scheduler = self._ask_bool("Enable the wiki task scheduler?")
self.data["components"]["irc_frontend"] = frontend
self.data["components"]["irc_watcher"] = watcher
self.data["components"]["wiki_scheduler"] = scheduler

def _login(self, kwargs):
    """Try to add the site described by *kwargs*, recovering interactively.

    On an API error the user may re-enter the site information or cancel
    setup (raising NoConfigError). On a login error the user may re-enter
    the credentials, re-enter the site information, or continue without
    logging in. On success the stored wiki password is passed through
    _encrypt() and the site object is returned. When the site information
    is re-entered, the result of the nested _set_wiki() call is returned
    instead.
    """
    self.config.wiki._load(self.data["wiki"])
    self._print_no_nl("Trying to connect to the site...")
    try:
        site = self.config.bot.wiki.add_site(**kwargs)
    except exceptions.APIError as exc:
        print(" API error!")
        print("\x1b[31m" + exc.message + "\x1b[0m")
        question = "Would you like to re-enter the site information?"
        if not self._ask_bool(question):
            question = "This will cancel the setup process. Are you sure?"
            if self._ask_bool(question, default=False):
                raise exceptions.NoConfigError()
        # Either the user asked to retry, or declined to cancel.
        return self._set_wiki()
    except exceptions.LoginError as exc:
        print(" login error!")
        print("\x1b[31m" + exc.message + "\x1b[0m")
        question = "Would you like to re-enter your login information?"
        if self._ask_bool(question):
            self.data["wiki"]["username"] = self._ask("Bot username:")
            new_pass = self._ask_pass("Bot password:", encrypt=False)
            self.data["wiki"]["password"] = new_pass
            return self._login(kwargs)

        saved_pass = self.data["wiki"]["password"]
        question = "Would you like to re-enter the site information?"
        if self._ask_bool(question):
            return self._set_wiki()

        print("")
        self._print("""Moving on. You can modify the login information
                       stored in the bot's config in the future.""")
        self.data["wiki"]["password"] = None  # Clear so we don't login
        self.config.wiki._load(self.data["wiki"])
        self._print_no_nl("Trying to connect to the site...")
        site = self.config.bot.wiki.add_site(**kwargs)
        print(" success.")
        self.data["wiki"]["password"] = saved_pass  # Reset original value
    else:
        print(" success.")

    # Remember to store the encrypted password:
    wiki = self.data["wiki"]
    wiki["password"] = self._encrypt(wiki["password"])
    return site

def _set_wiki(self):
print
self._wmf = self._ask_bool("""Will this bot run on Wikimedia Foundation
wikis, like Wikipedia?""")
if self._wmf:
msg = "Site project (e.g. 'wikipedia', 'wiktionary', 'wikimedia'):"
self._proj = project = self._ask(msg, "wikipedia").lower()
msg = "Site language code (e.g. 'en', 'fr', 'commons'):"
self._lang = lang = self._ask(msg, "en").lower()
kwargs = {"project": project, "lang": lang}
else:
msg = "Site base URL, without the script path and trailing slash;"
msg += " can be protocol-insensitive (e.g. '//en.wikipedia.org'):"
url = self._ask(msg)
script = self._ask("Site script path:", "/w")
kwargs = {"base_url": url, "script_path": script}

self.data["wiki"]["username"] = self._ask("Bot username:")
password = self._ask_pass("Bot password:", encrypt=False)
self.data["wiki"]["password"] = password
self.data["wiki"]["userAgent"] = "EarwigBot/$1 (Python/$2; https://github.com/earwig/earwigbot)"
self.data["wiki"]["summary"] = "([[WP:BOT|Bot]]): $2"
self.data["wiki"]["useHTTPS"] = True
self.data["wiki"]["assert"] = "user"
self.data["wiki"]["maxlag"] = 10
self.data["wiki"]["waitTime"] = 2
self.data["wiki"]["defaultSite"] = self._login(kwargs).name
self.data["wiki"]["sql"] = {}

if self._wmf:
msg = "Will this bot run from the Wikimedia Toolserver?"
toolserver = self._ask_bool(msg, default=False)
if toolserver:
args = [("host", "$1-p.rrdb.toolserver.org"), ("db", "$1_p")]
self.data["wiki"]["sql"] = OrderedDict(args)

self.data["wiki"]["shutoff"] = {}
msg = "Would you like to enable an automatic shutoff page for the bot?"
if self._ask_bool(msg):
print
self._print("""The page title can contain two wildcards: $1 will be
substituted with the bot's username, and $2 with the
current task number. This can be used to implement a
separate shutoff page for each task.""")
page = self._ask("Page title:", "User:$1/Shutoff")
msg = "Page content to indicate the bot is *not* shut off:"
disabled = self._ask(msg, "run")
args = [("page", page), ("disabled", disabled)]
self.data["wiki"]["shutoff"] = OrderedDict(args)

self.data["wiki"]["search"] = {}

def _set_irc(self):
"""Interactively collect the IRC settings and store them in self.data["irc"].

For each enabled component ("irc_frontend" and "irc_watcher" in
self.data["components"]), ask for the connection details and save them as
an OrderedDict under self.data["irc"]. When the watcher is enabled,
RULES_TEMPLATE is also written to a new 'rules.py' in the config's root
directory. The IRC version string is always set at the end.
"""
if self.data["components"]["irc_frontend"]:
print
frontend = self.data["irc"]["frontend"] = OrderedDict()
msg = "Hostname of the frontend's IRC server, without 'irc://':"
frontend["host"] = self._ask(msg, "irc.freenode.net")
frontend["port"] = self._ask("Frontend port:", 6667)
frontend["nick"] = self._ask("Frontend bot's nickname:")
# The ident defaults to the lowercased nickname entered just above.
frontend["ident"] = self._ask("Frontend bot's ident:",
frontend["nick"].lower())
question = "Frontend bot's real name (gecos):"
frontend["realname"] = self._ask(question, "EarwigBot")
# NickServ credentials are only stored if the user opts in.
if self._ask_bool("Should the bot identify to NickServ?"):
ns_user = self._ask("NickServ username:", frontend["nick"])
ns_pass = self._ask_pass("Nickserv password:")
frontend["nickservUsername"] = ns_user
frontend["nickservPassword"] = ns_pass
chan_question = "Frontend channels to join by default:"
frontend["channels"] = self._ask_list(chan_question)
print
self._print("""The bot keeps a database of its admins (users who
can use certain sensitive commands) and owners
(users who can quit the bot and modify its access
list), identified by nick, ident, and/or hostname.
Hostname is the most secure option since it cannot
be easily spoofed. If you have a cloak, this will
probably look like 'wikipedia/Username' or
'unaffiliated/nickname'.""")
# A falsy answer skips the registration below; otherwise the hostname is
# added to the permissions database as both an owner and an admin.
host = self._ask("Your hostname on the frontend:", require=False)
if host:
permdb = self.config._permissions
permdb.load()
permdb.add_owner(host=host)
permdb.add_admin(host=host)
else:
# No frontend: keep an empty mapping so the watcher prompts below can
# still call frontend.get() to look up their defaults.
frontend = {}

if self.data["components"]["irc_watcher"]:
print
watcher = self.data["irc"]["watcher"] = OrderedDict()
if self._wmf:
# WMF wikis: the host and port are filled in without prompting.
watcher["host"] = "irc.wikimedia.org"
watcher["port"] = 6667
else:
msg = "Hostname of the watcher's IRC server, without 'irc://':"
watcher["host"] = self._ask(msg)
watcher["port"] = self._ask("Watcher port:", 6667)
# The frontend's nick and real name (when a frontend was configured) are
# offered as the defaults for the watcher.
nick = self._ask("Watcher bot's nickname:", frontend.get("nick"))
ident = self._ask("Watcher bot's ident:", nick.lower())
watcher["nick"] = nick
watcher["ident"] = ident
question = "Watcher bot's real name (gecos):"
default = frontend.get("realname", "EarwigBot")
watcher["realname"] = self._ask(question, default)
watcher_ns = "Should the bot identify to NickServ?"
# The NickServ question is only asked for non-WMF watchers.
if not self._wmf and self._ask_bool(watcher_ns):
ns_user = self._ask("NickServ username:", watcher["nick"])
ns_pass = self._ask_pass("Nickserv password:")
watcher["nickservUsername"] = ns_user
watcher["nickservPassword"] = ns_pass
if self._wmf:
# The channel is built as "#<lang>.<project>" from the values saved in
# self._lang and self._proj.
chan = "#{0}.{1}".format(self._lang, self._proj)
watcher["channels"] = [chan]
else:
chan_question = "Watcher channels to join by default:"
watcher["channels"] = self._ask_list(chan_question)
print
self._print("""I am now creating a blank 'rules.py' file, which
will determine how the bot handles messages received
from the IRC watcher. It contains a process()
function that takes a Bot object (allowing you to
start tasks) and an RC object (storing the message
from the watcher). See the documentation for
details.""")
# Opening with "w" overwrites any existing rules.py in the root directory.
with open(path.join(self.config.root_dir, "rules.py"), "w") as fp:
fp.write(RULES_TEMPLATE)
self._pause()

self.data["irc"]["version"] = "EarwigBot - $1 - Python/$2 https://github.com/earwig/earwigbot"

def _set_commands(self):
    """Configure the default IRC commands and create the 'commands/' directory.

    The default commands are disabled outright when the IRC frontend
    component is not enabled; otherwise the user is asked whether to disable
    them (defaulting to "no"). The custom command directory is then created
    inside the config's root directory.

    A 'commands/' directory that already exists (for example one left behind
    by an earlier, interrupted run of this wizard) is reused instead of
    aborting the setup with an OSError. Any other failure to create it is
    still raised.
    """
    import errno  # Local import: only needed for the EEXIST check below.

    print
    msg = """Would you like to disable the default IRC commands? You can
fine-tune which commands are disabled later on."""
    if (not self.data["components"]["irc_frontend"] or
            self._ask_bool(msg, default=False)):
        self.data["commands"]["disable"] = True
    print
    self._print("""I am now creating the 'commands/' directory, where you
can place custom IRC commands and plugins. Creating your
own commands is described in the documentation.""")
    target = path.join(self.config.root_dir, "commands")
    try:
        mkdir(target)
    except OSError as exc:
        # Only tolerate "already exists" when the existing entry really is a
        # directory; a plain file with that name is still an error.
        if exc.errno != errno.EEXIST or not path.isdir(target):
            raise
    self._pause()

def _set_tasks(self):
    """Create the 'tasks/' directory for custom bot tasks.

    The directory is created inside the config's root directory after
    telling the user what it is for. A 'tasks/' directory that already
    exists (for example one left behind by an earlier, interrupted run of
    this wizard) is reused instead of aborting the setup with an OSError.
    Any other failure to create it is still raised.
    """
    import errno  # Local import: only needed for the EEXIST check below.

    print
    self._print("""I am now creating the 'tasks/' directory, where you can
place custom bot tasks and plugins. Creating your own
tasks is described in the documentation.""")
    target = path.join(self.config.root_dir, "tasks")
    try:
        mkdir(target)
    except OSError as exc:
        # Only tolerate "already exists" when the existing entry really is a
        # directory; a plain file with that name is still an error.
        if exc.errno != errno.EEXIST or not path.isdir(target):
            raise
    self._pause()

def _set_schedule(self):
print
self._print("""The final section of your config file, 'schedule', is a
list of bot tasks to be started by the wiki scheduler.
Each entry contains cron-like time quantifiers and a
list of tasks. For example, the following starts the
'foobot' task every hour on the half-hour:""")
print "\x1b[33mschedule:"
print " - minute: 30"
print " tasks:"
print " - foobot\x1b[0m"
self._print("""The following starts the 'barbot' task with the keyword
arguments 'action="baz"' every Monday at 05:00 UTC:""")
print "\x1b[33m - week_day: 1"
print " hour: 5"
print " tasks:"
print ' - ["barbot", {"action": "baz"}]\x1b[0m'
self._print("""The full list of quantifiers is minute, hour, month_day,
month, and week_day. See the documentation for more
information.""")
self._pause()

def _save(self):
    """Write the collected settings to the config file as block-style YAML."""
    dump_options = {
        "indent": 4,
        "allow_unicode": True,
        "default_flow_style": False,
    }
    with open(self.config.path, "w") as config_file:
        yaml.dump(self.data, config_file, OrderedDumper, **dump_options)

def make_new(self):
"""Make a new config file based on the user's input."""
try:
open(self.config.path, "w").close()
chmod(self.config.path, stat.S_IRUSR|stat.S_IWUSR)
except IOError:
print "I can't seem to write to the config file:"
raise
self._set_metadata()
self._set_components()
self._set_wiki()
components = self.data["components"]
if components["irc_frontend"] or components["irc_watcher"]:
self._set_irc()
self._set_commands()
self._set_tasks()
if components["wiki_scheduler"]:
self._set_schedule()
print
self._print("""I am now saving config.yml with your settings. YAML is a
relatively straightforward format and you should be able
to update these settings in the future when necessary.
I will start the bot at your signal. Feel free to
contact me at wikipedia.earwig@gmail.com if you have any
questions.""")
self._save()
if not self._ask_bool("Start the bot now?"):
exit()

+ 256
- 0
earwigbot/exceptions.py View File

@@ -0,0 +1,256 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""
**EarwigBot: Exceptions**

This module contains all exceptions used by EarwigBot::

    EarwigBotError
     +-- NoConfigError
     +-- IRCError
     |    +-- BrokenSocketError
     +-- WikiToolsetError
          +-- SiteNotFoundError
          +-- ServiceError
          |    +-- APIError
          |    +-- SQLError
          +-- NoServiceError
          +-- LoginError
          +-- NamespaceNotFoundError
          +-- PageNotFoundError
          +-- InvalidPageError
          +-- RedirectError
          +-- UserNotFoundError
          +-- EditError
          |    +-- PermissionsError
          |    +-- EditConflictError
          |    +-- NoContentError
          |    +-- ContentTooBigError
          |    +-- SpamDetectedError
          |    +-- FilteredError
          +-- CopyvioCheckError
               +-- UnknownSearchEngineError
               +-- UnsupportedSearchEngineError
               +-- SearchQueryError
"""

class EarwigBotError(Exception):
"""Base exception class for errors in EarwigBot."""

class NoConfigError(EarwigBotError):
"""The bot cannot be run without a config file.

This occurs if no config file exists, and the user said they did not want
one to be created.
"""

class IRCError(EarwigBotError):
"""Base exception class for errors in IRC-related sections of the bot."""

class BrokenSocketError(IRCError):
"""A socket has broken, because it is not sending data.

Raised by :py:meth:`IRCConnection._get
<earwigbot.irc.connection.IRCConnection._get>`.
"""

class WikiToolsetError(EarwigBotError):
"""Base exception class for errors in the Wiki Toolset."""

class SiteNotFoundError(WikiToolsetError):
"""A particular site could not be found in the sites database.

Raised by :py:class:`~earwigbot.wiki.sitesdb.SitesDB`.
"""

class ServiceError(WikiToolsetError):
"""Base exception class for an error within a service (the API or SQL).

This is caught by :py:meth:`Site.delegate
<earwigbot.wiki.site.Site.delegate>` to indicate a service is
non-functional so another, less-preferred one can be tried.
"""

class APIError(ServiceError):
"""Couldn't connect to a site's API.

Perhaps the server doesn't exist, our URL is wrong or incomplete, or
there are temporary problems on their end.

Raised by :py:meth:`Site.api_query <earwigbot.wiki.site.Site.api_query>`.
"""

class SQLError(ServiceError):
"""Some error involving SQL querying occurred.

Raised by :py:meth:`Site.sql_query <earwigbot.wiki.site.Site.sql_query>`.
"""

class NoServiceError(WikiToolsetError):
"""No service is functioning to handle a specific task.

Raised by :py:meth:`Site.delegate <earwigbot.wiki.site.Site.delegate>`.
"""

class LoginError(WikiToolsetError):
"""An error occurred while trying to log in.

Perhaps the username/password is incorrect.

Raised by :py:meth:`Site._login <earwigbot.wiki.site.Site._login>`.
"""

class NamespaceNotFoundError(WikiToolsetError):
"""A requested namespace name or namespace ID does not exist.

Raised by :py:meth:`Site.namespace_id_to_name
<earwigbot.wiki.site.Site.namespace_id_to_name>` and
:py:meth:`Site.namespace_name_to_id
<earwigbot.wiki.site.Site.namespace_name_to_id>`.
"""

class PageNotFoundError(WikiToolsetError):
"""Attempted to get information about a page that does not exist.

Raised by :py:class:`~earwigbot.wiki.page.Page`.
"""

class InvalidPageError(WikiToolsetError):
"""Attempted to get information about a page whose title is invalid.

Raised by :py:class:`~earwigbot.wiki.page.Page`.
"""

class RedirectError(WikiToolsetError):
"""A redirect-only method was called on a malformed or non-redirect page.

Raised by :py:meth:`Page.get_redirect_target
<earwigbot.wiki.page.Page.get_redirect_target>`.
"""

class UserNotFoundError(WikiToolsetError):
"""Attempted to get certain information about a user that does not exist.

Raised by :py:class:`~earwigbot.wiki.user.User`.
"""

class EditError(WikiToolsetError):
"""An error occurred while editing.

This is used as a base class for all editing errors; this one specifically
is used only when a generic error occurs that we don't know about.

Raised by :py:meth:`Page.edit <earwigbot.wiki.page.Page.edit>` and
:py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
"""

class PermissionsError(EditError):
"""A permissions error occurred while editing.

We tried to do something we don't have permission to, like trying to delete
a page as a non-admin, or trying to edit a page without login information
and AssertEdit enabled.

Raised by :py:meth:`Page.edit <earwigbot.wiki.page.Page.edit>` and
:py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
"""

class EditConflictError(EditError):
"""We've gotten an edit conflict or a (rarer) delete/recreate conflict.

Raised by :py:meth:`Page.edit <earwigbot.wiki.page.Page.edit>` and
:py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
"""

class NoContentError(EditError):
"""We tried to create a page or new section with no content.

Raised by :py:meth:`Page.edit <earwigbot.wiki.page.Page.edit>` and
:py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
"""

class ContentTooBigError(EditError):
"""The edit we tried to push exceeded the article size limit.

Raised by :py:meth:`Page.edit <earwigbot.wiki.page.Page.edit>` and
:py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
"""

class SpamDetectedError(EditError):
"""The spam filter refused our edit.

Raised by :py:meth:`Page.edit <earwigbot.wiki.page.Page.edit>` and
:py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
"""

class FilteredError(EditError):
"""The edit filter refused our edit.

Raised by :py:meth:`Page.edit <earwigbot.wiki.page.Page.edit>` and
:py:meth:`Page.add_section <earwigbot.wiki.page.Page.add_section>`.
"""

class CopyvioCheckError(WikiToolsetError):
"""An error occurred when checking a page for copyright violations.

This is a base class for multiple exceptions; usually one of those will be
raised instead of this.

Raised by :py:meth:`Page.copyvio_check
<earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check>` and
:py:meth:`Page.copyvio_compare
<earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_compare>`.
"""

class UnknownSearchEngineError(CopyvioCheckError):
"""Attempted to do a copyvio check with an unknown search engine.

Search engines are specified in :file:`config.yml` as
:py:attr:`config.wiki["search"]["engine"]`.

Raised by :py:meth:`Page.copyvio_check
<earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check>` and
:py:meth:`Page.copyvio_compare
<earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_compare>`.
"""

class UnsupportedSearchEngineError(CopyvioCheckError):
"""Attempted to do a copyvio check using an unavailable engine.

This might occur if, for example, an engine requires oauth2 but the package
couldn't be imported.

Raised by :py:meth:`Page.copyvio_check
<earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check>` and
:py:meth:`Page.copyvio_compare
<earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_compare>`.
"""

class SearchQueryError(CopyvioCheckError):
"""Some error occurred while doing a search query.

Raised by :py:meth:`Page.copyvio_check
<earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check>` and
:py:meth:`Page.copyvio_compare
<earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_compare>`.
"""

+ 27
- 0
earwigbot/irc/__init__.py View File

@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from earwigbot.irc.connection import *
from earwigbot.irc.data import *
from earwigbot.irc.frontend import *
from earwigbot.irc.rc import *
from earwigbot.irc.watcher import *

+ 259
- 0
earwigbot/irc/connection.py View File

@@ -0,0 +1,259 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import socket
from threading import Lock
from time import sleep, time

from earwigbot.exceptions import BrokenSocketError

__all__ = ["IRCConnection"]

class IRCConnection(object):
    """Interface with an IRC server.

    This base class owns the socket, rate-limits outgoing messages, answers
    server PINGs and runs the receive loop. Subclasses must override
    :py:meth:`_process_message` to act on the lines that are received.
    """

    def __init__(self, host, port, nick, ident, realname, logger):
        """Store the connection settings; no socket is opened here."""
        self._host = host
        self._port = port
        self._nick = nick
        self._ident = ident
        self._realname = realname
        self.logger = logger

        self._is_running = False
        self._send_lock = Lock()  # Serializes writes made by _send()

        # Timestamps (from time()) used by _send() and keep_alive():
        self._last_recv = time()
        self._last_send = 0
        self._last_ping = 0

    def __repr__(self):
        """Return the canonical string representation of the IRCConnection."""
        res = "IRCConnection(host={0!r}, port={1!r}, nick={2!r}, ident={3!r}, realname={4!r})"
        return res.format(self.host, self.port, self.nick, self.ident,
                          self.realname)

    def __str__(self):
        """Return a nice string representation of the IRCConnection."""
        res = "<IRCConnection {0}!{1} at {2}:{3}>"
        return res.format(self.nick, self.ident, self.host, self.port)

    def _connect(self):
        """Connect to our IRC server, retrying until it succeeds.

        A failed attempt is logged, its socket is closed, and another attempt
        is made after eight seconds. The NICK and USER registration messages
        are sent exactly once, after the connection has been established.
        """
        while True:
            self._sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                self._sock.connect((self.host, self.port))
            except socket.error:
                self.logger.exception("Couldn't connect to IRC server; retrying")
                self._sock.close()  # Don't leak the socket that failed
                sleep(8)
            else:
                break
        self._send("NICK {0}".format(self.nick))
        self._send("USER {0} {1} * :{2}".format(self.ident, self.host, self.realname))

    def _close(self):
        """Completely close our connection with the IRC server."""
        try:
            self._sock.shutdown(socket.SHUT_RDWR)  # Shut down connection first
        except socket.error:
            pass  # Ignore if the socket is already down
        self._sock.close()

    def _get(self, size=4096):
        """Receive (i.e. get) data from the server.

        Raises :py:exc:`BrokenSocketError` if the socket returns no data.
        """
        data = self._sock.recv(size)
        if not data:
            # Socket isn't giving us any data, so it is dead or broken:
            raise BrokenSocketError()
        return data

    def _send(self, msg, hidelog=False):
        """Send data to the server.

        Messages are spaced at least 0.75 seconds apart. If the socket raises
        an error while sending, the connection is marked as stopped. When
        *hidelog* is true the message is not written to the debug log.
        """
        with self._send_lock:
            time_since_last = time() - self._last_send
            if time_since_last < 0.75:
                sleep(0.75 - time_since_last)
            try:
                self._sock.sendall(msg + "\r\n")
            except socket.error:
                self._is_running = False
            else:
                if not hidelog:
                    self.logger.debug(msg)
                self._last_send = time()

    def _split(self, msgs, maxlen, maxsplits=3):
        """Split a large message into multiple messages smaller than maxlen.

        The message is broken on spaces where possible; a single word longer
        than *maxlen* is cut into pieces. At most *maxsplits* pieces are
        yielded, and whatever is left after that is dropped.
        """
        words = msgs.split(" ")
        splits = 0
        while words and splits < maxsplits:
            splits += 1
            if len(words[0]) > maxlen:
                # The word can't fit in any piece, so cut it mid-word:
                word = words.pop(0)
                yield word[:maxlen]
                words.insert(0, word[maxlen:])
            else:
                msg = []
                while words and len(" ".join(msg + [words[0]])) <= maxlen:
                    msg.append(words.pop(0))
                yield " ".join(msg)

    def _quit(self, msg=None):
        """Issue a quit message to the server. Doesn't close the connection."""
        if msg:
            self._send("QUIT :{0}".format(msg))
        else:
            self._send("QUIT")

    def _process_defaults(self, line):
        """Default process hooks for lines received on IRC.

        *line* is a received line split on whitespace. Every call refreshes
        the "last received" timestamp. A PING is answered with a PONG that
        echoes its argument, with the leading colon removed if there is one;
        a PING without an argument and an empty line are ignored.
        """
        self._last_recv = time()
        if len(line) > 1 and line[0] == "PING":  # If we are pinged, pong back
            target = line[1]
            if target.startswith(":"):
                target = target[1:]
            self.pong(target)

    def _process_message(self, line):
        """To be overridden in subclasses."""
        raise NotImplementedError()

    @property
    def host(self):
        """The hostname of the IRC server, like ``"irc.freenode.net"``."""
        return self._host

    @property
    def port(self):
        """The port of the IRC server, like ``6667``."""
        return self._port

    @property
    def nick(self):
        """Our nickname on the server, like ``"EarwigBot"``."""
        return self._nick

    @property
    def ident(self):
        """Our ident on the server, like ``"earwig"``.

        See http://en.wikipedia.org/wiki/Ident.
        """
        return self._ident

    @property
    def realname(self):
        """Our realname (gecos field) on the server."""
        return self._realname

    def say(self, target, msg, hidelog=False):
        """Send a private message to a target on the server.

        Long messages are sent as several PRIVMSGs of at most 400 characters
        of text each (see :py:meth:`_split` for the limit on their number).
        """
        for chunk in self._split(msg, 400):
            self._send("PRIVMSG {0} :{1}".format(target, chunk), hidelog)

    def reply(self, data, msg, hidelog=False):
        """Send a private message as a reply to a user on the server.

        In a channel the reply is prefixed with the sender's nick in bold; in
        a private conversation it is sent as-is.
        """
        if data.is_private:
            self.say(data.chan, msg, hidelog)
        else:
            msg = "\x02{0}\x0F: {1}".format(data.nick, msg)
            self.say(data.chan, msg, hidelog)

    def action(self, target, msg, hidelog=False):
        """Send a private message to a target on the server as an action."""
        msg = "\x01ACTION {0}\x01".format(msg)
        self.say(target, msg, hidelog)

    def notice(self, target, msg, hidelog=False):
        """Send a notice to a target on the server.

        Long messages are split in the same way as in :py:meth:`say`.
        """
        for chunk in self._split(msg, 400):
            self._send("NOTICE {0} :{1}".format(target, chunk), hidelog)

    def join(self, chan, hidelog=False):
        """Join a channel on the server."""
        msg = "JOIN {0}".format(chan)
        self._send(msg, hidelog)

    def part(self, chan, msg=None, hidelog=False):
        """Part from a channel on the server, optionally using a message."""
        if msg:
            self._send("PART {0} :{1}".format(chan, msg), hidelog)
        else:
            self._send("PART {0}".format(chan), hidelog)

    def mode(self, target, level, msg, hidelog=False):
        """Send a mode message to the server."""
        msg = "MODE {0} {1} {2}".format(target, level, msg)
        self._send(msg, hidelog)

    def ping(self, target, hidelog=False):
        """Ping another entity on the server."""
        msg = "PING {0}".format(target)
        self._send(msg, hidelog)

    def pong(self, target, hidelog=False):
        """Pong another entity on the server."""
        msg = "PONG {0}".format(target)
        self._send(msg, hidelog)

    def loop(self):
        """Main loop for the IRC connection.

        Reads from the socket until it breaks or the connection is stopped,
        passing every complete, non-empty line to :py:meth:`_process_defaults`
        and :py:meth:`_process_message`. The socket is closed when the loop
        ends, including when a handler raises.
        """
        self._is_running = True
        read_buffer = ""
        try:
            while True:
                try:
                    read_buffer += self._get()
                except BrokenSocketError:
                    self._is_running = False
                    break

                lines = read_buffer.split("\n")
                # The last item is an incomplete line (or ""); keep it for
                # the next read:
                read_buffer = lines.pop()
                for line in lines:
                    line = line.strip().split()
                    if not line:
                        continue  # Blank line: nothing for the hooks to index
                    self._process_defaults(line)
                    self._process_message(line)
                if self.is_stopped():
                    break
        finally:
            self._close()

    def keep_alive(self):
        """Ensure that we stay connected, stopping if the connection breaks.

        If nothing has been received for over 120 seconds, the server is
        pinged once; if there is still nothing 60 seconds after that ping,
        the connection is stopped.
        """
        now = time()
        if now - self._last_recv > 120:
            if self._last_ping < self._last_recv:
                log = "Last message was received over 120 seconds ago. Pinging."
                self.logger.debug(log)
                self.ping(self.host)
                self._last_ping = now
            elif now - self._last_ping > 60:
                self.logger.debug("No ping response in 60 seconds. Stopping.")
                self.stop()

    def stop(self, msg=None):
        """Request the IRC connection to close at earliest convenience."""
        if self._is_running:
            self._quit(msg)
            self._is_running = False

    def is_stopped(self):
        """Return whether the IRC connection has been (or is to be) closed."""
        return not self._is_running

+ 212
- 0
earwigbot/irc/data.py View File

@@ -0,0 +1,212 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import re

__all__ = ["Data"]

class Data(object):
"""Store data from an individual line received on IRC.

The line is parsed once, on construction; the results are exposed through
the read-only properties defined below.
"""

def __init__(self, bot, my_nick, line, msgtype):
"""Store the arguments and parse *line* right away.

*line* is indexed and joined like a list of words (see _parse()), *my_nick*
is stored lowercased, and *msgtype* selects the parsing done in _parse().
"""
self._bot = bot
self._my_nick = my_nick.lower()
self._line = line

self._is_private = self._is_command = False
self._msg = self._command = self._trigger = None
self._args = []
self._kwargs = {}

self._parse(msgtype)

def __repr__(self):
"""Return the canonical string representation of the Data."""
res = "Data(bot={0!r}, my_nick={1!r}, line={2!r})"
return res.format(self._bot, self.my_nick, self.line)

def __str__(self):
"""Return a nice string representation of the Data."""
return "<Data of {0!r}>".format(" ".join(self.line))

def _parse(self, msgtype):
"""Parse a line from IRC into its components as instance attributes.

The sender's nick, ident and host are taken from the first word of the
line and the channel from the third. The message text and the command
arguments are only parsed when *msgtype* is "PRIVMSG".
"""
# The first word is expected to look like ":nick!ident@host"; if it does
# not match, findall() returns an empty list and [0] raises IndexError.
sender = re.findall(r":(.*?)!(.*?)@(.*?)\Z", self.line[0])[0]
self._nick, self._ident, self._host = sender
self._chan = self.line[2]

if msgtype == "PRIVMSG":
if self.chan.lower() == self.my_nick:
# This is a privmsg to us, so set 'chan' as the nick of the
# sender instead of the 'channel', which is ourselves:
self._chan = self._nick
self._is_private = True
# Rejoin the remaining words and drop the first character of the result
# (presumably the ":" that introduces the message text).
self._msg = " ".join(self.line[3:])[1:]
self._parse_args()
self._parse_kwargs()

def _parse_args(self):
"""Parse command arguments from the message.

self.msg is converted into the string self.command and the argument
list self.args if the message starts with a "trigger" ("!", ".", or the
bot's name); self.is_command will be set to True, and self.trigger will
store the trigger string. Otherwise, is_command will be set to False.
"""
self._args = self.msg.strip().split()

try:
self._command = self.args.pop(0).lower()
except IndexError:
# The message was empty or only whitespace: nothing more to parse.
return

if self.command.startswith("!") or self.command.startswith("."):
# e.g. "!command arg1 arg2"
self._is_command = True
self._trigger = self.command[0]
self._command = self.command[1:] # Strip the "!" or "."
elif re.match(r"{0}\W*?$".format(re.escape(self.my_nick)),
self.command, re.U):
# e.g. "EarwigBot, command arg1 arg2"
self._is_command = True
self._trigger = self.my_nick
try:
self._command = self.args.pop(0).lower()
except IndexError:
self._command = ""
else:
try:
# Strip a single trailing period (but not an ellipsis) from the last
# argument, or from the command itself if there are no arguments.
if self.msg[-1] == "." and self.msg[-2] != ".":
if self.args:
self.args[-1] = self.args[-1][:-1]
else:
self._command = self.command[:-1]
except IndexError:
pass

def _parse_kwargs(self):
"""Parse keyword arguments embedded in self.args.

Parse a command given as "!command key1=value1 key2=value2..." into a
dict, self.kwargs, like {'key1': 'value1', 'key2': 'value2'...}.
Arguments without an "=", or with an empty key or value, are skipped.
"""
for arg in self.args:
try:
key, value = re.findall(r"^(.*?)\=(.*?)$", arg)[0]
except IndexError:
continue
if key and value:
self.kwargs[key] = value

@property
def my_nick(self):
"""Our nickname, *not* the nickname of the sender."""
return self._my_nick

@property
def line(self):
"""The full message received on IRC, including escape characters."""
return self._line

@property
def chan(self):
"""Channel the message was sent from.

This will be equal to :py:attr:`nick` if the message is a private
message.
"""
return self._chan

@property
def nick(self):
"""Nickname of the sender."""
return self._nick

@property
def ident(self):
"""`Ident <http://en.wikipedia.org/wiki/Ident>`_ of the sender."""
return self._ident

@property
def host(self):
"""Hostname of the sender."""
return self._host

@property
def msg(self):
"""Text of the sent message, if it is a message, else ``None``."""
return self._msg

@property
def is_private(self):
"""``True`` if this message was sent to us *only*, else ``False``."""
return self._is_private

@property
def is_command(self):
"""Boolean telling whether or not this message is a bot command.

A message is considered a command if and only if it begins with the
character ``"!"``, ``"."``, or the bot's name followed by optional
punctuation and a space (so ``EarwigBot: do something``, ``EarwigBot,
do something``, and ``EarwigBot do something`` are all valid).
"""
return self._is_command

@property
def command(self):
"""If the message is a command, this is the name of the command used.

See :py:attr:`is_command` for when a message is considered a command.
"""
return self._command

@property
def trigger(self):
"""If this message is a command, this is what triggered it.

It can be either "!" (``"!help"``), "." (``".help"``), or the bot's
name (``"EarwigBot: help"``). Otherwise, it will be ``None``."""
return self._trigger

@property
def args(self):
"""List of all arguments given to this command.

For example, the message ``"!command arg1 arg2 arg3=val3"`` will
produce the args ``["arg1", "arg2", "arg3=val3"]``.
"""
return self._args

@property
def kwargs(self):
"""Dictionary of keyword arguments given to this command.

For example, the message ``"!command arg1=val1 arg2=val2"`` will
produce the kwargs ``{"arg1": "val1", "arg2": "val2"}``. This is empty
if the message has no keyword arguments.
"""
return self._kwargs

+ 86
- 0
earwigbot/irc/frontend.py View File

@@ -0,0 +1,86 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from earwigbot.irc import IRCConnection, Data

__all__ = ["Frontend"]

class Frontend(IRCConnection):
    """
    **EarwigBot: IRC Frontend Component**

    The IRC frontend runs on a normal IRC server and expects users to interact
    with it and give it commands. Commands are stored as "command classes",
    subclasses of :py:class:`~earwigbot.commands.Command`. All command classes
    are automatically imported by :py:meth:`commands.load()
    <earwigbot.managers._ResourceManager.load>` if they are in
    :py:mod:`earwigbot.commands` or the bot's custom command directory
    (explained in the :doc:`documentation </customizing>`).
    """

    def __init__(self, bot):
        """Read the frontend settings from *bot*'s config and connect."""
        self.bot = bot
        cf = bot.config.irc["frontend"]
        base = super(Frontend, self)
        base.__init__(cf["host"], cf["port"], cf["nick"], cf["ident"],
                      cf["realname"], bot.logger.getChild("frontend"))
        self._connect()

    def __repr__(self):
        """Return the canonical string representation of the Frontend."""
        res = "Frontend(host={0!r}, port={1!r}, nick={2!r}, ident={3!r}, realname={4!r}, bot={5!r})"
        return res.format(self.host, self.port, self.nick, self.ident,
                          self.realname, self.bot)

    def __str__(self):
        """Return a nice string representation of the Frontend."""
        res = "<Frontend {0}!{1} at {2}:{3}>"
        return res.format(self.nick, self.ident, self.host, self.port)

    def _process_message(self, line):
        """Process a single message from IRC.

        *line* is the received line split on whitespace. JOINs and PRIVMSGs
        are wrapped in a :py:class:`Data` object and dispatched to the
        matching command hooks. Once the server has finished its welcome
        (numeric 376, end of MOTD, or 422, no MOTD available), the bot
        identifies to NickServ if credentials are configured and joins its
        startup channels. Lines with fewer than two words are ignored.
        """
        if len(line) < 2:
            return  # No command word to dispatch on

        if line[1] == "JOIN":
            data = Data(self.bot, self.nick, line, msgtype="JOIN")
            self.bot.commands.call("join", data)

        elif line[1] == "PRIVMSG":
            data = Data(self.bot, self.nick, line, msgtype="PRIVMSG")
            if data.is_private:
                self.bot.commands.call("msg_private", data)
            else:
                self.bot.commands.call("msg_public", data)
            self.bot.commands.call("msg", data)

        elif line[1] in ("376", "422"):  # On successful connection to the server
            # A server without a MOTD sends 422 instead of 376, so both mark
            # the point at which we can start talking.
            # If we're supposed to auth to NickServ, do that:
            try:
                username = self.bot.config.irc["frontend"]["nickservUsername"]
                password = self.bot.config.irc["frontend"]["nickservPassword"]
            except KeyError:
                pass  # No NickServ credentials configured
            else:
                msg = "IDENTIFY {0} {1}".format(username, password)
                # hidelog keeps the password out of the debug log:
                self.say("NickServ", msg, hidelog=True)

            # Join all of our startup channels:
            for chan in self.bot.config.irc["frontend"]["channels"]:
                self.join(chan)

+ 96
- 0
earwigbot/irc/rc.py View File

@@ -0,0 +1,96 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import re

__all__ = ["RC"]

class RC(object):
    """Store data from an event received from our IRC watcher.

    Build one with the channel and raw message text, call :py:meth:`parse` to
    populate ``page``, ``url``, ``user``, ``comment``, ``flags`` and
    ``is_edit``, then optionally :py:meth:`prettify` to build a colored
    summary line.
    """
    # Patterns are raw strings so that regex escapes such as \A, \[ and \s are
    # handed to the regex engine untouched rather than being (mis)read as
    # Python string escapes.
    re_color = re.compile(r"\x03([0-9]{1,2}(,[0-9]{1,2})?)?")
    re_edit = re.compile(r"\A\[\[(.*?)\]\]\s(.*?)\s(http://.*?)\s\*\s(.*?)\s\*\s(.*?)\Z")
    re_log = re.compile(r"\A\[\[(.*?)\]\]\s(.*?)\s\s\*\s(.*?)\s\*\s(.*?)\Z")

    # Output templates; \x02, \x0F and \x03NN are literal control characters
    # (formatting/color codes), so these must NOT be raw strings.
    pretty_edit = "\x02New {0}\x0F: \x0314[[\x0307{1}\x0314]]\x0306 * \x0303{2}\x0306 * \x0302{3}\x0306 * \x0310{4}"
    pretty_log = "\x02New {0}\x0F: \x0303{1}\x0306 * \x0302{2}\x0306 * \x0310{3}"

    def __init__(self, chan, msg):
        self.chan = chan
        self.msg = msg

    def __repr__(self):
        """Return the canonical string representation of the RC."""
        return "RC(chan={0!r}, msg={1!r})".format(self.chan, self.msg)

    def __str__(self):
        """Return a nice string representation of the RC."""
        return "<RC of {0!r} on {1}>".format(self.msg, self.chan)

    def parse(self):
        """Parse a recent change event into some variables.

        Sets ``self.page``, ``self.url``, ``self.user``, ``self.comment``,
        ``self.flags`` and ``self.is_edit``, and replaces ``self.msg`` with
        its color-stripped form.

        Raises :py:exc:`IndexError` if the message matches neither the edit
        nor the log-entry pattern.
        """
        # Strip IRC color codes; we don't want or need 'em:
        self.msg = self.re_color.sub("", self.msg).strip()
        msg = self.msg
        self.is_edit = True

        # Flags: 'M' for minor edit, 'B' for bot edit, 'create' for a user
        # creation log entry, etc:
        try:
            page, self.flags, url, user, comment = self.re_edit.findall(msg)[0]
        except IndexError:
            # We're probably missing the http:// part, because it's a log
            # entry, which lacks a URL:
            page, flags, user, comment = self.re_log.findall(msg)[0]
            # Build a URL ourselves from the channel name minus its first
            # character (e.g. "#en.wikipedia" -> "en.wikipedia"):
            url = "http://{0}.org/wiki/{1}".format(self.chan[1:], page)

            self.is_edit = False  # This is a log entry, not edit

            # Flags tends to have extra whitespace at the end when they're
            # log entries:
            self.flags = flags.strip()

        self.page, self.url, self.user, self.comment = page, url, user, comment

    def prettify(self):
        """Make a nice, colorful message to send back to the IRC front-end.

        Must be called after :py:meth:`parse`, which sets the attributes read
        here.
        """
        flags = self.flags
        if self.is_edit:
            if "N" in flags:
                event = "page"  # "New page:"
            else:
                event = "edit"  # "New edit:"
                if "B" in flags:
                    event = "bot edit"  # "New bot edit:"
                if "M" in flags:
                    event = "minor " + event  # "New minor (bot)? edit:"
            return self.pretty_edit.format(event, self.page, self.user,
                                           self.url, self.comment)

        if flags == "delete":
            event = "deletion"  # "New deletion:"
        elif flags == "protect":
            event = "protection"  # "New protection:"
        elif flags == "create":
            event = "user"  # "New user:"
        else:
            event = flags  # Works for "move", "block", etc
        return self.pretty_log.format(event, self.user, self.url, self.comment)

+ 129
- 0
earwigbot/irc/watcher.py View File

@@ -0,0 +1,129 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import imp
import os

from earwigbot.irc import IRCConnection, RC

__all__ = ["Watcher"]

class Watcher(IRCConnection):
    """
    **EarwigBot: IRC Watcher Component**

    The IRC watcher runs on a wiki recent-changes server and listens for
    edits. Users cannot interact with this part of the bot. When an event
    occurs, we run it through some rules stored in our working directory under
    :file:`rules.py`, which can result in wiki bot tasks being started or
    messages being sent to channels on the IRC frontend.
    """

    def __init__(self, bot):
        """Set up the connection from the "watcher" IRC config and connect."""
        self.bot = bot
        cf = bot.config.irc["watcher"]
        base = super(Watcher, self)
        base.__init__(cf["host"], cf["port"], cf["nick"], cf["ident"],
                      cf["realname"], bot.logger.getChild("watcher"))
        self._prepare_process_hook()
        self._connect()

    def __repr__(self):
        """Return the canonical string representation of the Watcher."""
        res = "Watcher(host={0!r}, port={1!r}, nick={2!r}, ident={3!r}, realname={4!r}, bot={5!r})"
        return res.format(self.host, self.port, self.nick, self.ident,
                          self.realname, self.bot)

    def __str__(self):
        """Return a nice string representation of the Watcher."""
        res = "<Watcher {0}!{1} at {2}:{3}>"
        return res.format(self.nick, self.ident, self.host, self.port)

    def _process_message(self, line):
        """Process a single message from IRC."""
        if line[1] == "PRIVMSG":
            chan = line[2]

            # Ignore messages originating from channels not in our list, to
            # prevent someone PMing us false data:
            if chan not in self.bot.config.irc["watcher"]["channels"]:
                return

            # Re-join the message words and drop the leading character of the
            # text (the ":" that prefixes an IRC trailing parameter):
            msg = " ".join(line[3:])[1:]
            rc = RC(chan, msg)  # New RC object to store this event's data
            rc.parse()  # Parse a message into pagenames, usernames, etc.
            self._process_rc_event(rc)

        # When we've finished starting up, join all watcher channels:
        elif line[1] == "376":
            for chan in self.bot.config.irc["watcher"]["channels"]:
                self.join(chan)

    def _prepare_process_hook(self):
        """Create our RC event process hook from information in rules.py.

        This will get put in the function self._process_hook, which takes the
        Bot object and an RC object and returns a list of frontend channels to
        report this event to.

        A missing :file:`rules.py` is not an error (the do-nothing default hook
        stays in place). A :file:`rules.py` that fails to import, or that has
        no ``process`` function, is logged and the default hook is kept.
        """
        # Set a default RC process hook that does nothing:
        self._process_hook = lambda bot, rc: ()

        path = self.bot.config.root_dir
        try:
            f, path, desc = imp.find_module("rules", [path])
        except ImportError:
            return
        try:
            module = imp.load_module("rules", f, path, desc)
        except Exception:
            # Don't let a broken rules file vanish silently; the watcher would
            # otherwise just appear to ignore every event.
            e = "RC event rules could not be imported (from {0})"
            self.logger.exception(e.format(path))
            return
        finally:
            # imp.find_module() returns a None file object for packages:
            if f:
                f.close()

        self._process_hook_module = module
        try:
            self._process_hook = module.process
        except AttributeError:
            e = "RC event rules imported correctly, but no process(bot, rc) function was found"
            self.logger.error(e)

    def _process_rc_event(self, rc):
        """Process a recent change event from IRC (or, an RC object).

        The actual processing is configurable, so we don't have that hard-coded
        here. We simply call our process hook (self._process_hook), created by
        self._prepare_process_hook() from the ``process(bot, rc)`` function in
        :file:`rules.py`. If the hook returns any channels and the frontend is
        running, a prettified version of the event (truncated to 400
        characters) is sent to each of them.
        """
        chans = self._process_hook(self.bot, rc)
        with self.bot.component_lock:
            frontend = self.bot.frontend
            if chans and frontend and not frontend.is_stopped():
                pretty = rc.prettify()
                if len(pretty) > 400:
                    msg = pretty[:397] + "..."
                else:
                    msg = pretty
                for chan in chans:
                    frontend.say(chan, msg)

+ 81
- 0
earwigbot/lazy.py View File

@@ -0,0 +1,81 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""
Implements a hierarchy of importing classes as defined in PEP 302 to load
modules in a safe yet lazy manner.
"""

from imp import acquire_lock, release_lock
import sys
from types import ModuleType

__all__ = ["LazyImporter"]

def _getattribute(self, attr):
    """Attribute-read hook installed on placeholder modules.

    Used as ``__getattribute__`` in the class built by ``_LazyModule``: the
    first attribute read triggers ``_load()`` and is then answered normally.
    """
    _load(self)
    # _load() has put ModuleType.__getattribute__ back on type(self), so this
    # lookup does not come back through this hook.
    return self.__getattribute__(attr)

def _setattr(self, attr, value):
    """Attribute-write hook installed on placeholder modules.

    Used as ``__setattr__`` in the class built by ``_LazyModule``: the first
    attribute write triggers ``_load()`` and is then applied normally.
    """
    _load(self)
    # _load() has put ModuleType.__setattr__ back on type(self), so this
    # assignment does not come back through this hook.
    self.__setattr__(attr, value)

def _load(self):
    """Turn the placeholder *self* into a normally-behaving module object."""
    # Restore the stock ModuleType hooks on the placeholder's class first, so
    # attribute access made from here on no longer re-enters
    # _getattribute()/_setattr().
    type(self).__getattribute__ = ModuleType.__getattribute__
    type(self).__setattr__ = ModuleType.__setattr__
    # ``reload`` is the Python 2 builtin; it is what actually runs the import
    # for the module object in place.
    reload(self)


class _LazyModule(type):
    # Calling ``_LazyModule(name)`` returns the ``sys.modules`` entry for
    # *name*, first creating a lazy placeholder module there if no entry
    # exists yet.
    def __new__(cls, name):
        # Hold the interpreter's import lock while we check and modify
        # sys.modules.
        acquire_lock()
        try:
            if name not in sys.modules:
                # Each placeholder gets its own one-off ModuleType subclass
                # whose attribute hooks trigger the real load (see
                # _getattribute/_setattr). _load() later patches the hooks on
                # type(self), i.e. on this per-module class.
                attributes = {
                    "__name__": name,
                    "__getattribute__": _getattribute,
                    "__setattr__": _setattr
                }
                parents = (ModuleType,)
                klass = type.__new__(cls, "module", parents, attributes)
                sys.modules[name] = klass(name)
            return sys.modules[name]
        finally:
            release_lock()


class LazyImporter(object):
    """Finder/loader pair (``find_module``/``load_module``) for lazy modules.

    Instances register themselves on ``sys.meta_path`` and remember every
    placeholder created through :py:meth:`new`.
    """

    def __init__(self):
        self._modules = {}
        sys.meta_path.append(self)

    def new(self, name):
        """Create (or fetch) the lazy module *name*, remember and return it."""
        self._modules[name] = placeholder = _LazyModule(name)
        return placeholder

    def find_module(self, fullname, path=None):
        """Return ourselves as loader for a known, not-yet-imported module.

        Returns ``None`` for anything we did not create or that is already in
        ``sys.modules``.
        """
        if fullname not in self._modules:
            return None
        if fullname in sys.modules:
            return None
        return self

    def load_module(self, fullname):
        """Hand out the stored placeholder for *fullname* and forget it.

        Raises :py:exc:`KeyError` if we hold no module under that name.
        """
        module = self._modules[fullname]
        del self._modules[fullname]
        return module

+ 269
- 0
earwigbot/managers.py View File

@@ -0,0 +1,269 @@
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import imp
from os import listdir, path
from re import sub
from threading import RLock, Thread
from time import gmtime, strftime

from earwigbot.commands import Command
from earwigbot.tasks import Task

__all__ = ["CommandManager", "TaskManager"]

class _ResourceManager(object):
    """
    **EarwigBot: Resource Manager**

    Resources are essentially objects dynamically loaded by the bot, both
    packaged with it (built-in resources) and created by users (plugins, aka
    custom resources). Currently, the only two types of resources are IRC
    commands and bot tasks. These are both loaded from two locations: the
    :py:mod:`earwigbot.commands` and :py:mod:`earwigbot.tasks` packages, and
    the :file:`commands/` and :file:`tasks/` directories within the bot's
    working directory.

    This class handles the low-level tasks of (re)loading resources via
    :py:meth:`load`, retrieving specific resources via :py:meth:`get`, and
    iterating over all resources via :py:meth:`__iter__`.
    """
    def __init__(self, bot, name, base):
        self.bot = bot
        self.logger = bot.logger.getChild(name)

        self._resources = {}
        self._resource_name = name  # e.g. "commands" or "tasks"
        self._resource_base = base  # e.g. Command or Task
        self._resource_access_lock = RLock()

    def __repr__(self):
        """Return the canonical string representation of the manager."""
        res = "{0}(bot={1!r}, name={2!r}, base={3!r})"
        return res.format(self.__class__.__name__, self.bot,
                          self._resource_name, self._resource_base)

    def __str__(self):
        """Return a nice string representation of the manager."""
        return "<{0} of {1}>".format(self.__class__.__name__, self.bot)

    def __iter__(self):
        """Iterate over all loaded resources while holding the access lock."""
        with self.lock:
            for resource in self._resources.itervalues():
                yield resource

    def _disabled_names(self):
        """Return the names listed under "disable" in this resource's config.

        The "disable" value may also be ``True`` (meaning "skip the built-in
        directory", see :py:meth:`load`); in that case no individual names are
        disabled and an empty list is returned, so callers can always use
        ``name in self._disabled_names()`` safely.
        """
        res_config = getattr(self.bot.config, self._resource_name)
        disabled = res_config.get("disable", [])
        if disabled is True or not disabled:
            return []
        return disabled

    def _load_resource(self, name, path, klass):
        """Instantiate a resource class and add it to the dictionary.

        *name* and *path* identify the module *klass* came from and are only
        used for log messages. Classes whose ``name`` attribute is disabled in
        the config are skipped; instantiation errors are logged, not raised.
        """
        res_type = self._resource_name[:-1]  # e.g. "command" or "task"
        if hasattr(klass, "name"):
            if klass.name in self._disabled_names():
                log = "Skipping disabled {0} {1}"
                self.logger.debug(log.format(res_type, klass.name))
                return
        try:
            resource = klass(self.bot)  # Create instance of resource
        except Exception:
            e = "Error instantiating {0} class in '{1}' (from {2})"
            self.logger.exception(e.format(res_type, name, path))
        else:
            self._resources[resource.name] = resource
            self.logger.debug("Loaded {0} {1}".format(res_type, resource.name))

    def _load_module(self, name, path):
        """Load a specific resource from a module, identified by name and path.

        We'll first try to import it using imp magic, and if that works, make
        instances of any classes inside that are subclasses of the base
        (:py:attr:`self._resource_base <_resource_base>`), add them to the
        resources dictionary with :py:meth:`self._load_resource()
        <_load_resource>`, and finally log the addition. Any problems along
        the way will either be ignored or logged.
        """
        try:
            f, path, desc = imp.find_module(name, [path])
        except ImportError:
            e = "Couldn't find module '{0}' (from {1})"
            self.logger.exception(e.format(name, path))
            return
        try:
            module = imp.load_module(name, f, path, desc)
        except Exception:
            e = "Couldn't load module '{0}' (from {1})"
            self.logger.exception(e.format(name, path))
            return
        finally:
            # imp.find_module() returns a None file object for packages:
            if f:
                f.close()

        for obj in vars(module).values():
            # isinstance() rather than an exact type() check, so classes
            # built with a custom metaclass are recognised as well:
            if isinstance(obj, type):
                isresource = issubclass(obj, self._resource_base)
                if isresource and obj is not self._resource_base:
                    self._load_resource(name, path, obj)

    def _load_directory(self, dir):
        """Load all valid resources in a given directory.

        Only ``.py``/``.pyc`` files not starting with ``_`` or ``.`` are
        considered; each module name is loaded at most once even if both a
        source and a compiled file exist.
        """
        self.logger.debug("Loading directory {0}".format(dir))
        disabled = self._disabled_names()
        processed = set()
        for name in listdir(dir):
            if not name.endswith((".py", ".pyc")):
                continue
            if name.startswith(("_", ".")):
                continue
            modname = sub(r"\.pyc?$", "", name)  # Remove extension
            if modname in disabled:
                log = "Skipping disabled module {0}".format(modname)
                self.logger.debug(log)
                continue
            if modname not in processed:
                self._load_module(modname, dir)
                processed.add(modname)

    @property
    def lock(self):
        """The resource access/modify lock."""
        return self._resource_access_lock

    def load(self):
        """Load (or reload) all valid resources into :py:attr:`_resources`."""
        name = self._resource_name  # e.g. "commands" or "tasks"
        with self.lock:
            self._resources.clear()
            builtin_dir = path.join(path.dirname(__file__), name)
            plugins_dir = path.join(self.bot.config.root_dir, name)
            if getattr(self.bot.config, name).get("disable") is True:
                log = "Skipping disabled builtins directory: {0}"
                self.logger.debug(log.format(builtin_dir))
            else:
                self._load_directory(builtin_dir)  # Built-in resources
            if path.isdir(plugins_dir):
                self._load_directory(plugins_dir)  # Custom resources, plugins
            else:
                log = "Skipping nonexistent plugins directory: {0}"
                self.logger.debug(log.format(plugins_dir))

        if self._resources:
            msg = "Loaded {0} {1}: {2}"
            resources = ", ".join(self._resources.keys())
            self.logger.info(msg.format(len(self._resources), name, resources))
        else:
            self.logger.info("Loaded 0 {0}".format(name))

    def get(self, key):
        """Return the class instance associated with a certain resource.

        Will raise :py:exc:`KeyError` if the resource (a command or task) is
        not found.
        """
        with self.lock:
            return self._resources[key]


class CommandManager(_ResourceManager):
    """
    Manages (i.e., loads, reloads, and calls) IRC commands.
    """
    def __init__(self, bot):
        super(CommandManager, self).__init__(bot, "commands", Command)

    def _wrap_check(self, command, data):
        """Return ``command.check(data)``, logging instead of raising.

        If the check itself raises, the error is logged and ``None`` (falsy)
        is returned.
        """
        try:
            result = command.check(data)
        except Exception:
            template = "Error checking command '{0}' with data: {1}:"
            self.logger.exception(template.format(command.name, data))
            return None
        return result

    def _wrap_process(self, command, data):
        """Run ``command.process(data)``, logging any error it raises."""
        try:
            command.process(data)
        except Exception:
            template = "Error executing command '{0}':"
            self.logger.exception(template.format(command.name))

    def call(self, hook, data):
        """Respond to a hook type and a :py:class:`Data` object.

        The first loaded command that listens on *hook* and whose check
        passes is run in its own daemon thread; no further commands are
        tried after that.
        """
        for command in self:
            if hook not in command.hooks:
                continue
            if not self._wrap_check(command, data):
                continue
            worker = Thread(target=self._wrap_process, args=(command, data))
            started = strftime("%b %d %H:%M:%S")
            worker.name = "irc:{0} ({1})".format(command.name, started)
            worker.daemon = True
            worker.start()
            return


class TaskManager(_ResourceManager):
    """
    Manages (i.e., loads, reloads, schedules, and runs) wiki bot tasks.
    """
    def __init__(self, bot):
        super(TaskManager, self).__init__(bot, "tasks", Task)

    def _wrapper(self, task, **kwargs):
        """Thread target: run *task* with *kwargs* and log how it ended."""
        try:
            task.run(**kwargs)
        except Exception:
            failure = "Task '{0}' raised an exception and had to stop:"
            self.logger.exception(failure.format(task.name))
            return
        success = "Task '{0}' finished successfully"
        self.logger.info(success.format(task.name))

    def start(self, task_name, **kwargs):
        """Start a given task in a new daemon thread, and return the thread.

        kwargs are passed to :py:meth:`task.run() <earwigbot.tasks.Task.run>`.
        If the task is not found, ``None`` will be returned and an error will
        be logged.
        """
        self.logger.info(
            "Starting task '{0}' in a new thread".format(task_name))

        try:
            task = self.get(task_name)
        except KeyError:
            self.logger.error("Couldn't find task '{0}'".format(task_name))
            return

        worker = Thread(target=self._wrapper, args=(task,), kwargs=kwargs)
        started = strftime("%b %d %H:%M:%S")
        worker.name = "{0} ({1})".format(task_name, started)
        worker.daemon = True
        worker.start()
        return worker

    def schedule(self, now=None):
        """Start all tasks that are supposed to be run at a given time.

        *now* is a time struct (as returned by :py:func:`time.gmtime`); the
        current UTC time is used when it is omitted.
        """
        when = now if now else gmtime()
        # Get list of tasks to run this turn:
        due = self.bot.config.schedule(when.tm_min, when.tm_hour, when.tm_mday,
                                       when.tm_mon, when.tm_wday)

        for entry in due:
            if isinstance(entry, list):
                # A [task_name, kwargs] pair: forward the kwargs to start().
                task_name, options = entry[0], entry[1]
                self.start(task_name, **options)
            else:
                # A bare task name.
                self.start(entry)

+ 143
- 0
earwigbot/tasks/__init__.py View File

@@ -0,0 +1,143 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from earwigbot import exceptions
from earwigbot import wiki

__all__ = ["Task"]

class Task(object):
    """
    **EarwigBot: Base Bot Task**

    This package provides built-in wiki bot "tasks" EarwigBot runs. Additional
    tasks can be installed as plugins in the bot's working directory.

    This class (import with ``from earwigbot.tasks import Task``) can be
    subclassed to create custom bot tasks.

    To run a task, use :py:meth:`bot.tasks.start(name, **kwargs)
    <earwigbot.managers.TaskManager.start>`. ``**kwargs`` get passed to the
    Task's :meth:`run` method.
    """
    name = None
    number = 0

    def __init__(self, bot):
        """Constructor for new tasks.

        This is called once immediately after the task class is loaded by
        the task manager (in :py:meth:`tasks.load()
        <earwigbot.managers._ResourceManager.load>`). Don't override this
        directly; if you do, remember to place
        ``super(Task, self).__init__()`` first. Use :py:meth:`setup` for
        typical task-init/setup needs.
        """
        self.bot = bot
        self.config = bot.config
        self.logger = bot.tasks.logger.getChild(self.name)
        self.setup()

    def __repr__(self):
        """Return the canonical string representation of the Task."""
        res = "Task(name={0!r}, number={1!r}, bot={2!r})"
        return res.format(self.name, self.number, self.bot)

    def __str__(self):
        """Return a nice string representation of the Task."""
        res = "<Task {0} ({1}) of {2}>"
        return res.format(self.name, self.number, self.bot)

    def setup(self):
        """Hook called immediately after the task is loaded.

        Does nothing by default; feel free to override.
        """
        pass

    def run(self, **kwargs):
        """Main entry point to run a given task.

        This is called directly by :py:meth:`tasks.start()
        <earwigbot.managers.TaskManager.start>` and is the main way to make a
        task do stuff. *kwargs* will be any keyword arguments passed to
        :py:meth:`~earwigbot.managers.TaskManager.start`, which are entirely
        optional.
        """
        pass

    def make_summary(self, comment):
        """Make an edit summary by filling in variables in a config value.

        :py:attr:`config.wiki["summary"] <earwigbot.config.BotConfig.wiki>` is
        used, where ``$2`` is replaced by the main summary body, given by the
        *comment* argument, and ``$1`` is replaced by the task number.

        If the config value is not found, we'll just return *comment* as-is.
        """
        try:
            summary = self.bot.config.wiki["summary"]
        except KeyError:
            return comment
        # "$1" is filled in before "$2" so that a literal "$1" inside
        # *comment* is left untouched.
        return summary.replace("$1", str(self.number)).replace("$2", comment)

    def shutoff_enabled(self, site=None):
        """Return whether on-wiki shutoff is enabled for this task.

        We check a certain page for certain content. This is determined by
        our config file: :py:attr:`config.wiki["shutoff"]["page"]
        <earwigbot.config.BotConfig.wiki>` is used as the title, with any
        embedded ``$1`` replaced by our username and ``$2`` replaced by the
        task number; and :py:attr:`config.wiki["shutoff"]["disabled"]
        <earwigbot.config.BotConfig.wiki>` is used as the content.

        If the page has that exact content or the page does not exist, then
        shutoff is "disabled", meaning the bot is supposed to run normally, and
        we return ``False``. If the page's content is something other than
        what we expect, shutoff is enabled, and we return ``True``.

        If there is no ``"shutoff"`` section in the wiki config at all, the
        check is skipped and ``False`` is returned.

        If a site is not provided, we'll try to use :py:attr:`self.site <site>`
        if it's set. Otherwise, we'll use our default site.
        """
        if not site:
            if hasattr(self, "site"):
                site = self.site
            else:
                site = self.bot.wiki.get_site()

        try:
            cfg = self.config.wiki["shutoff"]
        except KeyError:
            return False
        title = cfg.get("page", "User:$1/Shutoff/Task $2")
        username = site.get_user().name
        title = title.replace("$1", username).replace("$2", str(self.number))
        page = site.get_page(title)

        try:
            content = page.get()
        except exceptions.PageNotFoundError:
            return False
        if content == cfg.get("disabled", "run"):
            return False

        # Logger.warning() is the documented spelling; warn() is deprecated.
        self.logger.warning("Emergency task shutoff has been enabled!")
        return True

+ 329
- 0
earwigbot/tasks/wikiproject_tagger.py View File

@@ -0,0 +1,329 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import re

from earwigbot import exceptions
from earwigbot.tasks import Task
from earwigbot.wiki import constants

class WikiProjectTagger(Task):
"""A task to tag talk pages with WikiProject banners.

Usage: :command:`earwigbot -t wikiproject_tagger PATH
--banner BANNER (--category CAT | --file FILE) [--summary SUM]
[--append TEXT] [--autoassess] [--nocreate] [--recursive NUM]
[--site SITE]`

.. glossary::

``--banner BANNER``
the page name of the banner to add, without a namespace (unless the
namespace is something other than ``Template``) so
``--banner WikiProject Biography`` for ``{{WikiProject Biography}}``
``--category CAT`` or ``--file FILE``
determines which pages to tag; either all pages in a category (to
include subcategories as well, see ``--recursive``) or all
pages/categories in a file (utf-8 encoded and path relative to the
current directory)
``--summary SUM``
an optional edit summary to use; defaults to
``"Adding WikiProject banner {{BANNER}}."``
``--append TEXT``
optional text to append to the banner (after an autoassessment, if
any), like ``|importance=low``
``--autoassess``
try to assess each article's class automatically based on the class of
other banners on the same page
``--nocreate``
don't create new talk pages with just a banner if the page doesn't
already exist
``--recursive NUM``
recursively go through subcategories up to a maximum depth of ``NUM``,
or if ``NUM`` isn't provided, go infinitely (this can be dangerous)
``--site SITE``
the ID of the site to tag pages on, defaulting to the... default site

"""
name = "wikiproject_tagger"

# Regexes for template names that should always go above the banner, based
# on [[Wikipedia:Talk page layout]]:
TOP_TEMPS = [
r"skip ?to ?(toc|talk|toctalk)$",

r"ga ?nominee$",

r"(user ?)?talk ?(header|page|page ?header)$",

r"community ?article ?probation$",
r"censor(-nudity)?$",
r"blp(o| ?others?)?$",
r"controvers(ial2?|y)$",

r"(not ?(a ?)?)?forum$",
r"tv(episode|series)talk$",
r"recurring ?themes$",
r"faq$",
r"(round ?in ?)?circ(les|ular)$",

r"ar(ti|it)cle ?(history|milestones)$",
r"failed ?ga$",
r"old ?prod( ?full)?$",
r"(old|previous) ?afd$",

r"((wikiproject|wp) ?)?bio(graph(y|ies))?$",
]

def _upperfirst(self, text):
    """Return *text* with its first character uppercased.

    An empty string is returned unchanged.
    """
    # Slicing (unlike indexing) is safe on an empty string.
    return text[:1].upper() + text[1:]

def run(self, **kwargs):
    """Main entry point for the bot task.

    Requires a ``banner`` kwarg plus at least one of ``category`` / ``file``;
    otherwise an error is logged and nothing is done. Stops quietly if the
    banner has no known names or if on-wiki shutoff is triggered mid-run.
    """
    has_pages = "file" in kwargs or "category" in kwargs
    if not has_pages:
        self.logger.error("No pages to tag; I need either a 'category' or a 'file' passed as kwargs")
        return
    if "banner" not in kwargs:
        self.logger.error("Needs a banner to add passed as the 'banner' kwarg")
        return

    site = self.bot.wiki.get_site(name=kwargs.get("site"))
    banner, names = self.get_names(site, kwargs["banner"])
    if not names:
        return

    job = _Job(
        banner,
        names,
        kwargs.get("summary", "Adding WikiProject banner $3."),
        kwargs.get("append"),
        kwargs.get("autoassess", False),
        kwargs.get("nocreate", False),
    )
    depth = kwargs.get("recursive", 0)

    try:
        self.run_job(kwargs, site, job, depth)
    except _ShutoffEnabled:
        return

def run_job(self, kwargs, site, job, recursive):
    """Run a tagging *job* on a given *site*.

    Pages come from ``kwargs["category"]`` and/or ``kwargs["file"]``. The
    file is read as UTF-8, one title per line; blank lines are ignored and a
    title may optionally be wrapped in ``[[...]]``. Titles in the category
    namespace are processed as categories (honouring *recursive*), everything
    else as a single page.
    """
    if "category" in kwargs:
        title = kwargs["category"]
        title = self.guess_namespace(site, title, constants.NS_CATEGORY)
        self.process_category(site.get_page(title), job, recursive)

    if "file" in kwargs:
        # Read bytes and decode explicitly, so the decode happens once at the
        # I/O boundary regardless of platform newline handling.
        with open(kwargs["file"], "rb") as fileobj:
            for line in fileobj:
                # Strip the line ending (and stray whitespace) *before*
                # looking for the closing "]]"; otherwise the trailing
                # newline hides it and ends up in the page title.
                title = line.decode("utf8").strip()
                if not title:
                    continue
                if title.startswith("[[") and title.endswith("]]"):
                    title = title[2:-2]
                page = site.get_page(title)
                if page.namespace == constants.NS_CATEGORY:
                    self.process_category(page, job, recursive)
                else:
                    self.process_page(page, job)

def guess_namespace(self, site, title, assumed):
    """If the given *title* does not have an explicit namespace, guess it.

    For example, when transcluding templates, the namespace is guessed to
    be ``NS_TEMPLATE`` unless one is explicitly declared (so ``{{foo}}`` ->
    ``[[Template:Foo]]``, but ``{{:foo}}`` -> ``[[Foo]]``).

    A title is returned untouched only when the text before its first colon
    is a namespace name known to *site*; in every other case the name of the
    *assumed* namespace is prepended.
    """
    prefix, colon, _ = title.partition(":")
    if colon:
        try:
            site.namespace_name_to_id(prefix)
        except exceptions.NamespaceNotFoundError:
            # The colon is just part of the page name.
            pass
        else:
            return title
    return u":".join((site.namespace_id_to_name(assumed), title))

def get_names(self, site, banner):
    """Return all possible aliases for a given *banner* template.

    Returns a ``(banner, names)`` tuple. *banner* comes back with its
    namespace prefix removed if it was given with an explicit one. *names*
    lists the banner, its full title and every redirect to it (for redirects
    in the template namespace, both with and without the prefix), or is
    ``None`` if the banner page does not exist.
    """
    title = self.guess_namespace(site, banner, constants.NS_TEMPLATE)
    if title == banner:
        # An explicit namespace was given; keep only the part after it.
        banner = banner.split(":", 1)[1]

    page = site.get_page(title)
    if page.exists != page.PAGE_EXISTS:
        self.logger.error(u"Banner [[{0}]] does not exist".format(title))
        return banner, None

    names = [self._upperfirst(banner)]
    if banner != title:
        names.append(self._upperfirst(title))

    result = site.api_query(action="query", list="backlinks", bllimit=500,
                            blfilterredir="redirects", bltitle=title)
    for redirect in result["query"]["backlinks"]:
        alias = redirect["title"]
        names.append(alias)
        if redirect["ns"] == constants.NS_TEMPLATE:
            names.append(alias.split(":", 1)[1])

    self.logger.debug(
        u"Found {0} aliases for banner [[{1}]]".format(len(names), title))
    return banner, names

def process_category(self, page, job, recursive):
    """Try to tag all pages in the given category.

    *page* is the category whose members are walked and *job* is passed
    through to :py:meth:`process_page`. *recursive* controls descent into
    subcategories: ``True`` recurses without a depth limit, a non-zero
    integer recurses that many more levels, and a false value skips
    subcategories entirely.
    """
    self.logger.info(u"Processing category: [[{0}]]".format(page.title))
    for member in page.get_members():
        if member.namespace == constants.NS_CATEGORY:
            if recursive is True:
                self.process_category(member, job, True)
            elif recursive:
                # Spend one level of the remaining depth budget.
                self.process_category(member, job, recursive - 1)
        else:
            self.process_page(member, job)

def process_page(self, page, job):
    """Try to tag a specific *page* using the *job* description.

    The banner is always placed on the talk page, so a non-talk *page* is
    first swapped for its talk counterpart. Nonexistent talk pages are
    created with just the banner unless ``job.nocreate`` is set; invalid
    pages and pages already carrying one of ``job.names`` are skipped.
    Raises ``_ShutoffEnabled`` if the periodic shutoff check trips.
    """
    # Do a shutoff check every ten pages
    if job.counter % 10 == 0 and self.shutoff_enabled(page.site):
        raise _ShutoffEnabled()
    job.counter += 1

    talk = page if page.is_talkpage else page.toggle_talk()
    try:
        code = talk.parse()
    except exceptions.PageNotFoundError:
        if job.nocreate:
            msg = u"Skipping nonexistent page: [[{0}]]".format(talk.title)
            self.logger.info(msg)
            return
        msg = u"Tagging new page: [[{0}]]".format(talk.title)
        self.logger.info(msg)
        new_banner = "{{" + job.banner + job.append + "}}"
        new_summary = job.summary.replace("$3", new_banner)
        talk.edit(new_banner, self.make_summary(new_summary))
        return
    except exceptions.InvalidPageError:
        msg = u"Skipping invalid page: [[{0}]]".format(talk.title)
        self.logger.error(msg)
        return

    # Bail out if any alias of the banner is already on the page.
    for existing in code.ifilter_templates(recursive=True):
        alias = self._upperfirst(existing.name.strip())
        if alias in job.names:
            msg = u"Skipping page: [[{0}]]; already tagged with '{1}'"
            self.logger.info(msg.format(talk.title, alias))
            return

    banner = self.make_banner(job, code)
    shell = self.get_banner_shell(code)
    if not shell:
        self.add_banner(code, banner)
    elif shell.has_param(1):
        # Put the new banner first inside the shell's existing list.
        shell.get(1).value.insert(0, banner + "\n")
    else:
        shell.add(1, banner)
    self.apply_genfixes(code)

    self.logger.info(u"Tagging page: [[{0}]]".format(talk.title))
    edit_summary = job.summary.replace("$3", banner)
    talk.edit(unicode(code), self.make_summary(edit_summary))

def make_banner(self, job, code):
    """Return banner text to add based on a *job* and a page's *code*.

    The result is ``{{<job.banner><job.append>}}``. When ``job.autoassess``
    is set, the ``class`` parameters of the templates already in *code* are
    tallied; if one recognised class accounts for more than 75% of them, a
    matching ``|class=`` parameter is inserted before ``job.append``. Pages
    with no recognised ``class`` values get no assessment.
    """
    banner = "{{" + job.banner
    if job.autoassess:
        classes = {"fa": 0, "fl": 0, "ga": 0, "a": 0, "b": 0, "start": 0,
                   "stub": 0, "list": 0, "dab": 0, "c": 0, "redirect": 0,
                   "book": 0, "template": 0, "category": 0}
        for template in code.ifilter_templates(recursive=True):
            if template.has_param("class"):
                value = unicode(template.get("class").value).lower()
                if value in classes:
                    classes[value] += 1
        total = sum(classes.values())
        # With nothing tallied there is no majority to copy, and dividing
        # by the zero total would raise ZeroDivisionError.
        if total:
            rank = max(classes, key=classes.get)
            confidence = float(classes[rank]) / total
            if confidence > 0.75:
                if rank in ("fa", "fl", "ga"):
                    banner += "|class=" + rank.upper()
                else:
                    banner += "|class=" + self._upperfirst(rank)
    return banner + job.append + "}}"

def get_banner_shell(self, code):
    """Return the banner shell template within *code*, else ``None``.

    Templates are first searched with ``filter_templates()``'s default
    recursion setting; only if that finds nothing is the search repeated
    with ``recursive=True``. The first match is logged and returned.
    """
    pattern = r"^\{\{\s*((WikiProject|WP)[ _]?Banner[ _]?S(hell)?|W(BPS|PBS|PB)|Shell)"
    found = (code.filter_templates(matches=pattern) or
             code.filter_templates(matches=pattern, recursive=True))
    if not found:
        return None
    shell = found[0]
    self.logger.debug(u"Inserting banner into shell: {0}".format(shell.name))
    return shell

def add_banner(self, code, banner):
    """Add *banner* to *code*, following template order conventions.

    Each template yielded by ``code.ifilter_templates()`` has its name
    lowercased (underscores become spaces) and matched against every regex
    in ``self.TOP_TEMPS``. The banner is inserted at the position just past
    the last matching template, or at position 0 if none matches.
    """
    # NOTE(review): the position counts templates, yet it is handed to
    # code.insert() -- confirm both use the same indexing when other
    # content sits between the templates.
    position = 0
    for ordinal, template in enumerate(code.ifilter_templates(), 1):
        name = template.name.lower().replace("_", " ")
        for pattern in self.TOP_TEMPS:
            if re.match(pattern, name):
                self.logger.info("Skipping top template: {0}".format(name))
                position = ordinal

    self.logger.debug(u"Inserting banner at index {0}".format(position))
    code.insert(position, banner)

def apply_genfixes(self, code):
    """Apply general fixes to *code*, such as template substitution.

    Currently the only fix is renaming every template matched by the
    unsigned-comment pattern below to ``subst:unsigned``, logging each one.
    """
    unsigned = r"^\{\{\s*((un|no)?s(i((gn|ng)(ed3?)?|g))?|usu|tilde|forgot to sign|without signature)"
    for match in code.ifilter_templates(matches=unsigned):
        self.logger.debug("Applying genfix: substitute {{unsigned}}")
        match.name = "subst:unsigned"


class _Job(object):
    """Describes one wikiproject-tagging run.

    Bundles the banner to add and its known aliases (*names*), the edit
    summary, the text appended to the banner, the *autoassess* and
    *nocreate* switches, and a *counter* that starts at zero and is bumped
    for every page handed to the tagger.
    """
    def __init__(self, banner, names, summary, append, autoassess, nocreate):
        self.counter = 0
        self.banner, self.names = banner, names
        self.summary, self.append = summary, append
        self.autoassess, self.nocreate = autoassess, nocreate


class _ShutoffEnabled(Exception):
    """Signal that shutoff is enabled and tagging must stop.

    Raised by process_page(). Caught by run(), which will then stop the
    task.
    """

+ 157
- 0
earwigbot/util.py View File

@@ -0,0 +1,157 @@
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""
usage: :command:`earwigbot [-h] [-v] [-d | -q] [-t NAME] [PATH] ...`

This is EarwigBot's command-line utility, enabling you to easily start the bot
or run specific tasks.

.. glossary::

``PATH``
path to the bot's working directory, which will be created if it doesn't
exist; current directory assumed if not specified
``-h``, ``--help``
show this help message and exit
``-v``, ``--version``
show program's version number and exit
``-d``, ``--debug``
print all logs, including ``DEBUG``-level messages
``-q``, ``--quiet``
don't print any logs except warnings and errors
``-t NAME``, ``--task NAME``
given the name of a task, the bot will run it instead of the main bot and
then exit
``TASK_ARGS``
with --task, will pass any remaining arguments to the task's
:py:meth:`.Task.run` method

"""

from argparse import Action, ArgumentParser, REMAINDER
import logging
from os import path
from time import sleep

from earwigbot import __version__
from earwigbot.bot import Bot

__all__ = ["main"]

class _StoreTaskArg(Action):
    """A custom argparse action that turns leftover arguments into kwargs.

    ``--key=value`` and ``--key value`` become ``{"key": "value"}``; a flag
    with no value becomes ``{"key": True}``; repeating a key collects its
    values in a list. Bare values not preceded by a flag are ignored. The
    resulting dict is stored on the namespace as ``task_args``.
    """
    def __call__(self, parser, namespace, values, option_string=None):
        kwargs = {}
        pending = None  # Flag seen last that is still waiting for a value
        for arg in values:
            is_flag = arg.startswith("-")
            if is_flag and "=" in arg:
                key, _, val = arg.partition("=")
                self.insert(kwargs, key.lstrip("-"), val)
                continue
            if is_flag:
                # A new flag ends the previous one, which then counts as a
                # plain boolean switch.
                if pending and pending not in kwargs:
                    kwargs[pending] = True
                pending = arg.lstrip("-")
            elif pending:
                self.insert(kwargs, pending, arg)
                pending = None
        if pending and pending not in kwargs:
            kwargs[pending] = True
        namespace.task_args = kwargs

    def insert(self, kwargs, key, value):
        """Add a key/value pair to kwargs; support multiple values per key."""
        if key not in kwargs:
            kwargs[key] = value
            return
        try:
            kwargs[key].append(value)
        except AttributeError:
            # First repeat of this key: promote the single value to a list.
            kwargs[key] = [kwargs[key], value]


def main():
    """Main entry point for the command-line utility.

    Parses the command line, prints the version banner, and builds a
    :py:class:`Bot` rooted at the given path. With ``--task``, the named
    task is started with the remaining arguments as keyword arguments and
    this function waits for its thread to finish; otherwise the full bot is
    run until it exits or is interrupted with Ctrl-C.
    """
    version = "EarwigBot v{0}".format(__version__)
    desc = """This is EarwigBot's command-line utility, enabling you to easily
              start the bot or run specific tasks."""
    parser = ArgumentParser(description=desc)
    parser.add_argument("path", nargs="?", metavar="PATH", default=path.curdir,
                        help="""path to the bot's working directory, which will
                                be created if it doesn't exist; current
                                directory assumed if not specified""")
    parser.add_argument("-v", "--version", action="version", version=version)
    logger = parser.add_mutually_exclusive_group()
    logger.add_argument("-d", "--debug", action="store_true",
                        help="print all logs, including DEBUG-level messages")
    logger.add_argument("-q", "--quiet", action="store_true",
                        help="don't print any logs except warnings and errors")
    parser.add_argument("-t", "--task", metavar="NAME",
                        help="""given the name of a task, the bot will run it
                                instead of the main bot and then exit""")
    parser.add_argument("task_args", nargs=REMAINDER, action=_StoreTaskArg,
                        metavar="TASK_ARGS",
                        help="""with --task, will pass these arguments to the
                                task's run() method""")
    args = parser.parse_args()

    # Leftover arguments only make sense when a task will consume them.
    if not args.task and args.task_args:
        unrecognized = " ".join(args.task_args)
        parser.error("unrecognized arguments: {0}".format(unrecognized))

    level = logging.INFO
    if args.debug:
        level = logging.DEBUG
    elif args.quiet:
        level = logging.WARNING
    # Parenthesised so the lines are valid under both print-statement and
    # print-function syntax; the output is the version plus a blank line.
    print(version)
    print("")

    bot = Bot(path.abspath(args.path), level=level)
    if args.task:
        thread = bot.tasks.start(args.task, **args.task_args)
        if not thread:
            return
        try:
            while thread.is_alive():  # Keep it alive; it's a daemon
                sleep(1)
        except KeyboardInterrupt:
            pass
        finally:
            if thread.is_alive():
                bot.tasks.logger.warn("The task will be killed")
    else:
        try:
            bot.run()
        except KeyboardInterrupt:
            pass
        finally:
            if bot.is_running:
                bot.stop()

if __name__ == "__main__":
main()

+ 51
- 0
earwigbot/wiki/__init__.py View File

@@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""
**EarwigBot: Wiki Toolset**

This is a collection of classes and functions to read from and write to
Wikipedia and other wiki sites. No connection whatsoever to `python-wikitools
<http://code.google.com/p/python-wikitools/>`_ written by `Mr.Z-man
<http://en.wikipedia.org/wiki/User:Mr.Z-man>`_, other than a similar purpose.
We share no code.

Import the toolset directly with ``from earwigbot import wiki``. If using the
built-in integration with the rest of the bot, :py:class:`~earwigbot.bot.Bot`
objects contain a :py:attr:`~earwigbot.bot.Bot.wiki` attribute, which is a
:py:class:`~earwigbot.wiki.sitesdb.SitesDB` object tied to the :file:`sites.db`
file located in the same directory as :file:`config.yml`. That object has the
principal methods :py:meth:`~earwigbot.wiki.sitesdb.SitesDB.get_site`,
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.add_site`, and
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.remove_site` that should handle all
of your :py:class:`~earwigbot.wiki.site.Site` (and thus,
:py:class:`~earwigbot.wiki.page.Page`,
:py:class:`~earwigbot.wiki.category.Category`, and
:py:class:`~earwigbot.wiki.user.User`) needs.
"""

from earwigbot.wiki.category import *
from earwigbot.wiki.constants import *
from earwigbot.wiki.page import *
from earwigbot.wiki.site import *
from earwigbot.wiki.sitesdb import *
from earwigbot.wiki.user import *

+ 205
- 0
earwigbot/wiki/category.py View File

@@ -0,0 +1,205 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from earwigbot.wiki.page import Page

__all__ = ["Category"]

class Category(Page):
    """
    **EarwigBot: Wiki Toolset: Category**

    Represents a category on a given :py:class:`~earwigbot.wiki.site.Site`, a
    subclass of :py:class:`~earwigbot.wiki.page.Page`. Provides additional
    methods, but :py:class:`~earwigbot.wiki.page.Page`'s own methods should
    work fine on :py:class:`Category` objects. :py:meth:`site.get_page()
    <earwigbot.wiki.site.Site.get_page>` will return a :py:class:`Category`
    instead of a :py:class:`~earwigbot.wiki.page.Page` if the given title is in
    the category namespace; :py:meth:`~earwigbot.wiki.site.Site.get_category`
    is shorthand, accepting category names without the namespace prefix.

    *Attributes:*

    - :py:attr:`size`:    the total number of members in the category
    - :py:attr:`pages`:   the number of pages in the category
    - :py:attr:`files`:   the number of files in the category
    - :py:attr:`subcats`: the number of subcategories in the category

    *Public methods:*

    - :py:meth:`get_members`: iterates over Pages in the category
    """

    def __repr__(self):
        """Return the canonical string representation of the Category."""
        res = "Category(title={0!r}, follow_redirects={1!r}, site={2!r})"
        return res.format(self._title, self._follow_redirects, self._site)

    def __str__(self):
        """Return a nice string representation of the Category."""
        return '<Category "{0}" of {1}>'.format(self.title, str(self.site))

    def _get_members_via_api(self, limit, follow):
        """Iterate over Pages in the category using the API.

        Yields at most *limit* pages when *limit* is truthy, otherwise every
        member. *follow* is passed to ``site.get_page()`` as
        *follow_redirects*. Queries are repeated for as long as the API
        response carries a ``query-continue`` marker.
        """
        params = {"action": "query", "list": "categorymembers",
                  "cmtitle": self.title}

        while 1:
            params["cmlimit"] = limit if limit else "max"
            result = self.site.api_query(**params)
            members = result["query"]["categorymembers"]
            for member in members:
                title = member["title"]
                yield self.site.get_page(title, follow_redirects=follow)

            if "query-continue" not in result:
                break
            if limit:
                limit -= len(members)
                if limit <= 0:
                    # The quota is used up. Looping again would send
                    # cmlimit="max" and silently ignore the limit.
                    break
            qcontinue = result["query-continue"]["categorymembers"]
            params["cmcontinue"] = qcontinue["cmcontinue"]

    def _get_members_via_sql(self, limit, follow):
        """Iterate over Pages in the category using SQL.

        *limit* and *follow* have the same meaning as in
        :py:meth:`_get_members_via_api`. All rows are read into a list
        before the first page is yielded.
        """
        query = """SELECT page_title, page_namespace, page_id FROM page
                   JOIN categorylinks ON page_id = cl_from
                   WHERE cl_to = ?"""
        # cl_to is matched against the underscored title without its
        # namespace prefix.
        title = self.title.replace(" ", "_").split(":", 1)[1]

        if limit:
            query += " LIMIT ?"
            result = self.site.sql_query(query, (title, limit))
        else:
            result = self.site.sql_query(query, (title,))

        members = list(result)
        for row in members:
            base = row[0].replace("_", " ").decode("utf8")
            namespace = self.site.namespace_id_to_name(row[1])
            if namespace:
                title = u":".join((namespace, base))
            else:  # Avoid doing a silly (albeit valid) ":Pagename" thing
                title = base
            yield self.site.get_page(title, follow_redirects=follow,
                                     pageid=row[2])

    def _get_size_via_api(self, member_type):
        """Return the size of the category using the API.

        *member_type* is the ``categoryinfo`` key to read: ``"size"``,
        ``"pages"``, ``"files"``, or ``"subcats"``.
        """
        result = self.site.api_query(action="query", prop="categoryinfo",
                                     titles=self.title)
        info = result["query"]["pages"].values()[0]["categoryinfo"]
        return info[member_type]

    def _get_size_via_sql(self, member_type):
        """Return the size of the category using SQL.

        ``"size"`` counts every row for the category; any other
        *member_type* is filtered on ``cl_type`` after dropping its trailing
        ``s`` (``"pages"`` -> ``"page"``).
        """
        query = "SELECT COUNT(*) FROM categorylinks WHERE cl_to = ?"
        title = self.title.replace(" ", "_").split(":", 1)[1]
        if member_type == "size":
            result = self.site.sql_query(query, (title,))
        else:
            query += " AND cl_type = ?"
            result = self.site.sql_query(query, (title, member_type[:-1]))
        return list(result)[0][0]

    def _get_size(self, member_type):
        """Return the size of the category.

        Hands the API and SQL implementations to ``site.delegate()``, which
        selects one and calls it with *member_type*.
        """
        services = {
            self.site.SERVICE_API: self._get_size_via_api,
            self.site.SERVICE_SQL: self._get_size_via_sql
        }
        return self.site.delegate(services, (member_type,))

    @property
    def size(self):
        """The total number of members in the category.

        Includes pages, files, and subcats. Equal to :py:attr:`pages` +
        :py:attr:`files` + :py:attr:`subcats`. This will use either the API or
        SQL depending on which are enabled and the amount of lag on each. This
        is handled by :py:meth:`site.delegate()
        <earwigbot.wiki.site.Site.delegate>`.
        """
        return self._get_size("size")

    @property
    def pages(self):
        """The number of pages in the category.

        This will use either the API or SQL depending on which are enabled and
        the amount of lag on each. This is handled by :py:meth:`site.delegate()
        <earwigbot.wiki.site.Site.delegate>`.
        """
        return self._get_size("pages")

    @property
    def files(self):
        """The number of files in the category.

        This will use either the API or SQL depending on which are enabled and
        the amount of lag on each. This is handled by :py:meth:`site.delegate()
        <earwigbot.wiki.site.Site.delegate>`.
        """
        return self._get_size("files")

    @property
    def subcats(self):
        """The number of subcategories in the category.

        This will use either the API or SQL depending on which are enabled and
        the amount of lag on each. This is handled by :py:meth:`site.delegate()
        <earwigbot.wiki.site.Site.delegate>`.
        """
        return self._get_size("subcats")

    def get_members(self, limit=None, follow_redirects=None):
        """Iterate over Pages in the category.

        If *limit* is given, we will provide this many pages, or less if the
        category is smaller. By default, *limit* is ``None``, meaning we will
        keep iterating over members until the category is exhausted.
        *follow_redirects* is passed directly to :py:meth:`site.get_page()
        <earwigbot.wiki.site.Site.get_page>`; it defaults to ``None``, which
        will use the value passed to our :py:meth:`__init__`.

        This will use either the API or SQL depending on which are enabled and
        the amount of lag on each. This is handled by :py:meth:`site.delegate()
        <earwigbot.wiki.site.Site.delegate>`.

        .. note::
           Be careful when iterating over very large categories with no limit.
           If using the API, at best, you will make one query per 5000 pages,
           which can add up significantly for categories with hundreds of
           thousands of members. As for SQL, note that *all page titles are
           stored internally* as soon as the query is made, so the site-wide
           SQL lock can be freed and unrelated queries can be made without
           requiring a separate connection to be opened. This is generally not
           an issue unless your category's size approaches several hundred
           thousand, in which case the sheer number of titles in memory becomes
           problematic.
        """
        services = {
            self.site.SERVICE_API: self._get_members_via_api,
            self.site.SERVICE_SQL: self._get_members_via_sql
        }
        if follow_redirects is None:
            follow_redirects = self._follow_redirects
        return self.site.delegate(services, (limit, follow_redirects))

+ 61
- 0
earwigbot/wiki/constants.py View File

@@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""
**EarwigBot: Wiki Toolset: Constants**

This module defines some useful constants:

- :py:const:`USER_AGENT`: our default User Agent when making API queries
- :py:const:`NS_*`: default namespace IDs for easy lookup

Import directly with ``from earwigbot.wiki import constants`` or
``from earwigbot.wiki.constants import *``. These are also available from
:py:mod:`earwigbot.wiki` directly (e.g. ``earwigbot.wiki.USER_AGENT``).
"""

# Default User Agent when making API queries. It embeds the bot's version and
# the running Python version; the two helper names are deleted afterwards so
# that they do not remain in the module namespace.
from earwigbot import __version__ as _v
from platform import python_version as _p
USER_AGENT = (
    "EarwigBot/{0} (Python/{1}; https://github.com/earwig/earwigbot)"
).format(_v, _p())
del _v, _p

# Default namespace IDs. Each subject namespace (even ID) is listed next to
# its talk namespace (the following odd ID):
NS_MAIN, NS_TALK = 0, 1
NS_USER, NS_USER_TALK = 2, 3
NS_PROJECT, NS_PROJECT_TALK = 4, 5
NS_FILE, NS_FILE_TALK = 6, 7
NS_MEDIAWIKI, NS_MEDIAWIKI_TALK = 8, 9
NS_TEMPLATE, NS_TEMPLATE_TALK = 10, 11
NS_HELP, NS_HELP_TALK = 12, 13
NS_CATEGORY, NS_CATEGORY_TALK = 14, 15
# Namespaces with negative IDs:
NS_SPECIAL, NS_MEDIA = -1, -2

+ 229
- 0
earwigbot/wiki/copyvios/__init__.py View File

@@ -0,0 +1,229 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from gzip import GzipFile
from socket import timeout
from StringIO import StringIO
from time import sleep, time
from urllib2 import build_opener, URLError

import oauth2 as oauth

from earwigbot import exceptions
from earwigbot.wiki.copyvios.markov import MarkovChain, MarkovChainIntersection
from earwigbot.wiki.copyvios.parsers import ArticleTextParser, HTMLTextParser
from earwigbot.wiki.copyvios.result import CopyvioCheckResult
from earwigbot.wiki.copyvios.search import YahooBOSSSearchEngine

__all__ = ["CopyvioMixIn"]

class CopyvioMixIn(object):
"""
**EarwigBot: Wiki Toolset: Copyright Violation MixIn**

This is a mixin that provides two public methods, :py:meth:`copyvio_check`
and :py:meth:`copyvio_compare`. The former checks the page for copyright
violations using a search engine API, and the latter compares the page
against a given URL. Credentials for the search engine API are stored in
the :py:class:`~earwigbot.wiki.site.Site`'s config.
"""

def __init__(self, site):
self._search_config = site._search_config
self._exclusions_db = self._search_config.get("exclusions_db")
self._opener = build_opener()
self._opener.addheaders = site._opener.addheaders

def _open_url_ignoring_errors(self, url):
"""Open a URL using self._opener and return its content, or None.

Will decompress the content if the headers contain "gzip" as its
content encoding, and will return None if URLError is raised while
opening the URL. IOErrors while gunzipping a compressed response are
ignored, and the original content is returned.
"""
try:
response = self._opener.open(url.encode("utf8"), timeout=5)
except (URLError, timeout):
return None
result = response.read()

if response.headers.get("Content-Encoding") == "gzip":
stream = StringIO(result)
gzipper = GzipFile(fileobj=stream)
try:
result = gzipper.read()
except IOError:
pass

return result

def _select_search_engine(self):
"""Return a function that can be called to do web searches.

The function takes one argument, a search query, and returns a list of
URLs, ranked by importance. The underlying logic depends on the
*engine* argument within our config; for example, if *engine* is
"Yahoo! BOSS", we'll use YahooBOSSSearchEngine for querying.

Raises UnknownSearchEngineError if the 'engine' listed in our config is
unknown to us, and UnsupportedSearchEngineError if we are missing a
required package or module, like oauth2 for "Yahoo! BOSS".
"""
engine = self._search_config["engine"]
credentials = self._search_config["credentials"]

if engine == "Yahoo! BOSS":
if not oauth:
e = "The package 'oauth2' could not be imported"
raise exceptions.UnsupportedSearchEngineError(e)
return YahooBOSSSearchEngine(credentials)

raise exceptions.UnknownSearchEngineError(engine)

def _copyvio_compare_content(self, article, url):
"""Return a number comparing an article and a URL.

The *article* is a Markov chain, whereas the *url* is just a string
that we'll try to open and read ourselves.
"""
html = self._open_url_ignoring_errors(url)
if not html:
return 0

source = MarkovChain(HTMLTextParser(html).strip())
delta = MarkovChainIntersection(article, source)
return float(delta.size()) / article.size(), (source, delta)

def copyvio_check(self, min_confidence=0.5, max_queries=-1,
interquery_sleep=1):
"""Check the page for copyright violations.

Returns a
:py:class:`~earwigbot.wiki.copyvios.result.CopyvioCheckResult` object
with information on the results of the check.

*max_queries* is self-explanatory; we will never make more than this
number of queries in a given check. If it's lower than 0, we will not
limit the number of queries.

*interquery_sleep* is the minimum amount of time we will sleep between
search engine queries, in seconds.

Raises :py:exc:`~earwigbot.exceptions.CopyvioCheckError` or subclasses
(:py:exc:`~earwigbot.exceptions.UnknownSearchEngineError`,
:py:exc:`~earwigbot.exceptions.SearchQueryError`, ...) on errors.
"""
searcher = self._select_search_engine()
if self._exclusions_db:
self._exclusions_db.sync(self.site.name)
handled_urls = []
best_confidence = 0
best_match = None
num_queries = 0
empty = MarkovChain("")
best_chains = (empty, MarkovChainIntersection(empty, empty))
parser = ArticleTextParser(self.get())
clean = parser.strip()
chunks = parser.chunk(self._search_config["nltk_dir"], max_queries)
article_chain = MarkovChain(clean)
last_query = time()

if article_chain.size() < 20: # Auto-fail very small articles
return CopyvioCheckResult(False, best_confidence, best_match,
num_queries, article_chain, best_chains)

while (chunks and best_confidence < min_confidence and
(max_queries < 0 or num_queries < max_queries)):
chunk = chunks.pop(0)
log = u"[[{0}]] -> querying {1} for {2!r}"
self._logger.debug(log.format(self.title, searcher.name, chunk))
urls = searcher.search(chunk)
urls = [url for url in urls if url not in handled_urls]
for url in urls:
handled_urls.append(url)
if self._exclusions_db:
if self._exclusions_db.check(self.site.name, url):
continue
conf, chains = self._copyvio_compare_content(article_chain, url)
if conf > best_confidence:
best_confidence = conf
best_match = url
best_chains = chains
num_queries += 1
diff = time() - last_query
if diff < interquery_sleep:
sleep(interquery_sleep - diff)
last_query = time()

if best_confidence >= min_confidence:
is_violation = True
log = u"Violation detected for [[{0}]] (confidence: {1}; URL: {2}; using {3} queries)"
self._logger.debug(log.format(self.title, best_confidence,
best_match, num_queries))
else:
is_violation = False
log = u"No violation for [[{0}]] (confidence: {1}; using {2} queries)"
self._logger.debug(log.format(self.title, best_confidence,
num_queries))

return CopyvioCheckResult(is_violation, best_confidence, best_match,
num_queries, article_chain, best_chains)

def copyvio_compare(self, url, min_confidence=0.5):
"""Check the page like :py:meth:`copyvio_check` against a specific URL.

This is essentially a reduced version of the above - a copyivo
comparison is made using Markov chains and the result is returned in a
:py:class:`~earwigbot.wiki.copyvios.result.CopyvioCheckResult` object -
but without using a search engine, since the suspected "violated" URL
is supplied from the start.

Its primary use is to generate a result when the URL is retrieved from
a cache, like the one used in EarwigBot's Toolserver site. After a
search is done, the resulting URL is stored in a cache for 24 hours so
future checks against that page will not require another set of
time-and-money-consuming search engine queries. However, the comparison
itself (which includes the article's and the source's content) cannot
be stored for data retention reasons, so a fresh comparison is made
using this function.

Since no searching is done, neither
:py:exc:`~earwigbot.exceptions.UnknownSearchEngineError` nor
:py:exc:`~earwigbot.exceptions.SearchQueryError` will be raised.
"""
content = self.get()
clean = ArticleTextParser(content).strip()
article_chain = MarkovChain(clean)
confidence, chains = self._copyvio_compare_content(article_chain, url)

if confidence >= min_confidence:
is_violation = True
log = u"Violation detected for [[{0}]] (confidence: {1}; URL: {2})"
self._logger.debug(log.format(self.title, confidence, url))
else:
is_violation = False
log = u"No violation for [[{0}]] (confidence: {1}; URL: {2})"
self._logger.debug(log.format(self.title, confidence, url))

return CopyvioCheckResult(is_violation, confidence, url, 0,
article_chain, chains)

+ 171
- 0
earwigbot/wiki/copyvios/exclusions.py View File

@@ -0,0 +1,171 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import re
import sqlite3 as sqlite
from threading import Lock
from time import time
from urlparse import urlparse

from earwigbot import exceptions

__all__ = ["ExclusionsDB"]

# Wiki pages, keyed by site name, that list URLs to be excluded from copyvio
# checks. ExclusionsDB._create() copies these into the ``sources`` table.
default_sources = dict(
    enwiki=[
        "Wikipedia:Mirrors and forks/Abc",
        "Wikipedia:Mirrors and forks/Def",
        "Wikipedia:Mirrors and forks/Ghi",
        "Wikipedia:Mirrors and forks/Jkl",
        "Wikipedia:Mirrors and forks/Mno",
        "Wikipedia:Mirrors and forks/Pqr",
        "Wikipedia:Mirrors and forks/Stu",
        "Wikipedia:Mirrors and forks/Vwxyz",
        "User:EarwigBot/Copyvios/Exclusions",
    ],
)

class ExclusionsDB(object):
    """
    **EarwigBot: Wiki Toolset: Exclusions Database Manager**

    Controls the :file:`exclusions.db` file, which stores URLs excluded from
    copyright violation checks on account of being known mirrors, for example.

    The database holds three tables: ``sources`` (wiki pages that list
    excluded URLs, per site), ``updates`` (when each site's exclusions were
    last refreshed), and ``exclusions`` (the excluded URLs themselves).
    """

    def __init__(self, sitesdb, dbfile, logger):
        """Store the sites database, the database file path, and a logger.

        The database file is not touched here; it is only opened by the
        methods that need it.
        """
        self._sitesdb = sitesdb
        self._dbfile = dbfile
        self._logger = logger
        self._db_access_lock = Lock()

    def __repr__(self):
        """Return the canonical string representation of the ExclusionsDB."""
        res = "ExclusionsDB(sitesdb={0!r}, dbfile={1!r}, logger={2!r})"
        return res.format(self._sitesdb, self._dbfile, self._logger)

    def __str__(self):
        """Return a nice string representation of the ExclusionsDB."""
        return "<ExclusionsDB at {0}>".format(self._dbfile)

    def _create(self):
        """Initialize the exclusions database with its necessary tables.

        The ``sources`` table is seeded with one row per page listed in the
        module-level ``default_sources`` mapping.
        """
        script = """
            CREATE TABLE sources (source_sitename, source_page);
            CREATE TABLE updates (update_sitename, update_time);
            CREATE TABLE exclusions (exclusion_sitename, exclusion_url);
        """
        query = "INSERT INTO sources VALUES (?, ?);"
        sources = []
        for sitename, pages in default_sources.iteritems():
            sources.extend((sitename, page) for page in pages)

        with sqlite.connect(self._dbfile) as conn:
            conn.executescript(script)
            conn.executemany(query, sources)

    def _load_source(self, site, source):
        """Load from a specific source and return a set of URLs.

        *site* is the site object to read from and *source* is the title of
        a page on it. Two notations are recognized in the page text:
        ``url = <nowiki>...</nowiki>`` and ``* Site: ...`` (optionally wrapped
        in square brackets). URLs are returned lowercased, with any leading
        ``http://`` or ``https://`` removed. An empty set is returned if the
        page does not exist.
        """
        urls = set()
        try:
            data = site.get_page(source).get()
        except exceptions.PageNotFoundError:
            return urls

        regexes = [
            r"url\s*=\s*<nowiki>(?:https?:)?(?://)?(.*)</nowiki>",
            # Stop at "]" so a closing bracket never ends up in the URL:
            r"\*\s*Site:\s*\[?(?:https?:)?(?://)?([^\]\n]*)\]?"
        ]
        for regex in regexes:
            # Each pattern has exactly one capturing group, so findall()
            # returns plain strings rather than 1-tuples:
            for url in re.findall(regex, data, re.I):
                url = url.strip().lower()
                if url:
                    urls.add(url)
        return urls

    def _update(self, sitename):
        """Update the database from listed sources in the index.

        Every source page registered for *sitename* is fetched and parsed.
        Stored exclusions that are no longer listed are deleted, newly listed
        URLs are inserted, and the site's last-update time is set to now.
        """
        query1 = "SELECT source_page FROM sources WHERE source_sitename = ?;"
        query2 = "SELECT exclusion_url FROM exclusions WHERE exclusion_sitename = ?"
        query3 = "DELETE FROM exclusions WHERE exclusion_sitename = ? AND exclusion_url = ?"
        query4 = "INSERT INTO exclusions VALUES (?, ?);"
        query5 = "SELECT 1 FROM updates WHERE update_sitename = ?;"
        query6 = "UPDATE updates SET update_time = ? WHERE update_sitename = ?;"
        query7 = "INSERT INTO updates VALUES (?, ?);"

        site = self._sitesdb.get_site(sitename)
        with sqlite.connect(self._dbfile) as conn, self._db_access_lock:
            urls = set()
            for (source,) in conn.execute(query1, (sitename,)):
                urls |= self._load_source(site, source)
            for (url,) in conn.execute(query2, (sitename,)):
                if url in urls:
                    # Already stored; nothing to insert for this one:
                    urls.remove(url)
                else:
                    # No longer listed by any source; drop it:
                    conn.execute(query3, (sitename, url))
            conn.executemany(query4, [(sitename, url) for url in urls])
            if conn.execute(query5, (sitename,)).fetchone():
                conn.execute(query6, (int(time()), sitename))
            else:
                conn.execute(query7, (sitename, int(time())))

    def _get_last_update(self, sitename):
        """Return the UNIX timestamp of the last time the db was updated.

        Returns 0 if *sitename* has never been updated. If the query fails
        with an ``OperationalError`` (as it does when the tables have not been
        created yet), the database is created and 0 is returned.
        """
        query = "SELECT update_time FROM updates WHERE update_sitename = ?;"
        with sqlite.connect(self._dbfile) as conn, self._db_access_lock:
            try:
                result = conn.execute(query, (sitename,)).fetchone()
            except sqlite.OperationalError:
                self._create()
                return 0
            return result[0] if result else 0

    def sync(self, sitename):
        """Update the database if it hasn't been updated in the past week.

        This only updates the exclusions database for the *sitename* site.
        """
        max_staleness = 60 * 60 * 24 * 7
        time_since_update = int(time() - self._get_last_update(sitename))
        if time_since_update > max_staleness:
            log = u"Updating stale database: {0} (last updated {1} seconds ago)"
            self._logger.info(log.format(sitename, time_since_update))
            self._update(sitename)
        else:
            log = u"Database for {0} is still fresh (last updated {1} seconds ago)"
            self._logger.debug(log.format(sitename, time_since_update))

    def check(self, sitename, url):
        """Check whether a given URL is in the exclusions database.

        An exclusion beginning with ``*.`` matches when the rest of it occurs
        in the URL's network location; any other exclusion matches when the
        URL, lowercased and without its ``http(s)://``, starts with it.

        Return ``True`` if the URL is in the database, or ``False`` otherwise.
        """
        lowered = url.lower()
        normalized = re.sub("https?://", "", lowered)
        netloc = urlparse(lowered).netloc  # Only used for "*." exclusions
        query = "SELECT exclusion_url FROM exclusions WHERE exclusion_sitename = ?"
        with sqlite.connect(self._dbfile) as conn, self._db_access_lock:
            for (excl,) in conn.execute(query, (sitename,)):
                if excl.startswith("*."):
                    matches = excl[2:] in netloc
                else:
                    matches = normalized.startswith(excl)
                if matches:
                    log = u"Exclusion detected in {0} for {1}"
                    self._logger.debug(log.format(sitename, url))
                    return True

        log = u"No exclusions in {0} for {1}".format(sitename, url)
        self._logger.debug(log)
        return False

+ 87
- 0
earwigbot/wiki/copyvios/markov.py View File

@@ -0,0 +1,87 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from collections import defaultdict
from re import sub, UNICODE

__all__ = ["MarkovChain", "MarkovChainIntersection"]

class MarkovChain(object):
    """A simple word-level n-gram Markov chain built from a block of text."""
    START = -1
    END = -2
    degree = 3  # 2 for bigrams, 3 for trigrams, etc.

    def __init__(self, text):
        """Build the chain from *text*.

        The text is lowercased, stripped of every character that is not a
        word character, whitespace, or a hyphen, and split into words. The
        word list is padded with ``degree - 1`` START markers in front and as
        many END markers behind. ``self.chain`` then maps each run of
        ``degree - 1`` consecutive items to a count of every item that
        directly follows that run.
        """
        self.text = text
        self.chain = defaultdict(lambda: defaultdict(int))
        cleaned = sub(r"[^\w\s-]", "", text.lower(), flags=UNICODE)

        pad = self.degree - 1
        tokens = [self.START] * pad + cleaned.split() + [self.END] * pad
        for start in range(len(tokens) - pad):
            stop = start + pad
            state = tuple(tokens[start:stop])
            self.chain[state][tokens[stop]] += 1

    def __repr__(self):
        """Return the canonical string representation of the MarkovChain."""
        return "MarkovChain(text={0!r})".format(self.text)

    def __str__(self):
        """Return a nice string representation of the MarkovChain."""
        return "<MarkovChain of size {0}>".format(self.size())

    def size(self):
        """Return the size of the Markov chain: the sum of all hit counts."""
        return sum(sum(followers.values())
                   for followers in self.chain.values())


class MarkovChainIntersection(MarkovChain):
    """The nodes that two Markov chains have in common (their intersection)."""

    def __init__(self, mc1, mc2):
        """Build the intersection of the chains *mc1* and *mc2*.

        For every (state, following item) pair present in both chains, the
        smaller of the two hit counts is stored. The two source chains are
        kept as ``self.mc1`` and ``self.mc2``.
        """
        self.mc1, self.mc2 = mc1, mc2
        self.chain = defaultdict(lambda: defaultdict(int))
        theirs = mc2.chain

        for state, followers in mc1.chain.items():
            # Test membership first so the other defaultdict isn't grown:
            if state not in theirs:
                continue
            other = theirs[state]
            for item, hits in followers.items():
                if item in other:
                    self.chain[state][item] = min(hits, other[item])

    def __repr__(self):
        """Return the canonical string representation of the intersection."""
        template = "MarkovChainIntersection(mc1={0!r}, mc2={1!r})"
        return template.format(self.mc1, self.mc2)

    def __str__(self):
        """Return a nice string representation of the intersection."""
        template = "<MarkovChainIntersection of size {0} ({1} ^ {2})>"
        return template.format(self.size(), self.mc1, self.mc2)

+ 138
- 0
earwigbot/wiki/copyvios/parsers.py View File

@@ -0,0 +1,138 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from os import path

import bs4
import mwparserfromhell
import nltk

__all__ = ["BaseTextParser", "ArticleTextParser", "HTMLTextParser"]

class BaseTextParser(object):
    """Common base for parsers that operate on a block of text."""

    def __init__(self, text):
        """Remember *text* as ``self.text`` for later processing."""
        self.text = text

    def __repr__(self):
        """Return the canonical string representation of the text parser."""
        cls_name = type(self).__name__
        return "{0}(text={1!r})".format(cls_name, self.text)

    def __str__(self):
        """Return a nice string representation of the text parser."""
        template = "<{0} of text with size {1}>"
        return template.format(type(self).__name__, len(self.text))


class ArticleTextParser(BaseTextParser):
    """A parser that can strip and chunk wikicode article text."""

    def strip(self):
        """Clean the page's raw text by removing templates and formatting.

        Return the page's text with all HTML and wikicode formatting removed,
        including templates, tables, and references. It retains punctuation
        (spacing, paragraphs, periods, commas, (semi)-colons, parentheses,
        quotes), original capitalization, and so forth. HTML entities are
        replaced by their unicode equivalents.

        The result is also stored as ``self.clean`` for use by
        :py:meth:`chunk`.

        The actual stripping is handled by :py:mod:`mwparserfromhell`.
        """
        wikicode = mwparserfromhell.parse(self.text)
        clean = wikicode.strip_code(normalize=True, collapse=True)
        self.clean = clean.replace("\n\n", "\n")  # Collapse extra newlines
        return self.clean

    def chunk(self, nltk_dir, max_chunks, max_query=256):
        """Convert the clean article text into a list of web-searchable chunks.

        No greater than *max_chunks* will be returned. Each chunk will only be
        a sentence or two long at most (no more than *max_query*). The idea is
        to return a sample of the article text rather than the whole, so we'll
        pick and choose from parts of it, especially if the article is large
        and *max_chunks* is low, so we don't end up just searching for just the
        first paragraph.

        If :py:meth:`strip` has not been called yet, it is called first so
        that there is clean text to work with. Sentences that end up empty
        after being cut down to *max_query* are dropped.

        This is implemented using :py:mod:`nltk` (http://nltk.org/). A base
        directory (*nltk_dir*) is required to store nltk's punctuation
        database. This is typically located in the bot's working directory.
        """
        datafile = path.join(nltk_dir, "tokenizers", "punkt", "english.pickle")
        try:
            tokenizer = nltk.data.load("file:" + datafile)
        except LookupError:
            # Tokenizer data not found; download it and try once more:
            nltk.download("punkt", nltk_dir)
            tokenizer = nltk.data.load("file:" + datafile)

        # self.clean only exists once strip() has run:
        clean = getattr(self, "clean", None)
        if clean is None:
            clean = self.strip()

        sentences = []
        for sentence in tokenizer.tokenize(clean):
            if len(sentence) > max_query:
                # Drop trailing words until the sentence fits in one query:
                words = sentence.split()
                while len(" ".join(words)) > max_query:
                    words.pop()
                sentence = " ".join(words)
            if sentence:  # A lone over-long word leaves nothing to search
                sentences.append(sentence)

        if max_chunks >= len(sentences):
            return sentences

        # Sample from across the article by cycling through five positions;
        # each pick is removed so it cannot be chosen twice:
        chunks = []
        while len(chunks) < max_chunks:
            position = len(chunks) % 5
            if position == 0:
                chunk = sentences.pop(0)  # Pop from beginning
            elif position == 1:
                chunk = sentences.pop()  # Pop from end
            elif position == 2:
                chunk = sentences.pop(len(sentences) // 2)  # Pop from Q2
            elif position == 3:
                chunk = sentences.pop(len(sentences) // 4)  # Pop from Q1
            else:
                chunk = sentences.pop(3 * len(sentences) // 4)  # Pop from Q3
            chunks.append(chunk)

        return chunks


class HTMLTextParser(BaseTextParser):
    """A parser that can extract the text from an HTML document."""
    # Tags whose contents are never shown to a reader:
    hidden_tags = [
        "script", "style"
    ]

    def strip(self):
        """Return the actual text contained within an HTML document.

        Comments and the contents of every tag in :py:attr:`hidden_tags` are
        removed; the remaining strings are stripped of surrounding whitespace
        and joined with newlines. If the document has no ``<body>`` element,
        the whole parsed document is used instead.

        Implemented using :py:mod:`BeautifulSoup <bs4>`
        (http://www.crummy.com/software/BeautifulSoup/).
        """
        try:
            soup = bs4.BeautifulSoup(self.text, "lxml")
        except ValueError:
            # Presumably raised when the lxml parser is not available; let
            # bs4 pick whichever parser it can find:
            soup = bs4.BeautifulSoup(self.text)

        # Fragments and empty documents may have no <body> at all:
        root = soup.body
        if root is None:
            root = soup

        is_comment = lambda text: isinstance(text, bs4.element.Comment)
        for comment in root.find_all(text=is_comment):
            comment.extract()
        for tag in self.hidden_tags:
            for element in root.find_all(tag):
                element.extract()

        return "\n".join(root.stripped_strings)

+ 60
- 0
earwigbot/wiki/copyvios/result.py View File

@@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

__all__ = ["CopyvioCheckResult"]

class CopyvioCheckResult(object):
    """
    **EarwigBot: Wiki Toolset: Copyvio Check Result**

    A class holding information about the results of a copyvio check.

    *Attributes:*

    - :py:attr:`violation`:     ``True`` if this is a violation, else ``False``
    - :py:attr:`confidence`:    a float between 0 and 1 indicating accuracy
    - :py:attr:`url`:           the URL of the violated page
    - :py:attr:`queries`:       the number of queries used to reach a result
    - :py:attr:`article_chain`: the MarkovChain of the article text
    - :py:attr:`source_chain`:  the MarkovChain of the violated page text
    - :py:attr:`delta_chain`:   the MarkovChainIntersection comparing the two
    """

    def __init__(self, violation, confidence, url, queries, article, chains):
        """Store the results of a check.

        *chains* is a two-item sequence: the source chain followed by the
        delta chain.
        """
        self.violation = violation
        self.confidence = confidence
        self.url = url
        self.queries = queries
        self.article_chain = article
        self.source_chain = chains[0]
        self.delta_chain = chains[1]

    def __repr__(self):
        """Return the canonical string representation of the result."""
        res = ("CopyvioCheckResult(violation={0!r}, confidence={1!r}, "
               "url={2!r}, queries={3!r})")
        return res.format(self.violation, self.confidence, self.url,
                          self.queries)

    def __str__(self):
        """Return a nice string representation of the result."""
        res = "<CopyvioCheckResult ({0} with {1} conf)>"
        return res.format(self.violation, self.confidence)

+ 91
- 0
earwigbot/wiki/copyvios/search.py View File

@@ -0,0 +1,91 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from json import loads
from urllib import quote_plus, urlencode

import oauth2 as oauth

from earwigbot.exceptions import SearchQueryError

__all__ = ["BaseSearchEngine", "YahooBOSSSearchEngine"]

class BaseSearchEngine(object):
    """Minimal interface shared by all search engine backends."""
    name = "Base"

    def __init__(self, cred):
        """Keep the credentials *cred* so that searches can use them later."""
        self.cred = cred

    def __repr__(self):
        """Return the canonical string representation of the search engine."""
        cls_name = type(self).__name__
        return "{0}()".format(cls_name)

    def __str__(self):
        """Return a nice string representation of the search engine."""
        cls_name = type(self).__name__
        return "<{0}>".format(cls_name)

    def search(self, query):
        """Search for *query* with this engine.

        The base class has no implementation and always raises
        ``NotImplementedError``; subclasses override this method.
        """
        raise NotImplementedError()


class YahooBOSSSearchEngine(BaseSearchEngine):
    """A search engine interface with Yahoo! BOSS."""
    name = "Yahoo! BOSS"

    def search(self, query):
        """Do a Yahoo! BOSS web search for *query*.

        The query is wrapped in double quotes before being sent. The request
        is signed with the ``"key"`` and ``"secret"`` entries of the
        credentials given to the constructor.

        Returns a list of URLs, no more than fifty, ranked by relevance (as
        determined by Yahoo). An empty list is returned if the response
        contains no web results. Raises
        :py:exc:`~earwigbot.exceptions.SearchQueryError` on errors.
        """
        base_url = "http://yboss.yahooapis.com/ysearch/web"
        phrase = '"' + query + '"'
        if isinstance(phrase, unicode):
            # quote_plus() cannot handle non-ASCII unicode; give it bytes:
            phrase = phrase.encode("utf8")
        # NOTE(review): urlencode() below escapes this already-quoted value a
        # second time - confirm that is what the BOSS endpoint expects.
        query = quote_plus(phrase)
        params = {"q": query, "type": "html,text", "format": "json"}
        url = "{0}?{1}".format(base_url, urlencode(params))

        consumer = oauth.Consumer(key=self.cred["key"],
                                  secret=self.cred["secret"])
        client = oauth.Client(consumer)
        headers, body = client.request(url, "GET")

        if headers["status"] != "200":
            e = "Yahoo! BOSS Error: got response code '{0}':\n{1}"
            raise SearchQueryError(e.format(headers["status"], body))

        try:
            res = loads(body)
        except ValueError:
            e = "Yahoo! BOSS Error: JSON could not be decoded"
            raise SearchQueryError(e)

        try:
            results = res["bossresponse"]["web"]["results"]
        except KeyError:
            return []
        return [result["url"] for result in results]

+ 787
- 0
earwigbot/wiki/page.py View File

@@ -0,0 +1,787 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from hashlib import md5
from logging import getLogger, NullHandler
import re
from time import gmtime, strftime
from urllib import quote

import mwparserfromhell

from earwigbot import exceptions
from earwigbot.wiki.copyvios import CopyvioMixIn

__all__ = ["Page"]

class Page(CopyvioMixIn):
"""
**EarwigBot: Wiki Toolset: Page**

Represents a page on a given :py:class:`~earwigbot.wiki.site.Site`. Has
methods for getting information about the page, getting page content, and
so on. :py:class:`~earwigbot.wiki.category.Category` is a subclass of
:py:class:`Page` with additional methods.

*Attributes:*

- :py:attr:`site`: the page's corresponding Site object
- :py:attr:`title`: the page's title, or pagename
- :py:attr:`exists`: whether or not the page exists
- :py:attr:`pageid`: an integer ID representing the page
- :py:attr:`url`: the page's URL
- :py:attr:`namespace`: the page's namespace as an integer
- :py:attr:`protection`: the page's current protection status
- :py:attr:`is_talkpage`: ``True`` if this is a talkpage, else ``False``
- :py:attr:`is_redirect`: ``True`` if this is a redirect, else ``False``

*Public methods:*

- :py:meth:`reload`: forcibly reloads the page's attributes
- :py:meth:`toggle_talk`: returns a content page's talk page, or vice versa
- :py:meth:`get`: returns the page's content
- :py:meth:`get_redirect_target`: returns the page's destination if it is a
redirect
- :py:meth:`get_creator`: returns a User object representing the first
person to edit the page
- :py:meth:`parse`: parses the page content for templates, links, etc
- :py:meth:`edit`: replaces the page's content or creates a new page
- :py:meth:`add_section`: adds a new section at the bottom of the page
- :py:meth:`check_exclusion`: checks whether or not we are allowed to edit
the page, per ``{{bots}}``/``{{nobots}}``

- :py:meth:`~earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_check`:
checks the page for copyright violations
- :py:meth:`~earwigbot.wiki.copyvios.CopyvioMixIn.copyvio_compare`:
checks the page like :py:meth:`copyvio_check`, but against a specific URL
"""
PAGE_UNKNOWN = 0
PAGE_INVALID = 1
PAGE_MISSING = 2
PAGE_EXISTS = 3

def __init__(self, site, title, follow_redirects=False, pageid=None,
             logger=None):
    """Constructor for new Page instances.

    Takes five arguments: a Site object, the Page's title (or pagename),
    whether or not to follow redirects (optional, defaults to False), a
    page ID to supplement the title (optional, defaults to None - i.e.,
    we will have to query the API to get it), and a logger (optional; if
    not given, messages go to the "earwigbot.wiki" logger with a null
    handler attached).

    As with User, site.get_page() is preferred.

    __init__() will not do any API queries, but it will use basic namespace
    logic to determine our namespace ID and if we are a talkpage.
    """
    super(Page, self).__init__(site)
    self._site = site
    self._title = title.strip()
    self._follow_redirects = self._keep_following = follow_redirects
    self._pageid = pageid

    # Set up our internal logger:
    if logger:
        self._logger = logger
    else:  # Just set up a null logger to eat up our messages:
        self._logger = getLogger("earwigbot.wiki")
        # getLogger() hands back the same shared logger every time, so only
        # attach a NullHandler if it doesn't have one yet; otherwise every
        # new Page would add another handler to it:
        handlers = self._logger.handlers
        if not any(isinstance(hdlr, NullHandler) for hdlr in handlers):
            self._logger.addHandler(NullHandler())

    # Attributes to be loaded through the API:
    self._exists = self.PAGE_UNKNOWN
    self._is_redirect = None
    self._lastrevid = None
    self._protection = None
    self._fullurl = None
    self._content = None
    self._creator = None

    # Attributes used for editing/deleting/protecting/etc:
    self._token = None
    self._basetimestamp = None
    self._starttimestamp = None

    # Try to determine the page's namespace using our site's namespace
    # converter:
    prefix = self._title.split(":", 1)[0]
    # Compare against the stripped title so that surrounding whitespace
    # doesn't make a bare "Category" or "User" look like a namespace prefix:
    if prefix != self._title:
        try:
            self._namespace = self.site.namespace_name_to_id(prefix)
        except exceptions.NamespaceNotFoundError:
            self._namespace = 0
    else:
        self._namespace = 0

    # Is this a talkpage? Talkpages have odd IDs, while content pages have
    # even IDs, excluding the "special" namespaces:
    if self._namespace < 0:
        self._is_talkpage = False
    else:
        self._is_talkpage = self._namespace % 2 == 1

def __repr__(self):
    """Return the canonical string representation of the Page."""
    return "Page(title={0!r}, follow_redirects={1!r}, site={2!r})".format(
        self._title, self._follow_redirects, self._site)

def __str__(self):
    """Return a nice string representation of the Page."""
    page_title = self.title
    site_desc = str(self.site)
    return '<Page "{0}" of {1}>'.format(page_title, site_desc)

def _assert_validity(self):
    """Raise InvalidPageError if our page's title is known to be invalid.

    This only looks at the state recorded by _load_attributes(), so it
    has no effect until that method has been called.

    Note that validity != existence. If a page's title is invalid (e.g, it
    contains "[") it will always be invalid, and cannot be edited.
    """
    if self._exists != self.PAGE_INVALID:
        return
    message = u"Page '{0}' is invalid.".format(self._title)
    raise exceptions.InvalidPageError(message)

def _assert_existence(self):
    """Raise PageNotFoundError if our page is known not to exist.

    _assert_validity() is called first, so an invalid title raises
    InvalidPageError instead. Like that method, this only looks at the
    state recorded by _load_attributes().
    """
    self._assert_validity()
    if self._exists != self.PAGE_MISSING:
        return
    message = u"Page '{0}' does not exist.".format(self._title)
    raise exceptions.PageNotFoundError(message)

def _load(self):
    """Load our attributes, then follow a redirect if we're supposed to.

    Redirects are only followed if follow_redirects=True was passed to
    __init__() (perhaps indirectly, by site.get_page()). Following is done
    by hand rather than with the API's &redirects param so we can act more
    realistically: double redirects are not followed, and circular
    redirects don't break us.

    This will raise RedirectError if we have a problem following, but that
    is a bug and should NOT happen.

    If we're following a redirect, this will make a grand total of three
    API queries. It's a lot, but each one is quite small.
    """
    self._load_attributes()

    should_follow = self._keep_following and self._is_redirect
    if not should_follow:
        return

    self._title = self.get_redirect_target()
    self._keep_following = False  # only ever follow a single redirect
    self._content = None  # discard the redirect page's own content
    self._load_attributes()

def _load_attributes(self, result=None):
    """Load various data from the API in a single query.

    Sets self._title, ._exists, ._is_redirect, ._pageid, ._fullurl,
    ._protection, ._namespace, ._is_talkpage, ._creator, ._lastrevid,
    ._token, and ._starttimestamp. If *result* is provided it is treated
    as the API's response; otherwise a query is made here.

    Assuming the API is sound, this should not raise any exceptions.
    """
    if not result:
        result = self.site.api_query(
            action="query", prop="info|revisions",
            inprop="protection|url", intoken="edit", rvprop="user",
            rvlimit=1, rvdir="newer", titles=self._title)

    pages = result["query"]["pages"]
    info = pages.values()[0]

    self._title = info["title"]  # Normalize our pagename/title
    self._is_redirect = "redirect" in info

    self._pageid = int(pages.keys()[0])
    if self._pageid >= 0:
        self._exists = self.PAGE_EXISTS
    elif "missing" in info:
        # A negative ID on a missing page; we can still get data like the
        # namespace, protection, and URL:
        self._exists = self.PAGE_MISSING
    else:
        # A negative ID on an invalid page; there is no other data for us
        # to get, so stop here:
        self._exists = self.PAGE_INVALID
        return

    self._fullurl = info["fullurl"]
    self._protection = info["protection"]

    if "edittoken" in info:
        self._token = info["edittoken"]
        self._starttimestamp = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())

    # __init__() guessed the namespace and talkpage status from the title;
    # now we can be sure:
    self._namespace = info["ns"]
    self._is_talkpage = self._namespace % 2 == 1  # talkpages have odd IDs

    # These last two fields will only be specified if the page exists:
    self._lastrevid = info.get("lastrevid")
    try:
        self._creator = info["revisions"][0]["user"]
    except KeyError:
        pass

def _load_content(self, result=None):
    """Load current page content from the API.

    If *result* is provided it is treated as the API's response and the
    content is read from it; otherwise a query is made here. Sets
    self._content and self._basetimestamp.

    Don't call this directly, ever; use reload() followed by get() if you
    want to force content reloading.
    """
    if not result:
        result = self.site.api_query(
            action="query", prop="revisions", rvlimit=1,
            rvprop="content|timestamp", titles=self._title)

    page = result["query"]["pages"].values()[0]
    try:
        revision = page["revisions"][0]
        self._content = revision["*"]
        self._basetimestamp = revision["timestamp"]
    except KeyError:
        # This can only happen if the page was deleted since we last called
        # self._load_attributes(). Some of our attributes are then out of
        # date, so refresh them and complain if the page is gone:
        self._load_attributes()
        self._assert_existence()

def _edit(self, params=None, text=None, summary=None, minor=None, bot=None,
          force=None, section=None, captcha_id=None, captcha_word=None,
          tries=0):
    """Edit the page!

    If *params* is given, we'll use it as our API query parameters (after
    refreshing its token). Otherwise, we'll build params using the given
    kwargs via _build_edit_params().

    We'll then try to do the API query, and catch any errors the API raises
    in _handle_edit_errors(). We'll then throw these back as subclasses of
    EditError.

    *tries* counts how many times this edit has already been attempted; it
    is passed on to the error handlers.

    Returns nothing. Raises PermissionsError if we cannot get an edit
    token, and InvalidPageError if the page's title is invalid.
    """
    # Try to get our edit token, and die if we can't:
    if not self._token:
        self._load_attributes()
        if not self._token:
            e = "You don't have permission to edit this page."
            raise exceptions.PermissionsError(e)

    # Weed out invalid pages before we get too far:
    self._assert_validity()

    # Build our API query string:
    if not params:
        params = self._build_edit_params(text, summary, minor, bot, force,
                                         section, captcha_id, captcha_word)
    else:  # Make sure we have the right token:
        params["token"] = self._token

    # Try the API query, catching most errors with our handler:
    try:
        result = self.site.api_query(**params)
    except exceptions.APIError as error:
        if not hasattr(error, "code"):
            raise  # We can only handle errors with a code attribute
        result = self._handle_edit_errors(error, params, tries)
        if result is None:
            # The handler recovered by running the whole edit again through
            # a nested _edit() call. That call has already reset our
            # attributes (or raised), and it returns nothing, so there is
            # no API result left for us to inspect:
            return

    # If everything was successful, reset invalidated attributes:
    if result["edit"]["result"] == "Success":
        self._content = None
        self._basetimestamp = None
        self._exists = self.PAGE_UNKNOWN
        return

    # If we're here, then the edit failed. If it's because of AssertEdit,
    # handle that. Otherwise, die - something odd is going on:
    try:
        assertion = result["edit"]["assert"]
    except KeyError:
        raise exceptions.EditError(result["edit"])
    self._handle_assert_edit(assertion, params, tries)

def _build_edit_params(self, text, summary, minor, bot, force, section,
                       captcha_id, captcha_word):
    """Given some keyword arguments, build a dict of API edit parameters.

    The dict always holds the action, title, text, token, summary, and an
    MD5 checksum of the text. *section* is included unless it is None or
    an empty string, so section 0 (the lead section) can be targeted. The
    captcha fields are only included when both are given.

    Unless *force* is true, the start and base timestamps are included,
    and "createonly" is set if the page is known to be missing. With
    *force*, "recreate" is set instead.
    """
    unitxt = text.encode("utf8") if isinstance(text, unicode) else text
    hashed = md5(unitxt).hexdigest()  # Checksum to ensure text is correct
    params = {"action": "edit", "title": self._title, "text": text,
              "token": self._token, "summary": summary, "md5": hashed}

    # A plain truth test would throw away section 0, turning an edit of the
    # lead section into an edit of the whole page:
    if section is not None and section != "":
        params["section"] = section
    if captcha_id and captcha_word:
        params["captchaid"] = captcha_id
        params["captchaword"] = captcha_word
    if minor:
        params["minor"] = "true"
    else:
        params["notminor"] = "true"
    if bot:
        params["bot"] = "true"

    if not force:
        params["starttimestamp"] = self._starttimestamp
        if self._basetimestamp:
            params["basetimestamp"] = self._basetimestamp
        if self._exists == self.PAGE_MISSING:
            # Page does not exist; don't edit if it already exists:
            params["createonly"] = "true"
    else:
        params["recreate"] = "true"

    return params

def _handle_edit_errors(self, error, params, tries):
    """React to an API error that was raised while saving an edit.

    Depending on ``error.code``, we either raise a more specific exception
    (for example, :py:exc:`~earwigbot.exceptions.PermissionsError` if the
    page is protected) or try to recover: if the edit failed because we
    are logged out and login info is available, we log in once and retry
    the edit, returning the result of that retry.
    """
    code = error.code

    if code in ("noedit", "cantcreate", "protectedtitle",
                "noimageredirect"):
        raise exceptions.PermissionsError(error.info)

    if code in ("noedit-anon", "cantcreate-anon", "noimageredirect-anon"):
        if not all(self.site._login_info):
            # Insufficient login info:
            raise exceptions.PermissionsError(error.info)
        if tries != 0:
            # We already tried to log in and failed!
            e = "Although we should be logged in, we are not. This may be a cookie problem or an odd bug."
            raise exceptions.LoginError(e)
        # We have login info; try to login:
        self.site._login(self.site._login_info)
        self._token = None  # Need a new token; old one is invalid now
        return self._edit(params=params, tries=1)

    if code in ("editconflict", "pagedeleted", "articleexists"):
        # These attributes are now invalidated:
        self._content = None
        self._basetimestamp = None
        self._exists = self.PAGE_UNKNOWN
        raise exceptions.EditConflictError(error.info)

    if code in ("emptypage", "emptynewsection"):
        raise exceptions.NoContentError(error.info)
    if code == "contenttoobig":
        raise exceptions.ContentTooBigError(error.info)
    if code == "spamdetected":
        raise exceptions.SpamDetectedError(error.info)
    if code == "filtered":
        raise exceptions.FilteredError(error.info)

    # Anything else is reported as a generic edit error:
    raise exceptions.EditError(": ".join((code, error.info)))

def _handle_assert_edit(self, assertion, params, tries):
    """Deal with an edit that was rejected by a failed AssertEdit check.

    For the ``"user"`` and ``"bot"`` assertions, we log in and retry the
    edit once if login info is available and this is our first try,
    returning the result of that retry. In every other situation, either
    :py:exc:`~earwigbot.exceptions.PermissionsError` or
    :py:exc:`~earwigbot.exceptions.LoginError` is raised with details.
    """
    if assertion == "user":
        if not all(self.site._login_info):
            # Insufficient login info:
            e = "AssertEdit: user assertion failed, and no login info was provided."
            raise exceptions.PermissionsError(e)
        if tries != 0:
            # We already tried to log in and failed!
            e = "Although we should be logged in, we are not. This may be a cookie problem or an odd bug."
            raise exceptions.LoginError(e)
        # We have login info; try to login:
        self.site._login(self.site._login_info)
        self._token = None  # Need a new token; old one is invalid now
        return self._edit(params=params, tries=1)

    if assertion == "bot":
        if not all(self.site._login_info):
            # Insufficient login info:
            e = "AssertEdit: bot assertion failed, and no login info was provided."
            raise exceptions.PermissionsError(e)
        if tries != 0:
            # We already tried to log in, so we don't have a bot flag:
            e = "AssertEdit: bot assertion failed: we don't have a bot flag!"
            raise exceptions.PermissionsError(e)
        # Try to log in if we got logged out:
        self.site._login(self.site._login_info)
        self._token = None  # Need a new token; old one is invalid now
        return self._edit(params=params, tries=1)

    # Unknown assertion, maybe "true", "false", or "exists":
    e = "AssertEdit: assertion '{0}' failed.".format(assertion)
    raise exceptions.PermissionsError(e)

@property
def site(self):
    """The Site object that this page belongs to."""
    return self._site

@property
def title(self):
    """The title of the page (its "pagename").

    Reading this never triggers an API query. However, other attributes
    and methods that do query the API, like :py:attr:`exists` and
    :py:meth:`get`, will reload the title, which may "normalize" it or
    follow redirects if :py:attr:`self._follow_redirects` is ``True``.
    """
    return self._title

@property
def exists(self):
    """The existence status of the page.

    The value is a number whose exact value is unimportant; it will equal
    one of :py:attr:`self.PAGE_INVALID <PAGE_INVALID>`,
    :py:attr:`self.PAGE_MISSING <PAGE_MISSING>`, or
    :py:attr:`self.PAGE_EXISTS <PAGE_EXISTS>`.

    The page's attributes are loaded first if that has not happened yet.
    """
    if self._exists != self.PAGE_UNKNOWN:
        return self._exists
    self._load()
    return self._exists

@property
def pageid(self):
    """The integer ID of the page.

    If a page ID is already known (for example, because one was passed to
    :py:meth:`__init__`, as is done for pages returned by an SQL generator
    like :py:meth:`category.get_members()
    <earwigbot.wiki.category.Category.get_members>`), it is returned
    straight away. Otherwise the page's attributes are loaded first,
    unless that has already happened.

    Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
    :py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
    invalid or the page does not exist, respectively.
    """
    known_id = self._pageid
    if known_id:
        return known_id
    if self._exists == self.PAGE_UNKNOWN:
        self._load()
    self._assert_existence()  # Missing pages do not have IDs
    return self._pageid

@property
def url(self):
    """The URL of the page.

    Like :py:meth:`title`, reading this never triggers an API query. If
    no full URL is stored for the page, one is built from the title, the
    site's article path, and the site's URL.
    """
    if not self._fullurl:
        # Spaces become underscores, then the title is percent-encoded:
        underscored = self._title.encode("utf8").replace(" ", "_")
        slug = quote(underscored, safe="/:").decode("utf8")
        path = self.site._article_path.replace("$1", slug)
        return u"".join((self.site.url, path))
    return self._fullurl

@property
def namespace(self):
    """The ID of the page's namespace (an integer).

    Like :py:meth:`title`, reading this never triggers an API query. If
    the API was never queried for this page, the namespace is the one we
    determined ourselves based on the title.
    """
    return self._namespace

@property
def protection(self):
    """The current protection status of the page.

    The page's attributes are loaded first if that has not happened yet.

    Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` if the page
    name is invalid. No error is raised for missing pages, because those
    can still be create-protected.
    """
    needs_load = self._exists == self.PAGE_UNKNOWN
    if needs_load:
        self._load()
    self._assert_validity()  # Invalid pages cannot be protected
    return self._protection

@property
def is_talkpage(self):
    """``True`` if this is a talkpage, ``False`` otherwise.

    Like :py:meth:`title`, reading this never triggers an API query. If
    the API was never queried for this page, the value is the one we
    determined ourselves based on the page's namespace.
    """
    return self._is_talkpage

@property
def is_redirect(self):
    """``True`` if this page is a redirect, ``False`` otherwise.

    The page's attributes are loaded first if that has not happened yet.

    ``False`` is returned even if the page does not exist or is invalid.
    """
    if self._exists != self.PAGE_UNKNOWN:
        return self._is_redirect
    self._load()
    return self._is_redirect

def reload(self):
    """Force the page's attributes to be loaded again.

    Emphasis on *reload*: you only need this when there is reason to
    believe the attributes have changed. Content is refreshed as well, but
    only if it had already been loaded.
    """
    self._load()
    content_was_loaded = self._content is not None
    if content_was_loaded:
        self._load_content()

def toggle_talk(self, follow_redirects=None):
    """Return a content page's talk page, or vice versa.

    The title of the new page is determined by namespace logic, not API
    queries. We won't make any API queries on our own.

    If *follow_redirects* is anything other than ``None`` (the default), it
    will be passed to the new :py:class:`~earwigbot.wiki.page.Page`
    object's :py:meth:`__init__`. Otherwise, we'll use the value passed to
    our own :py:meth:`__init__`.

    Will raise :py:exc:`~earwigbot.exceptions.InvalidPageError` if we try
    to get the talk page of a special page (in the ``Special:`` or
    ``Media:`` namespaces), but we won't raise an exception if our page is
    otherwise missing or invalid.
    """
    if self._namespace < 0:
        ns = self.site.namespace_id_to_name(self._namespace)
        e = u"Pages in the {0} namespace can't have talk pages.".format(ns)
        raise exceptions.InvalidPageError(e)

    if self._is_talkpage:
        new_ns = self._namespace - 1
    else:
        new_ns = self._namespace + 1

    if self._namespace == 0:
        # Pages in namespace 0 have no namespace prefix, so a colon in the
        # title (e.g. "Star Wars: Episode I") belongs to the title itself
        # and must not be split off. Only a bare leading colon is dropped:
        if self._title.startswith(":"):
            body = self._title[1:]
        else:
            body = self._title
    else:
        try:
            body = self._title.split(":", 1)[1]
        except IndexError:
            body = self._title

    new_prefix = self.site.namespace_id_to_name(new_ns)

    # If the new page is in namespace 0, don't do ":Title" (it's correct,
    # but unnecessary), just do "Title":
    if new_prefix:
        new_title = u":".join((new_prefix, body))
    else:
        new_title = body

    if follow_redirects is None:
        follow_redirects = self._follow_redirects
    return Page(self.site, new_title, follow_redirects)

def get(self):
    """Return the content of the page, caching it for later calls.

    Raises InvalidPageError or PageNotFoundError if the page name is
    invalid or the page does not exist, respectively.
    """
    if self._exists != self.PAGE_UNKNOWN:
        # Make sure we're dealing with a real page here. This may be
        # outdated if the page was deleted since we last called
        # self._load_attributes(), but self._load_content() can handle
        # that:
        self._assert_existence()
        if self._content is None:
            self._load_content()
        return self._content

    # Nothing is loaded yet, so a single API query fetches both our
    # attributes and our page content:
    result = self.site.api_query(
        action="query", rvlimit=1, titles=self._title,
        prop="info|revisions", inprop="protection|url", intoken="edit",
        rvprop="content|timestamp")
    self._load_attributes(result=result)
    self._assert_existence()
    self._load_content(result=result)

    # Follow redirects if we're told to:
    if self._keep_following and self._is_redirect:
        self._title = self.get_redirect_target()
        self._keep_following = False  # Don't follow double redirects
        self._exists = self.PAGE_UNKNOWN  # Force another API query
        self.get()

    return self._content

def get_redirect_target(self):
    """If the page is a redirect, return its destination.

    The destination is the text inside the first ``[[...]]`` link of a
    ``#REDIRECT`` directive at the very start of the page content (the
    match is case-insensitive and tolerates surrounding whitespace).

    Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
    :py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
    invalid or the page does not exist, respectively. Raises
    :py:exc:`~earwigbot.exceptions.RedirectError` if the page is not a
    redirect.
    """
    # A raw string keeps the backslashes for the regex engine instead of
    # relying on unrecognized string escapes being passed through:
    re_redirect = r"^\s*\#\s*redirect\s*\[\[(.*?)\]\]"
    content = self.get()
    match = re.search(re_redirect, content, flags=re.I)
    if match is None:
        e = "The page does not appear to have a redirect target."
        raise exceptions.RedirectError(e)
    return match.group(1)

def get_creator(self):
    """Return the User object for the first person to edit the page.

    Attributes are loaded if they have not been loaded yet. Normally, the
    creator is fetched along with everything else (except content) in
    :py:meth:`_load_attributes`. However, due to a limitation in the API
    (can't get the editor of one revision and the content of another at
    both ends of the history), the creator is still unknown when our
    attributes were only loaded through :py:meth:`get`; in that case we
    load them once more.

    Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
    :py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
    invalid or the page does not exist, respectively.
    """
    attributes_unloaded = self._exists == self.PAGE_UNKNOWN
    if attributes_unloaded:
        self._load()
    self._assert_existence()

    if self._creator:
        return self.site.get_user(self._creator)

    # Creator is still unknown, so load our attributes again:
    self._load()
    self._assert_existence()
    return self.site.get_user(self._creator)

def parse(self):
    """Return the page content parsed for templates, links, etc.

    The content is fetched with :py:meth:`get` and the actual parsing is
    done by :py:mod:`mwparserfromhell`. Raises
    :py:exc:`~earwigbot.exceptions.InvalidPageError` or
    :py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
    invalid or the page does not exist, respectively.
    """
    content = self.get()
    return mwparserfromhell.parse(content)

def edit(self, text, summary, minor=False, bot=True, force=False):
    """Replace the page's content, or create the page if it is new.

    *text* is the new page content and *summary* is the edit summary. If
    *minor* is ``True``, the edit will be marked as minor. If *bot* is
    ``True``, the edit will be marked as a bot edit, but only if we
    actually have a bot flag.

    Use *force* to push the new content even if there's an edit conflict or
    the page was deleted/recreated between getting our edit token and
    editing our page. Be careful with this!
    """
    options = {"text": text, "summary": summary, "minor": minor,
               "bot": bot, "force": force}
    self._edit(**options)

def add_section(self, text, title, minor=False, bot=True, force=False):
    """Append a new section to the bottom of the page.

    This takes the same arguments as :py:meth:`edit`, except that a
    section *title* is given in place of an edit summary. Likewise, raised
    exceptions are the same as :py:meth:`edit`'s.

    This should create the page if it does not already exist, with just the
    new section as content.
    """
    options = {"text": text, "summary": title, "minor": minor, "bot": bot,
               "force": force, "section": "new"}
    self._edit(**options)

def check_exclusion(self, username=None, optouts=None):
    """Check whether or not we are allowed to edit the page.

    Return ``True`` if we *are* allowed to edit this page, and ``False`` if
    we aren't.

    *username* is used to determine whether we are part of a specific list
    of allowed or disallowed bots (e.g. ``{{bots|allow=EarwigBot}}`` or
    ``{{bots|deny=FooBot,EarwigBot}}``). It's ``None`` by default, which
    will swipe our username from :py:meth:`site.get_user()
    <earwigbot.wiki.site.Site.get_user>`.\
    :py:attr:`~earwigbot.wiki.user.User.name`.

    *optouts* is a list of messages to consider this check as part of for
    the purpose of opt-out; it defaults to ``None``, which ignores the
    parameter completely. For example, if *optouts* is ``["nolicense"]``,
    we'll return ``False`` on ``{{bots|optout=nolicense}}`` or
    ``{{bots|optout=all}}``, but `True` on
    ``{{bots|optout=orfud,norationale,replaceable}}``.

    Comparisons are case-insensitive: the username, the opt-outs, and the
    values of the template parameters are all lowercased first.
    """
    def parse_param(template, param):
        """Return a parameter's comma-separated values, lowercased."""
        value = template.get(param).value
        return [item.strip().lower() for item in value.split(",")]

    if not username:
        username = self.site.get_user().name

    # Lowercase everything:
    username = username.lower()
    optouts = [optout.lower() for optout in optouts] if optouts else []

    # Matches the opening of a {{bots}} or {{nobots}} template:
    r_bots = r"\{\{\s*(no)?bots\s*(\||\}\})"
    templates = self.parse().ifilter_templates(recursive=True,
                                               matches=r_bots)
    for template in templates:
        if template.has_param("deny"):
            denies = parse_param(template, "deny")
            if "all" in denies or username in denies:
                return False
        if template.has_param("allow"):
            allows = parse_param(template, "allow")
            if "all" in allows or username in allows:
                # Explicitly allowed; skip this template's other checks:
                continue
        if optouts and template.has_param("optout"):
            tasks = parse_param(template, "optout")
            if "all" in tasks or any(optout in tasks for optout in optouts):
                return False
        if template.name.strip().lower() == "nobots":
            return False

    return True

+ 849
- 0
earwigbot/wiki/site.py View File

@@ -0,0 +1,849 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from cookielib import CookieJar
from gzip import GzipFile
from json import loads
from logging import getLogger, NullHandler
from os.path import expanduser
from StringIO import StringIO
from threading import Lock
from time import sleep, time
from urllib import quote_plus, unquote_plus
from urllib2 import build_opener, HTTPCookieProcessor, URLError
from urlparse import urlparse

import oursql

from earwigbot import exceptions
from earwigbot.wiki import constants
from earwigbot.wiki.category import Category
from earwigbot.wiki.page import Page
from earwigbot.wiki.user import User

__all__ = ["Site"]

class Site(object):
"""
**EarwigBot: Wiki Toolset: Site**

Represents a site, with support for API queries and returning
:py:class:`~earwigbot.wiki.page.Page`,
:py:class:`~earwigbot.wiki.user.User`,
and :py:class:`~earwigbot.wiki.category.Category` objects. The constructor
takes a bunch of arguments and you probably won't need to call it directly,
rather :py:meth:`wiki.get_site() <earwigbot.wiki.sitesdb.SitesDB.get_site>`
for returning :py:class:`Site`
instances, :py:meth:`wiki.add_site()
<earwigbot.wiki.sitesdb.SitesDB.add_site>` for adding new ones to our
database, and :py:meth:`wiki.remove_site()
<earwigbot.wiki.sitesdb.SitesDB.remove_site>` for removing old ones from
our database, should suffice.

*Attributes:*

- :py:attr:`name`: the site's name (or "wikiid"), like ``"enwiki"``
- :py:attr:`project`: the site's project name, like ``"wikipedia"``
- :py:attr:`lang`: the site's language code, like ``"en"``
- :py:attr:`domain`: the site's web domain, like ``"en.wikipedia.org"``
- :py:attr:`url`: the site's URL, like ``"https://en.wikipedia.org"``

*Public methods:*

- :py:meth:`api_query`: does an API query with kwargs as params
- :py:meth:`sql_query`: does an SQL query and yields its results
- :py:meth:`get_maxlag`: returns the internal database lag
- :py:meth:`get_replag`: estimates the external database lag
- :py:meth:`namespace_id_to_name`: returns names associated with an NS id
- :py:meth:`namespace_name_to_id`: returns the ID associated with a NS name
- :py:meth:`get_page`: returns a Page for the given title
- :py:meth:`get_category`: returns a Category for the given title
- :py:meth:`get_user`: returns a User object for the given name
- :py:meth:`delegate`: controls when the API or SQL is used
"""
SERVICE_API = 1
SERVICE_SQL = 2

def __init__(self, name=None, project=None, lang=None, base_url=None,
             article_path=None, script_path=None, sql=None,
             namespaces=None, login=(None, None), cookiejar=None,
             user_agent=None, use_https=False, assert_edit=None,
             maxlag=None, wait_between_queries=2, logger=None,
             search_config=None):
    """Constructor for new Site instances.

    This probably isn't necessary to call yourself unless you're building a
    Site that's not in your config and you don't want to add it - normally
    all you need is wiki.get_site(name), which creates the Site for you
    based on your config file and the sites database. We accept a bunch of
    kwargs, but the only ones you really "need" are *base_url* and
    *script_path*; this is enough to figure out an API url. *login*, a
    tuple of (username, password), is highly recommended. *cookiejar* will
    be used to store cookies, and we'll use a normal CookieJar if none is
    given.

    First, we'll store the given arguments as attributes, then set up our
    URL opener. We'll load any of the attributes that weren't given from
    the API, and then log in if a username/pass was given and we aren't
    already logged in.

    If no *logger* is given, the shared ``"earwigbot.wiki"`` logger is
    used, with a single NullHandler attached to swallow our messages.
    """
    # Attributes referring to site information, filled in by an API query
    # if they are missing (and an API url can be determined):
    self._name = name
    self._project = project
    self._lang = lang
    self._base_url = base_url
    self._article_path = article_path
    self._script_path = script_path
    self._namespaces = namespaces

    # Attributes used for API queries:
    self._use_https = use_https
    self._assert_edit = assert_edit
    self._maxlag = maxlag
    self._wait_between_queries = wait_between_queries
    self._max_retries = 6
    self._last_query_time = 0
    self._api_lock = Lock()
    self._api_info_cache = {"maxlag": 0, "lastcheck": 0}

    # Attributes used for SQL queries:
    self._sql_data = sql if sql else {}
    self._sql_conn = None
    self._sql_lock = Lock()
    self._sql_info_cache = {"replag": 0, "lastcheck": 0, "usable": None}

    # Attribute used in copyright violation checks (see CopyrightMixIn):
    self._search_config = search_config if search_config else {}

    # Set up cookiejar and URL opener for making API queries:
    if cookiejar is not None:
        self._cookiejar = cookiejar
    else:
        self._cookiejar = CookieJar()
    if not user_agent:
        user_agent = constants.USER_AGENT  # Set default UA
    self._opener = build_opener(HTTPCookieProcessor(self._cookiejar))
    self._opener.addheaders = [("User-Agent", user_agent),
                               ("Accept-Encoding", "gzip")]

    # Set up our internal logger:
    if logger:
        self._logger = logger
    else:  # Just set up a null logger to eat up our messages:
        self._logger = getLogger("earwigbot.wiki")
        # The logger is shared by every Site, so only attach a NullHandler
        # if one isn't there already; otherwise each new Site would pile
        # another handler onto it:
        handlers = self._logger.handlers
        if not any(isinstance(hdlr, NullHandler) for hdlr in handlers):
            self._logger.addHandler(NullHandler())

    # Get all of the above attributes that were not specified as arguments:
    self._load_attributes()

    # If we have a name/pass and the API says we're not logged in, log in:
    self._login_info = login
    username, password = login
    if username and password:
        logged_in_as = self._get_username_from_cookies()
        if not logged_in_as or username.replace("_", " ") != logged_in_as:
            self._login(login)

def __repr__(self):
    """Return the canonical string representation of the Site.

    The password is never included; it is shown as ``hidden`` (or ``None``
    when there isn't one).
    """
    username, password = self._login_info
    login = "({0}, {1})".format(repr(username),
                                "hidden" if password else None)

    jar = self._cookiejar
    cookies = jar.__class__.__name__
    if hasattr(jar, "filename"):
        cookies += "({0!r})".format(getattr(jar, "filename"))
    else:
        cookies += "()"

    agent = self._opener.addheaders[0][1]  # Value of the User-Agent header

    fields = (
        "Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}",
        "base_url={_base_url!r}", "article_path={_article_path!r}",
        "script_path={_script_path!r}", "use_https={_use_https!r}",
        "assert_edit={_assert_edit!r}", "maxlag={_maxlag!r}",
        "sql={_sql_data!r}", "login={0}", "user_agent={2!r}",
        "cookiejar={1})")
    template = ", ".join(fields)
    return template.format(login, cookies, agent, **self.__dict__)

def __str__(self):
    """Return a short, readable description of the Site."""
    template = "<Site {0} ({1}:{2}) at {3}>"
    details = (self.name, self.project, self.lang, self.domain)
    return template.format(*details)

def _unicodeify(self, value, encoding="utf8"):
    """Return *value* as unicode, decoding it with *encoding* if needed."""
    if not isinstance(value, unicode):
        return unicode(value, encoding)
    return value

def _urlencode_utf8(self, params):
    """Implement urllib.urlencode() with support for unicode input.

    Unicode keys and values are encoded as UTF-8 and everything else is
    passed through ``str()`` before being quoted. The result is a string
    of ``key=value`` pairs joined by ``&``.
    """
    def as_bytes(item):
        if isinstance(item, unicode):
            return item.encode("utf8")
        return str(item)

    pairs = [quote_plus(as_bytes(key)) + "=" + quote_plus(as_bytes(val))
             for key, val in params.iteritems()]
    return "&".join(pairs)

def _api_query(self, params, tries=0, wait=5, ignore_maxlag=False):
    """Do an API query with *params* as a dict of parameters.

    See the documentation for :py:meth:`api_query` for full implementation
    details.
    """
    # Throttling support: wait if the previous query was too recent.
    idle = time() - self._last_query_time
    if idle < self._wait_between_queries:
        delay = self._wait_between_queries - idle
        notice = "Throttled: waiting {0} seconds".format(round(delay, 2))
        self._logger.debug(notice)
        sleep(delay)
    self._last_query_time = time()

    url, data = self._build_api_query(params, ignore_maxlag)
    if "lgpassword" in params:
        # Never write the POST data to the log when it holds a password:
        self._logger.debug("{0} -> <hidden>".format(url))
    else:
        self._logger.debug("{0} -> {1}".format(url, data))

    try:
        response = self._opener.open(url, data)
    except URLError as error:
        if hasattr(error, "reason"):
            e = "API query failed: {0}.".format(error.reason)
        elif hasattr(error, "code"):
            e = "API query failed: got an error code of {0}."
            e = e.format(error.code)
        else:
            e = "API query failed."
        raise exceptions.APIError(e)

    result = response.read()
    if response.headers.get("Content-Encoding") == "gzip":
        # Decompress the response body in memory:
        result = GzipFile(fileobj=StringIO(result)).read()

    return self._handle_api_query_result(result, params, tries, wait)

def _build_api_query(self, params, ignore_maxlag):
    """Return the API URL and the encoded POST data for a query.

    *params* is modified in place: the response format is always set to
    JSON, and our ``assert`` and ``maxlag`` settings are added when they
    are enabled (``maxlag`` only if *ignore_maxlag* is false). Raises
    APIError if we don't know enough to build an API URL.
    """
    api_url_known = self._base_url and self._script_path is not None
    if not api_url_known:
        e = "Tried to do an API query, but no API URL is known."
        raise exceptions.APIError(e)

    url = ''.join((self.url, self._script_path, "/api.php"))

    params["format"] = "json"  # This is the only format we understand
    if self._assert_edit:
        # If requested, ensure that we're logged in:
        params["assert"] = self._assert_edit
    if self._maxlag and not ignore_maxlag:
        # If requested, don't overload the servers:
        params["maxlag"] = self._maxlag

    return url, self._urlencode_utf8(params)

def _handle_api_query_result(self, result, params, tries, wait):
    """Decode the raw result of an API query and return useful data.

    The decoded JSON is returned if it doesn't describe an error. If the
    server reports a ``maxlag`` error, we sleep for *wait* seconds and
    retry with the wait time doubled, giving up with APIError once
    ``self._max_retries`` is reached. Any other error is raised as an
    APIError with ``code`` and ``info`` attributes attached.
    """
    try:
        res = loads(result)  # Try to parse as a JSON object
    except ValueError:
        e = "API query failed: JSON could not be decoded."
        raise exceptions.APIError(e)

    try:
        code = res["error"]["code"]
        info = res["error"]["info"]
    except (TypeError, KeyError):  # Having these keys indicates a problem
        return res  # All is well; return the decoded JSON

    if code != "maxlag":  # Some unknown error occurred
        e = 'API query failed: got error "{0}"; server says: "{1}".'
        error = exceptions.APIError(e.format(code, info))
        error.code, error.info = code, info
        raise error

    # We've been throttled by the server:
    if tries >= self._max_retries:
        e = "Maximum number of retries reached ({0})."
        raise exceptions.APIError(e.format(self._max_retries))
    tries += 1
    msg = 'Server says "{0}"; retrying in {1} seconds ({2}/{3})'
    self._logger.info(msg.format(info, wait, tries, self._max_retries))
    sleep(wait)
    return self._api_query(params, tries=tries, wait=wait*2)

def _load_attributes(self, force=False):
    """Load data about our Site from the API.

    This is called by __init__() to fill in any site attributes that were
    not given as keyword arguments. An API query is made only if there
    actually *is* missing data, unless *force* is true, in which case all
    attributes (including namespaces) are reloaded.
    """
    # Namespaces are a special case because they require additional
    # params in the API query:
    need_namespaces = force or not self._namespaces
    known = [self._name, self._project, self._lang, self._base_url,
             self._article_path, self._script_path]
    if not need_namespaces and all(known):
        # Everything is already specified and we're not told to force a
        # reload, so do nothing:
        return

    siprop = "general"
    if need_namespaces:
        siprop += "|namespaces|namespacealiases"
    result = self.api_query(action="query", meta="siteinfo", siprop=siprop)
    if need_namespaces:
        self._load_namespaces(result)

    general = result["query"]["general"]
    self._name = general["wikiid"]
    self._project = general["sitename"].lower()
    self._lang = general["lang"]
    self._base_url = general["server"]
    self._article_path = general["articlepath"]
    self._script_path = general["scriptpath"]

def _load_namespaces(self, result):
    """Fill self._namespaces with a dict of namespace IDs and names.

    Called by _load_attributes() with API data as *result*. Each namespace
    ID is mapped to a list of names: the local name first, then the
    canonical name if it is different, then any aliases.
    """
    self._namespaces = {}
    query = result["query"]

    for entry in query["namespaces"].values():
        ns_id = entry["id"]
        local_name = entry["*"]
        names = [local_name]
        if "canonical" in entry and entry["canonical"] != local_name:
            names.append(entry["canonical"])
        self._namespaces[ns_id] = names

    for entry in query["namespacealiases"]:
        self._namespaces[entry["id"]].append(entry["*"])

def _get_cookie(self, name, domain):
    """Return the named cookie unless it is expired or doesn't exist.

    Only the first cookie in our cookiejar with a matching *name* and
    *domain* is considered; ``None`` is returned if it is expired or if
    there is no such cookie.
    """
    for candidate in self._cookiejar:
        if candidate.name != name or candidate.domain != domain:
            continue
        return None if candidate.is_expired() else candidate
    return None

def _get_username_from_cookies(self):
    """Try to return our username based solely on cookies.

    First, we look for a cookie named self._name + "Token", like
    "enwikiToken". If it exists and isn't expired, we assume it's valid
    and try to return the value of the cookie self._name + "UserName"
    (like "enwikiUserName"). This should work fine on wikis without
    single-user login.

    If that gives no result, we look for unexpired `centralauth_Token`
    cookies whose domain covers our own domain, and try to return the
    value of the matching `centralauth_User` cookie.

    If we didn't get any matches, we'll return None. Our goal here isn't to
    return the most likely username, or what we *want* our username to be
    (for that, we'd do self._login_info[0]), but rather to get our current
    username without an unnecessary ?action=query&meta=userinfo API query.
    """
    token_name = ''.join((self._name, "Token"))
    if self._get_cookie(token_name, self.domain):
        user_cookie_name = ''.join((self._name, "UserName"))
        local_user = self._get_cookie(user_cookie_name, self.domain)
        if local_user:
            return unquote_plus(local_user.value)

    for cookie in self._cookiejar:
        if cookie.name != "centralauth_Token" or cookie.is_expired():
            continue
        base = cookie.domain
        if base.startswith(".") and not cookie.domain_initial_dot:
            base = base[1:]
        if not self.domain.endswith(base):
            continue
        global_user = self._get_cookie("centralauth_User", cookie.domain)
        if global_user:
            return unquote_plus(global_user.value)

def _get_username_from_api(self):
    """Ask the API for our username and return it.

    This is a reliable way to make sure we are actually logged in, because
    it doesn't deal with annoying cookie logic, but it results in an API
    query that is unnecessary in some cases.

    Called by _get_username() (in turn called by get_user() with no
    username argument) when cookie lookup fails, probably indicating that
    we are logged out.
    """
    response = self.api_query(action="query", meta="userinfo")
    userinfo = response["query"]["userinfo"]
    return userinfo["name"]

def _get_username(self):
    """Return the name of the current user, whether logged in or not.

    We first try to deduce it solely from cookies, to avoid an unnecessary
    API query. For the cookie-detection method, see
    _get_username_from_cookies()'s docs.

    If our username isn't in cookies, then we're probably not logged in, or
    something fishy is going on (like forced logout). In this case, do a
    single API query for our username (or IP address) and return that.
    """
    return self._get_username_from_cookies() or self._get_username_from_api()

def _save_cookiejar(self):
    """Try to save our cookiejar after doing a (normal) login or logout.

    Calls the standard .save() method with no filename. Nothing happens if
    our cookiejar has no such method (like a plain CookieJar), and we
    don't fret if saving is not implemented (FileCookieJar raises
    NotImplementedError) or no default filename was given (LWPCookieJar
    and MozillaCookieJar raise ValueError).
    """
    jar = self._cookiejar
    if not hasattr(jar, "save"):
        return
    try:
        jar.save()
    except (NotImplementedError, ValueError):
        pass

def _login(self, login, token=None, attempt=0):
    """Safely login through the API.

    Normally, this is called by __init__() if a username and password have
    been provided and no valid login cookies were found. The only other
    time it needs to be called is when those cookies expire, which is done
    automatically by api_query() if a query fails.

    Recent versions of MediaWiki's API have fixed a CSRF vulnerability,
    requiring login to be done in two separate requests. If the response
    from our initial request is "NeedToken", we'll do another one with the
    token. If login is successful, we'll try to save our cookiejar.

    Raises LoginError on login errors (duh), like bad passwords and
    nonexistent usernames.

    *login* is a (username, password) tuple. *token* is the token returned
    from our first request, and *attempt* is to prevent getting stuck in a
    loop if MediaWiki isn't acting right.
    """
    username, password = login
    query = {"action": "login", "lgname": username, "lgpassword": password}
    if token:
        query["lgtoken"] = token
    result = self.api_query(**query)

    status = result["login"]["result"]
    if status == "Success":
        self._save_cookiejar()
        return None
    if status == "NeedToken" and attempt == 0:
        # Repeat the request, this time with the token we were handed:
        return self._login(login, result["login"]["token"], attempt=1)

    if status == "Illegal":
        e = "The provided username is illegal."
    elif status == "NotExists":
        e = "The provided username does not exist."
    elif status == "EmptyPass":
        e = "No password was given."
    elif status == "WrongPass" or status == "WrongPluginPass":
        e = "The given password is incorrect."
    else:
        e = "Couldn't login; server says '{0}'.".format(status)
    raise exceptions.LoginError(e)

def _logout(self):
    """Log out through the API and throw away our session cookies.

    Three steps, in this order: send ``api.php?action=logout``, empty the
    cookiejar (its contents are presumably invalid once the server has
    processed the logout), and ask _save_cookiejar() to persist the
    now-empty jar if the jar supports saving.
    """
    self.api_query(action="logout")
    jar = self._cookiejar
    jar.clear()
    self._save_cookiejar()

def _sql_connect(self, **kwargs):
    """Attempt to establish a connection with this site's SQL database.

    oursql.connect() is called with a *copy* of self._sql_data as its
    kwargs. Any kwargs given to this function are layered on top and take
    precedence over the config file, but only for this one call: neither
    they nor the defaults filled in below are written back to
    self._sql_data.

    Defaults applied when the corresponding key is absent:

    - ``read_default_file`` becomes ``~/.my.cnf``, but only when neither
      ``user`` nor ``passwd`` was supplied either;
    - ``autoping`` and ``autoreconnect`` become ``True``.

    The resulting connection is stored in self._sql_conn.

    Will raise SQLError() if the module "oursql" is not available. oursql
    may raise its own exceptions (e.g. oursql.InterfaceError) if it cannot
    establish a connection.
    """
    if not oursql:
        e = "Module 'oursql' is required for SQL queries."
        raise exceptions.SQLError(e)

    # Copy before editing: self._sql_data is the site's stored SQL config,
    # and mutating it here would make one-off overrides and our defaults
    # permanent for every later use of that dict.
    args = dict(self._sql_data)
    args.update(kwargs)

    has_credentials = "user" in args or "passwd" in args
    if "read_default_file" not in args and not has_credentials:
        args["read_default_file"] = expanduser("~/.my.cnf")
    args.setdefault("autoping", True)
    args.setdefault("autoreconnect", True)

    self._sql_conn = oursql.connect(**args)

def _get_service_order(self):
    """Return a preferred order for using services (e.g. the API and SQL).

    A list is returned, starting with the most preferred service first and
    ending with the least preferred one. Currently, there are only two
    services. SERVICE_API will always be included since the API is expected
    to be always usable. In normal circumstances, self.SERVICE_SQL will be
    first (with the API second), since using SQL directly is easier on the
    servers than making web queries with the API. self.SERVICE_SQL will be
    second if replag is greater than five minutes (300 seconds; a cached
    value updated every two minutes at most), *unless* API lag is also
    very high, i.e. above self._maxlag (a cached value updated every five
    minutes at most). If self._maxlag is not set, API lag is not consulted
    and the API simply goes first. self.SERVICE_SQL will not be included in
    the list if we cannot form a proper SQL connection, which includes the
    case where the "oursql" module is not installed.
    """
    now = time()
    if now - self._sql_info_cache["lastcheck"] > 120:
        self._sql_info_cache["lastcheck"] = now
        if not oursql:
            # Without the module there is nothing to probe, and the
            # except clause below could not even name oursql.Error.
            self._sql_info_cache["usable"] = False
            return [self.SERVICE_API]
        try:
            self._sql_info_cache["replag"] = sqllag = self.get_replag()
        except (exceptions.SQLError, oursql.Error):
            self._sql_info_cache["usable"] = False
            return [self.SERVICE_API]
        self._sql_info_cache["usable"] = True
    else:
        # Cache is still fresh: reuse the last verdict and lag value.
        if not self._sql_info_cache["usable"]:
            return [self.SERVICE_API]
        sqllag = self._sql_info_cache["replag"]

    if sqllag > 300:
        if not self._maxlag:
            return [self.SERVICE_API, self.SERVICE_SQL]
        if now - self._api_info_cache["lastcheck"] > 300:
            self._api_info_cache["lastcheck"] = now
            try:
                self._api_info_cache["maxlag"] = apilag = self.get_maxlag()
            except exceptions.APIError:
                # Treat an unreadable API lag as "no lag".
                self._api_info_cache["maxlag"] = apilag = 0
        else:
            apilag = self._api_info_cache["maxlag"]
        if apilag > self._maxlag:
            # Both are lagging; SQL is still the cheaper choice.
            return [self.SERVICE_SQL, self.SERVICE_API]
        return [self.SERVICE_API, self.SERVICE_SQL]

    return [self.SERVICE_SQL, self.SERVICE_API]

@property
def name(self):
    """The Site's name (or "wikiid" in the API), like ``"enwiki"``.

    Read-only view of the value held in ``self._name``.
    """
    return self._name

@property
def project(self):
    """The Site's project name in lowercase, like ``"wikipedia"``.

    Read-only view of the value held in ``self._project``.
    """
    return self._project

@property
def lang(self):
    """The Site's language code, like ``"en"`` or ``"es"``.

    Read-only view of the value held in ``self._lang``.
    """
    return self._lang

@property
def domain(self):
    """The Site's web domain, like ``"en.wikipedia.org"``.

    This is the network-location part of ``self._base_url`` as reported by
    urlparse().
    """
    parsed = urlparse(self._base_url)
    return parsed.netloc

@property
def url(self):
    """The Site's full base URL, like ``"https://en.wikipedia.org"``.

    A base URL that already carries a scheme is returned untouched. A
    protocol-relative one (``"//..."``, as produced by MediaWiki 1.18+)
    gets ``"https:"`` prepended if ``self._use_https`` is set and
    ``"http:"`` otherwise.
    """
    base = self._base_url
    if not base.startswith("//"):
        return base
    scheme = "https:" if self._use_https else "http:"
    return scheme + base

def api_query(self, **kwargs):
    """Do an API query with `kwargs` as the parameters.

    This will first attempt to construct an API url from
    :py:attr:`self._base_url` and :py:attr:`self._script_path`. We need
    both of these, or else we'll raise
    :py:exc:`~earwigbot.exceptions.APIError`. If
    :py:attr:`self._base_url` is protocol-relative (introduced in MediaWiki
    1.18), we'll choose HTTPS only if :py:attr:`self._use_https` is
    ``True``, otherwise HTTP.

    We'll encode the given params, adding ``format=json`` along the way, as
    well as ``&assert=`` and ``&maxlag=`` based on
    :py:attr:`self._assert_edit` and :py:attr:`self._maxlag` respectively.
    Additionally, we'll sleep a bit if the last query was made fewer than
    :py:attr:`self._wait_between_queries` seconds ago. The request is made
    through :py:attr:`self._opener`, which has cookie support
    (:py:attr:`self._cookiejar`), a ``User-Agent``
    (:py:const:`earwigbot.wiki.constants.USER_AGENT`), and
    ``Accept-Encoding`` set to ``"gzip"``.

    Assuming everything went well, we'll gunzip the data (if compressed),
    load it as a JSON object, and return it.

    If our request failed for some reason, we'll raise
    :py:exc:`~earwigbot.exceptions.APIError` with details. If that
    reason was due to maxlag, we'll sleep for a bit and then repeat the
    query until we exceed :py:attr:`self._max_retries`.

    There is helpful MediaWiki API documentation at `MediaWiki.org
    <http://www.mediawiki.org/wiki/API>`_.
    """
    # The actual work happens in _api_query(); this wrapper only makes
    # sure the call runs while self._api_lock is held.
    with self._api_lock:
        return self._api_query(kwargs)

def sql_query(self, query, params=(), plain_query=False, dict_cursor=False,
              cursor_class=None, show_table=False):
    """Do an SQL query and yield its results.

    If *plain_query* is ``True``, we will force an unparameterized query.
    Specifying both *params* and *plain_query* will cause an error. If
    *dict_cursor* is ``True``, we will use :py:class:`oursql.DictCursor` as
    our cursor, otherwise the default :py:class:`oursql.Cursor`. If
    *cursor_class* is given, it will override this option. If *show_table*
    is True, the name of the table will be prepended to the name of the
    column. This will mainly affect an :py:class:`~oursql.DictCursor`.

    Example usage::

        >>> query = "SELECT user_id, user_registration FROM user WHERE user_name = ?"
        >>> params = ("The Earwig",)
        >>> result1 = site.sql_query(query, params)
        >>> result2 = site.sql_query(query, params, dict_cursor=True)
        >>> for row in result1: print row
        (7418060L, '20080703215134')
        >>> for row in result2: print row
        {'user_id': 7418060L, 'user_registration': '20080703215134'}

    This is a generator: nothing is executed until the first result is
    requested, and :py:attr:`self._sql_lock` stays held from that moment
    until the generator is exhausted or closed.

    This may raise :py:exc:`~earwigbot.exceptions.SQLError` (in particular
    when the "oursql" module is not installed) or one of oursql's
    exceptions (:py:exc:`oursql.ProgrammingError`,
    :py:exc:`oursql.InterfaceError`, ...) if there were problems with the
    query.

    See :py:meth:`_sql_connect` for information on how a connection is
    acquired. Also relevant is `oursql's documentation
    <http://packages.python.org/oursql>`_ for details on that package.
    """
    if not oursql:
        # Check before touching oursql.Cursor / oursql.DictCursor below,
        # so a missing module surfaces as the documented SQLError rather
        # than an AttributeError on a falsy placeholder.
        e = "Module 'oursql' is required for SQL queries."
        raise exceptions.SQLError(e)

    if cursor_class:
        klass = cursor_class
    elif dict_cursor:
        klass = oursql.DictCursor
    else:
        klass = oursql.Cursor

    with self._sql_lock:
        if not self._sql_conn:
            self._sql_connect()
        with self._sql_conn.cursor(klass, show_table=show_table) as cur:
            cur.execute(query, params, plain_query)
            for result in cur:
                yield result

def get_maxlag(self, showall=False):
    """Return the internal database replication lag in seconds.

    This asks the API for ``meta=siteinfo&siprop=dbrepllag``, bypassing
    our own maxlag handling for that one request. In a typical setup the
    number describes replication lag *within* the WMF's cluster, *not*
    the external replication lag affecting the Toolserver (see
    :py:meth:`get_replag` for that). It is useful together with the
    ``maxlag`` API query param (added by config), which makes queries
    halt and retry when lag is too high, usually above five seconds.

    By default the ``lag`` value of the first server in the response is
    returned. With *showall*, ``sishowalldb`` is added to the request and
    a list with the lag of every server in the response is returned.
    """
    request = dict(action="query", meta="siteinfo", siprop="dbrepllag")
    if showall:
        request["sishowalldb"] = 1
    with self._api_lock:
        response = self._api_query(request, ignore_maxlag=True)

    servers = response["query"]["dbrepllag"]
    if not showall:
        return servers[0]["lag"]
    return [entry["lag"] for entry in servers]

def get_replag(self):
    """Return the estimated external database replication lag in seconds.

    Requires SQL access. The estimate is the difference between the
    database server's current time and the timestamp of the newest row in
    ``recentchanges``. It therefore only makes sense on a replicated
    database (e.g. the Wikimedia Toolserver) and on a wiki that receives
    a large number of edits (ideally, at least one per second); on a
    quiet wiki the result may be larger than expected.

    This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of
    oursql's exceptions (:py:exc:`oursql.ProgrammingError`,
    :py:exc:`oursql.InterfaceError`, ...) if there were problems.
    """
    query = ("SELECT UNIX_TIMESTAMP() - UNIX_TIMESTAMP(rc_timestamp) FROM\n"
             "recentchanges ORDER BY rc_timestamp DESC LIMIT 1")
    rows = list(self.sql_query(query))
    first_row = rows[0]
    return first_row[0]

def namespace_id_to_name(self, ns_id, all=False):
    """Given a namespace ID, return the associated namespace name(s).

    Each ID maps to a list of names. With *all* left ``False`` (default)
    only the first entry is returned, which is usually the localized
    name; with *all* set to ``True`` the whole list is returned, which
    includes the canonical name. For example, on ``enwiki`` *ns_id* =
    ``4`` gives ``u"Wikipedia"`` by default and ``[u"Wikipedia",
    u"Project", u"WP"]`` with *all*.

    Raises :py:exc:`~earwigbot.exceptions.NamespaceNotFoundError` if the ID
    is not found.
    """
    try:
        names = self._namespaces[ns_id]
        return names if all else names[0]
    except KeyError:
        message = "There is no namespace with id {0}.".format(ns_id)
        raise exceptions.NamespaceNotFoundError(message)

def namespace_name_to_id(self, name):
    """Given a namespace name, return the associated ID.

    The reverse of :py:meth:`namespace_id_to_name`: every known name of
    every namespace is considered, and the ID of the first namespace with
    a matching name is returned. Case is ignored, because namespaces are
    assumed to be case-insensitive.

    Raises :py:exc:`~earwigbot.exceptions.NamespaceNotFoundError` if the
    name is not found.
    """
    wanted = name.lower()
    for ns_id, known_names in self._namespaces.items():
        # Lowercase both sides so the comparison ignores case.
        if wanted in {candidate.lower() for candidate in known_names}:
            return ns_id

    message = "There is no namespace with name '{0}'.".format(name)
    raise exceptions.NamespaceNotFoundError(message)

def get_page(self, title, follow_redirects=False, pageid=None):
    """Return a :py:class:`Page` object for the given title.

    *follow_redirects* and *pageid* are passed directly to
    :py:class:`~earwigbot.wiki.page.Page`'s constructor. Also, this will
    return a :py:class:`~earwigbot.wiki.category.Category` object instead
    if the given title is in the category namespace, i.e. if the text
    before its first colon is one of the category namespace's names. That
    comparison ignores case, in line with :py:meth:`namespace_name_to_id`,
    so ``"category:Foo"`` is recognized just like ``"Category:Foo"``. As
    :py:class:`~earwigbot.wiki.category.Category` is a subclass of
    :py:class:`~earwigbot.wiki.page.Page`, this should not cause problems.

    Note that this doesn't do any direct checks for existence or
    redirect-following: :py:class:`~earwigbot.wiki.page.Page`'s methods
    provide that.
    """
    title = self._unicodeify(title)
    prefixes = self.namespace_id_to_name(constants.NS_CATEGORY, all=True)
    prefix, colon, _ = title.partition(":")
    if colon:  # No colon means no namespace, e.g. a page simply "Category"
        category_names = [known.lower() for known in prefixes]
        if prefix.lower() in category_names:
            return Category(self, title, follow_redirects, pageid,
                            self._logger)
    return Page(self, title, follow_redirects, pageid, self._logger)

def get_category(self, catname, follow_redirects=False, pageid=None):
    """Return a :py:class:`Category` object for the given category name.

    *catname* should be given *without* a namespace prefix; the first
    name of the category namespace and a colon are put in front of it
    here. This method is really just shorthand for
    :py:meth:`get_page("Category:" + catname) <get_page>`.
    """
    bare_name = self._unicodeify(catname)
    namespace = self.namespace_id_to_name(constants.NS_CATEGORY)
    full_title = namespace + u":" + bare_name
    return Category(self, full_title, follow_redirects, pageid, self._logger)

def get_user(self, username=None):
    """Return a :py:class:`User` object for the given username.

    If *username* is left as ``None`` (or is otherwise empty), the name
    is taken from _get_username() instead, so the returned
    :py:class:`~earwigbot.wiki.user.User` represents the currently
    logged-in (or anonymous!) user.
    """
    who = self._unicodeify(username) if username else self._get_username()
    return User(self, who, self._logger)

def delegate(self, services, args=None, kwargs=None):
    """Delegate a task to either the API or SQL depending on conditions.

    *services* is a dictionary mapping a service name
    (:py:attr:`self.SERVICE_API <SERVICE_API>` or
    :py:attr:`self.SERVICE_SQL <SERVICE_SQL>`) to the function to call
    for that service. Not every service needs an entry. Services are
    tried in the order given by :py:meth:`_get_service_order`; each
    function receives the positional arguments in the tuple *args* and
    the keyword arguments in the dict *kwargs*, both empty by default.

    The return value of the first function that completes is returned. A
    function that raises :py:exc:`~earwigbot.exceptions.ServiceError` is
    skipped in favor of the next service. Will raise
    :py:exc:`~earwigbot.exceptions.NoServiceError` if an appropriate
    service cannot be found.
    """
    call_args = args if args else ()
    call_kwargs = kwargs if kwargs else {}

    for service in self._get_service_order():
        if service not in services:
            continue
        try:
            return services[service](*call_args, **call_kwargs)
        except exceptions.ServiceError:
            pass  # This one failed; fall through to the next service
    raise exceptions.NoServiceError(services)

+ 438
- 0
earwigbot/wiki/sitesdb.py View File

@@ -0,0 +1,438 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from collections import OrderedDict
from cookielib import LWPCookieJar, LoadError
import errno
from os import chmod, path
from platform import python_version
import stat
import sqlite3 as sqlite

from earwigbot import __version__
from earwigbot.exceptions import SiteNotFoundError
from earwigbot.wiki.copyvios.exclusions import ExclusionsDB
from earwigbot.wiki.site import Site

__all__ = ["SitesDB"]

class SitesDB(object):
"""
**EarwigBot: Wiki Toolset: Sites Database Manager**

This class controls the :file:`sites.db` file, which stores information
about all wiki sites known to the bot. Three public methods act as bridges
between the bot's config files and :py:class:`~earwigbot.wiki.site.Site`
objects:

- :py:meth:`get_site`: returns a Site object corresponding to a site
- :py:meth:`add_site`: stores a site in the database
- :py:meth:`remove_site`: removes a site from the database

There's usually no need to use this class directly. All public methods
here are available as :py:meth:`bot.wiki.get_site`,
:py:meth:`bot.wiki.add_site`, and :py:meth:`bot.wiki.remove_site`, which
use a :file:`sites.db` file located in the same directory as our
:file:`config.yml` file. Lower-level access can be achieved by importing
the manager class (``from earwigbot.wiki import SitesDB``).
"""

def __init__(self, bot):
    """Set up the manager from the base Bot object.

    Only pieces of *bot* are kept: its config, and a "wiki" child of its
    logger. The paths of :file:`sites.db`, :file:`.cookies` and
    :file:`exclusions.db` are all derived from the config's root
    directory. The cookiejar itself is not loaded yet.
    """
    root = bot.config.root_dir
    self.config = bot.config
    self._logger = bot.logger.getChild("wiki")

    self._sites = {}  # Cache of Site objects, keyed by site name
    self._sitesdb = path.join(root, "sites.db")
    self._cookie_file = path.join(root, ".cookies")
    self._cookiejar = None  # Filled in lazily by _get_cookiejar()

    self._exclusions_db = ExclusionsDB(
        self, path.join(root, "exclusions.db"),
        self._logger.getChild("exclusionsdb"))

def __repr__(self):
    """Return the canonical string representation of the SitesDB.

    Shows the reprs of the config object, the sitesdb path and the
    cookie file path.
    """
    fields = (self.config, self._sitesdb, self._cookie_file)
    return "SitesDB(config={0!r}, sitesdb={1!r}, cookie_file={2!r})".format(*fields)

def __str__(self):
    """Return a short, readable description naming the sitesdb path."""
    location = self._sitesdb
    return "<SitesDB at {0}>".format(location)

def _get_cookiejar(self):
    """Return a LWPCookieJar object loaded from our .cookies file.

    The same cookiejar object is returned every time; it is created on
    the first call and cached in self._cookiejar. It is backed by the
    .cookies file, located in the project root, same directory as
    config.yml and bot.py. If that file doesn't exist, we will create it
    and set it to be readable and writeable only by us. If it exists but
    the information inside is bogus, we'll ignore it. Any other I/O error
    while loading is re-raised.

    This is normally called by _make_site_object() (in turn called by
    get_site()), and the cookiejar is passed to our Site's constructor,
    used when it makes API queries. This way, we can easily preserve
    cookies between sites (e.g., for CentralAuth), making logins easier.
    """
    # Compare against None explicitly: a cookiejar defines __len__, so a
    # jar that holds no cookies yet is falsy and a plain truth test would
    # rebuild it on every call, handing each Site a different jar.
    if self._cookiejar is not None:
        return self._cookiejar

    self._cookiejar = LWPCookieJar(self._cookie_file)

    try:
        self._cookiejar.load()
    except LoadError:
        pass  # File contains bad data, so ignore it completely
    except IOError as e:
        if e.errno != errno.ENOENT:  # Anything but "No such file..."
            raise
        # Create the file and restrict reading/writing only to the
        # owner, so others can't peek at our cookies:
        open(self._cookie_file, "w").close()
        chmod(self._cookie_file, stat.S_IRUSR | stat.S_IWUSR)

    return self._cookiejar

def _create_sitesdb(self):
    """Initialize the sitesdb file with its three necessary tables.

    The tables are ``sites`` (one row per site), ``sql_data`` (key/value
    SQL connection settings per site) and ``namespaces`` (one row per
    namespace name per site). All three are created in one script run
    against the file at self._sitesdb.
    """
    schema = (
        "\n"
        "CREATE TABLE sites (site_name, site_project, site_lang, site_base_url,\n"
        "site_article_path, site_script_path);\n"
        "CREATE TABLE sql_data (sql_site, sql_data_key, sql_data_value);\n"
        "CREATE TABLE namespaces (ns_site, ns_id, ns_name, ns_is_primary_name);\n"
    )
    with sqlite.connect(self._sitesdb) as conn:
        conn.executescript(schema)

def _get_site_object(self, name):
    """Return the cached Site called *name*, building it on first use.

    A thin caching layer over _make_site_object(): the first request for
    a name creates the object and remembers it in self._sites, and every
    later request for that name gets the very same object back.
    """
    if name not in self._sites:
        self._sites[name] = self._make_site_object(name)
    return self._sites[name]

def _load_site_from_sitesdb(self, name):
    """Return all information stored in the sitesdb relating to given site.

    The result is a tuple holding the site's name, project, language,
    base URL, article path, script path, SQL connection data (a dict)
    and namespaces (a dict mapping each namespace ID to a list of names
    with the primary name first), in that order.

    SiteNotFoundError is raised if the site has no row in the ``sites``
    table. If the query fails because the database has not been set up,
    an empty one is created before the exception is raised.
    """
    not_found = "Site '{0}' not found in the sitesdb.".format(name)
    site_query = "SELECT * FROM sites WHERE site_name = ?"
    sql_query = "SELECT sql_data_key, sql_data_value FROM sql_data WHERE sql_site = ?"
    ns_query = "SELECT ns_id, ns_name, ns_is_primary_name FROM namespaces WHERE ns_site = ?"
    key = (name,)

    with sqlite.connect(self._sitesdb) as conn:
        try:
            site_row = conn.execute(site_query, key).fetchone()
        except sqlite.OperationalError:
            self._create_sitesdb()
            raise SiteNotFoundError(not_found)
        if not site_row:
            raise SiteNotFoundError(not_found)
        sql_rows = conn.execute(sql_query, key).fetchall()
        ns_rows = conn.execute(ns_query, key).fetchall()

    name, project, lang, base_url, article_path, script_path = site_row
    namespaces = {}
    for ns_id, ns_name, is_primary in ns_rows:
        names = namespaces.setdefault(ns_id, [])
        if is_primary:  # "Primary" name goes first in list
            names.insert(0, ns_name)
        else:  # Ordering of the aliases doesn't matter
            names.append(ns_name)

    return (name, project, lang, base_url, article_path, script_path,
            dict(sql_rows), namespaces)

def _make_site_object(self, name):
    """Return a Site object associated with the site *name* in our sitesdb.

    The site's stored attributes come from _load_site_from_sitesdb(), so
    SiteNotFoundError will be raised if the site is not in our sitesdb.
    Everything else comes from the ``wiki`` section of our config:

    - login data, HTTPS preference (off unless ``useHTTPS`` is set),
      ``assert``, ``maxlag`` and ``waitTime`` (2 if unset);
    - ``userAgent``, with ``$1`` replaced by our version and ``$2`` by
      the running Python version;
    - ``search``, copied and, if non-empty, extended with the path of the
      ``.nltk`` directory and our exclusions database;
    - ``sql``, used only when the sitesdb holds no SQL data for the site;
      in that case ``$1`` in any string value is replaced by the site
      name.
    """
    cookiejar = self._get_cookiejar()
    (name, project, lang, base_url, article_path, script_path, sql,
     namespaces) = self._load_site_from_sitesdb(name)

    wiki = self.config.wiki
    settings = {
        "login": (wiki.get("username"), wiki.get("password")),
        "user_agent": wiki.get("userAgent"),
        "use_https": wiki.get("useHTTPS", False),
        "assert_edit": wiki.get("assert"),
        "maxlag": wiki.get("maxlag"),
        "wait_between_queries": wiki.get("waitTime", 2),
        "logger": self._logger.getChild(name),
    }
    search_config = wiki.get("search", OrderedDict()).copy()

    if settings["user_agent"]:
        agent = settings["user_agent"].replace("$1", __version__)
        settings["user_agent"] = agent.replace("$2", python_version())

    if search_config:
        search_config["nltk_dir"] = path.join(self.config.root_dir, ".nltk")
        search_config["exclusions_db"] = self._exclusions_db

    if not sql:
        # Nothing stored for this site: fall back to the config template.
        sql = wiki.get("sql", OrderedDict()).copy()
        for key, value in sql.iteritems():
            if isinstance(value, basestring) and "$1" in value:
                sql[key] = value.replace("$1", name)

    return Site(name=name, project=project, lang=lang, base_url=base_url,
                article_path=article_path, script_path=script_path,
                sql=sql, namespaces=namespaces, cookiejar=cookiejar,
                search_config=search_config, **settings)

def _get_site_name_from_sitesdb(self, project, lang):
    """Return the name of the first site with the given project and lang.

    Two lookups are made. The first wants an exact match on the stored
    project and language. If that finds nothing, the second looks for a
    site whose base URL contains "{lang}.{project}". The fallback exists
    for the few sites, like the French Wikipedia, whose stored project
    is something other than the expected "wikipedia" ("wikipédia" in
    this case): get_site(lang="fr", project="wikipedia") should still
    find them.

    If the site is not found, return None. If the lookup fails because
    the database has not been set up, an empty sitesdb is created and
    None is returned as well.
    """
    by_project = "SELECT site_name FROM sites WHERE site_project = ? and site_lang = ?"
    by_url = "SELECT site_name FROM sites WHERE site_base_url LIKE ?"
    with sqlite.connect(self._sitesdb) as conn:
        try:
            row = conn.execute(by_project, (project, lang)).fetchone()
            if not row:
                pattern = "%{0}.{1}%".format(lang, project)
                row = conn.execute(by_url, (pattern,)).fetchone()
        except sqlite.OperationalError:
            self._create_sitesdb()
            return None
        if row:
            return row[0]
        return None

def _add_site_to_sitesdb(self, site):
    """Extract relevant info from a Site object and add it to the sitesdb.

    Works like a reverse _load_site_from_sitesdb(); the site's project,
    language, base URL, article path, script path, SQL connection data, and
    namespaces are extracted from the site and inserted into the sites
    database. For each namespace the first name in the site's list is
    stored as the primary name and the rest as aliases. *site* itself is
    only read, never modified.

    If rows for a site of the same name already exist, they are deleted
    first, so calling this again updates the stored information. If the
    sitesdb doesn't exist, we'll create it first.
    """
    name = site.name
    sites_data = (name, site.project, site.lang, site._base_url,
                  site._article_path, site._script_path)
    sql_data = [(name, key, val) for key, val in site._sql_data.items()]
    ns_data = []
    for ns_id, ns_names in site._namespaces.items():
        # Walk the list by position instead of pop()ing the primary name
        # off it: these lists belong to the live Site object.
        for position, ns_name in enumerate(ns_names):
            ns_data.append((name, ns_id, ns_name, position == 0))

    with sqlite.connect(self._sitesdb) as conn:
        check_exists = "SELECT 1 FROM sites WHERE site_name = ?"
        try:
            exists = conn.execute(check_exists, (name,)).fetchone()
        except sqlite.OperationalError:
            self._create_sitesdb()
        else:
            if exists:
                conn.execute("DELETE FROM sites WHERE site_name = ?", (name,))
                conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
                conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
        conn.execute("INSERT INTO sites VALUES (?, ?, ?, ?, ?, ?)", sites_data)
        conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)", sql_data)
        conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)", ns_data)

def _remove_site_from_sitesdb(self, name):
    """Remove a site by name from the sitesdb and the internal cache.

    The cached Site object for *name*, if any, is always dropped. Returns
    ``True`` if a row was deleted from the ``sites`` table (in which case
    the site's ``sql_data`` and ``namespaces`` rows are deleted as well
    and the removal is logged), or ``False`` if the site was not in the
    database. If the database has not been set up yet, an empty one is
    created and ``False`` is returned.
    """
    self._sites.pop(name, None)

    with sqlite.connect(self._sitesdb) as conn:
        try:
            cursor = conn.execute("DELETE FROM sites WHERE site_name = ?", (name,))
        except sqlite.OperationalError:
            # Missing tables: nothing to remove. Set the database up the
            # way the other sitesdb accessors do and report "not found".
            self._create_sitesdb()
            return False
        if cursor.rowcount == 0:
            return False
        conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
        conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))

    self._logger.info("Removed site '{0}'".format(name))
    return True

def get_site(self, name=None, project=None, lang=None):
    """Return a Site instance based on information from the sitesdb.

    With no arguments, return the default site as specified by our config
    file. This is ``config.wiki["defaultSite"]``.

    With *name* specified, return the site with that name. This is
    equivalent to the site's ``wikiid`` in the API, like *enwiki*.

    With *project* and *lang* specified, return the site whose project and
    language match these values. If there are multiple sites with the same
    values (unlikely), this is not a reliable way of loading a site. Call
    the function with an explicit *name* in that case.

    We will attempt to login to the site automatically using
    ``config.wiki["username"]`` and ``config.wiki["password"]`` if both are
    defined.

    Specifying a project without a lang or a lang without a project will
    raise :py:exc:`TypeError`. If all three args are specified, *name* will
    be first tried, then *project* and *lang* if *name* doesn't work. If a
    site cannot be found in the sitesdb, or no default site is configured
    when one is needed,
    :py:exc:`~earwigbot.exceptions.SiteNotFoundError` will be raised. An
    empty sitesdb will be created if none is found.
    """
    # project and lang only make sense as a pair:
    if bool(project) != bool(lang):
        raise TypeError("Keyword arguments 'lang' and 'project' must be specified together.")

    # Nothing given at all, so fall back on the configured default:
    if not (name or project or lang):
        try:
            default = self.config.wiki["defaultSite"]
        except KeyError:
            raise SiteNotFoundError("Default site is not specified in config.")
        return self._get_site_object(default)

    # Only project and lang given:
    if not name:
        found = self._get_site_name_from_sitesdb(project, lang)
        if found:
            return self._get_site_object(found)
        message = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang)
        raise SiteNotFoundError(message)

    # A name was given; project/lang are merely a fallback for it:
    try:
        return self._get_site_object(name)
    except SiteNotFoundError:
        if project and lang:
            fallback = self._get_site_name_from_sitesdb(project, lang)
            if fallback:
                return self._get_site_object(fallback)
        raise

def add_site(self, project=None, lang=None, base_url=None,
             script_path="/w", sql=None):
    """Add a site to the sitesdb so it can be retrieved with get_site().

    If only a project and a lang are given, we'll guess the *base_url* as
    ``"//{lang}.{project}.org"`` (which is protocol-relative, becoming
    ``"https"`` if *useHTTPS* is ``True`` in config otherwise ``"http"``).
    If this is wrong, provide the correct *base_url* as an argument (in
    which case project and lang are ignored). Most wikis use ``"/w"`` as
    the script path (meaning the API is located at
    ``"{base_url}{script_path}/api.php"`` ->
    ``"//{lang}.{project}.org/w/api.php"``), so this is the default. If
    your wiki is different, provide the script_path as an argument. SQL
    connection settings are guessed automatically using config's template
    value. If this is wrong or not specified, provide a dict of kwargs as
    *sql* and Site will pass it to :py:func:`oursql.connect(**sql)
    <oursql.connect>`, allowing you to make queries with
    :py:meth:`site.sql_query <earwigbot.wiki.site.Site.sql_query>`.

    Returns the newly stored site as a
    :py:class:`~earwigbot.wiki.site.Site` object, obtained the same way
    :py:meth:`get_site` would obtain it. Adding a site that is already in
    our sitesdb replaces its stored information (this can be done
    purposefully to update old site info). Raises
    :py:exc:`~earwigbot.exceptions.SiteNotFoundError` if not enough
    information has been provided to identify the site (e.g. a *project*
    but not a *lang*).
    """
    if not base_url:
        if not project or not lang:
            e = "Without a base_url, both a project and a lang must be given."
            raise SiteNotFoundError(e)
        base_url = "//{0}.{1}.org".format(lang, project)
    cookiejar = self._get_cookiejar()

    config = self.config
    login = (config.wiki.get("username"), config.wiki.get("password"))
    user_agent = config.wiki.get("userAgent")
    # Same default as _make_site_object() (HTTP unless useHTTPS is set),
    # so the temporary Site below talks to the wiki the same way the Site
    # we finally return will.
    use_https = config.wiki.get("useHTTPS", False)
    assert_edit = config.wiki.get("assert")
    maxlag = config.wiki.get("maxlag")
    wait_between_queries = config.wiki.get("waitTime", 2)

    if user_agent:
        user_agent = user_agent.replace("$1", __version__)
        user_agent = user_agent.replace("$2", python_version())

    # Create a Site object to log in and load the other attributes:
    site = Site(base_url=base_url, script_path=script_path, sql=sql,
                login=login, cookiejar=cookiejar, user_agent=user_agent,
                use_https=use_https, assert_edit=assert_edit,
                maxlag=maxlag, wait_between_queries=wait_between_queries)

    # Store first, announce afterwards: if the write fails, the log must
    # not claim the site was added.
    self._add_site_to_sitesdb(site)
    self._logger.info("Added site '{0}'".format(site.name))
    return self._get_site_object(site.name)

def remove_site(self, name=None, project=None, lang=None):
    """Remove a site from the sitesdb.

    Returns ``True`` if the site was removed successfully or ``False`` if
    the site was not in our sitesdb originally. The site can be named
    directly with *name*, or looked up through *project* and *lang*. If
    all three are given, we'll first try *name* and then try the latter
    two if *name* wasn't found in the database. With no arguments at all,
    nothing is removed and ``False`` is returned. Raises
    :py:exc:`TypeError` if a project was given but not a language, or vice
    versa.
    """
    # project and lang only make sense as a pair:
    if bool(project) != bool(lang):
        raise TypeError("Keyword arguments 'lang' and 'project' must be specified together.")

    if name:
        removed = self._remove_site_from_sitesdb(name)
        if removed or not (project and lang):
            return removed
        # The name didn't match anything; try the project/lang pair.
        alternative = self._get_site_name_from_sitesdb(project, lang)
        if alternative:
            return self._remove_site_from_sitesdb(alternative)
        return removed

    if project and lang:
        found = self._get_site_name_from_sitesdb(project, lang)
        if found:
            return self._remove_site_from_sitesdb(found)

    return False

+ 316
- 0
earwigbot/wiki/user.py View File

@@ -0,0 +1,316 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from logging import getLogger, NullHandler
from time import gmtime, strptime
from socket import AF_INET, AF_INET6, error as socket_error, inet_pton

from earwigbot.exceptions import UserNotFoundError
from earwigbot.wiki import constants
from earwigbot.wiki.page import Page

__all__ = ["User"]

class User(object):
    """
    **EarwigBot: Wiki Toolset: User**

    Represents a user on a given :py:class:`~earwigbot.wiki.site.Site`. Has
    methods for getting a bunch of information about the user, such as
    editcount and user rights, methods for returning the user's userpage and
    talkpage, etc.

    *Attributes:*

    - :py:attr:`site`:         the user's corresponding Site object
    - :py:attr:`name`:         the user's username
    - :py:attr:`exists`:       ``True`` if the user exists, else ``False``
    - :py:attr:`userid`:       an integer ID representing the user
    - :py:attr:`blockinfo`:    information about any current blocks on the user
    - :py:attr:`groups`:       a list of the user's groups
    - :py:attr:`rights`:       a list of the user's rights
    - :py:attr:`editcount`:    the number of edits made by the user
    - :py:attr:`registration`: the time the user registered
    - :py:attr:`emailable`:    ``True`` if you can email the user, or ``False``
    - :py:attr:`gender`:       the user's gender ("male"/"female"/"unknown")
    - :py:attr:`is_ip`:        ``True`` if this is an IP address, or ``False``

    *Public methods:*

    - :py:meth:`reload`:       forcibly reloads the user's attributes
    - :py:meth:`get_userpage`: returns a Page object representing the user's
      userpage
    - :py:meth:`get_talkpage`: returns a Page object representing the user's
      talkpage
    """

    def __init__(self, site, name, logger=None):
        """Constructor for new User instances.

        Takes a Site object (necessary for doing API queries), the name of the
        user (preferably without "User:" in front, although this prefix will
        be automatically removed by the API if given), and optionally a
        logger. When no logger is given, the shared ``"earwigbot.wiki"``
        logger is used with a null handler attached.

        You can also use site.get_user() instead, which returns a User object,
        and is preferred.

        We won't do any API queries yet for basic information about the user -
        save that for when the information is requested.
        """
        self._site = site
        self._name = name

        # Set up our internal logger:
        if logger:
            self._logger = logger
        else:  # Just set up a null logger to eat up our messages:
            self._logger = getLogger("earwigbot.wiki")
            # The logger is shared by name, so only attach a NullHandler once;
            # otherwise every new User would add yet another handler to it.
            handlers = self._logger.handlers
            if not any(isinstance(h, NullHandler) for h in handlers):
                self._logger.addHandler(NullHandler())

    def __repr__(self):
        """Return the canonical string representation of the User."""
        return "User(name={0!r}, site={1!r})".format(self._name, self._site)

    def __str__(self):
        """Return a nice string representation of the User."""
        return '<User "{0}" of {1}>'.format(self.name, str(self.site))

    def _get_attribute(self, attr):
        """Internally used to get an attribute by name.

        We'll call _load_attributes() to get this (and all other attributes)
        from the API if nothing has been loaded yet, or if the user exists but
        this particular attribute is still missing. A user already known not
        to exist is not queried again; call reload() to force a new query.

        Raises UserNotFoundError if a nonexistent user prevents us from
        returning a certain attribute.
        """
        never_loaded = not hasattr(self, "_exists")
        if never_loaded or (self._exists and not hasattr(self, attr)):
            self._load_attributes()
        if not self._exists:
            # Checked even when a stale value from an earlier load is present,
            # so that a reload() which found no user is respected.
            e = u"User '{0}' does not exist.".format(self._name)
            raise UserNotFoundError(e)
        return getattr(self, attr)

    def _load_attributes(self):
        """Internally used to load all attributes from the API.

        Normally, this is called by _get_attribute() when a requested attribute
        is not defined. This defines it.
        """
        props = "blockinfo|groups|rights|editcount|registration|emailable|gender"
        result = self.site.api_query(action="query", list="users",
                                     ususers=self._name, usprop=props)
        res = result["query"]["users"][0]

        # normalize our username in case it was entered oddly
        self._name = res["name"]

        try:
            self._userid = res["userid"]
        except KeyError:  # userid is missing, so user does not exist
            self._exists = False
            return

        self._exists = True

        try:
            self._blockinfo = {
                "by": res["blockedby"],
                "reason": res["blockreason"],
                "expiry": res["blockexpiry"]
            }
        except KeyError:  # no block keys in the result, so not blocked
            self._blockinfo = False

        self._groups = res["groups"]

        rights = res["rights"]
        try:
            # Rights may come back as a mapping; keep only its values, and
            # make sure we really store a list as documented.
            self._rights = list(rights.values())
        except AttributeError:
            self._rights = rights
        self._editcount = res["editcount"]

        reg = res.get("registration")
        try:
            self._registration = strptime(reg, "%Y-%m-%dT%H:%M:%SZ")
        except TypeError:
            # Sometimes the API doesn't give a date; the user's probably really
            # old. There's nothing else we can do!
            self._registration = gmtime(0)

        # The mere presence of the key means the user can be emailed.
        self._emailable = "emailable" in res

        self._gender = res["gender"]

    @property
    def site(self):
        """The user's corresponding Site object."""
        return self._site

    @property
    def name(self):
        """The user's username.

        This will never make an API query on its own, but if one has already
        been made by the time this is retrieved, the username may have been
        "normalized" from the original input to the constructor, converted into
        a Unicode object, with underscores removed, etc.
        """
        return self._name

    @property
    def exists(self):
        """``True`` if the user exists, or ``False`` if they do not.

        Makes an API query only if we haven't made one already.
        """
        if not hasattr(self, "_exists"):
            self._load_attributes()
        return self._exists

    @property
    def userid(self):
        """An integer ID used by MediaWiki to represent the user.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_userid")

    @property
    def blockinfo(self):
        """Information about any current blocks on the user.

        If the user is not blocked, returns ``False``. If they are, returns a
        dict with three keys: ``"by"`` is the blocker's username, ``"reason"``
        is the reason why they were blocked, and ``"expiry"`` is when the block
        expires.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_blockinfo")

    @property
    def groups(self):
        """A list of groups this user is in, including ``"*"``.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_groups")

    @property
    def rights(self):
        """A list of this user's rights.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_rights")

    @property
    def editcount(self):
        """Returns the number of edits made by the user.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_editcount")

    @property
    def registration(self):
        """The time the user registered as a :py:class:`time.struct_time`.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_registration")

    @property
    def emailable(self):
        """``True`` if the user can be emailed, or ``False`` if they cannot.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_emailable")

    @property
    def gender(self):
        """The user's gender.

        Can return either ``"male"``, ``"female"``, or ``"unknown"``, if they
        did not specify it.

        Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
        does not exist. Makes an API query only if we haven't made one already.
        """
        return self._get_attribute("_gender")

    @property
    def is_ip(self):
        """``True`` if the user is an IP address, or ``False`` otherwise.

        This tests for IPv4 and IPv6 using :py:func:`socket.inet_pton` on the
        username. No API queries are made.
        """
        for family in (AF_INET, AF_INET6):
            try:
                inet_pton(family, self.name)
            except (socket_error, TypeError, ValueError):
                # socket.error: not a valid address for this family.
                # TypeError/ValueError: the name could not even be handed to
                # inet_pton (e.g. non-ASCII unicode or an embedded NUL), so it
                # is certainly not an IP address.
                continue
            return True
        return False

    def reload(self):
        """Forcibly reload the user's attributes.

        Emphasis on *reload*: this is only necessary if there is reason to
        believe they have changed.
        """
        self._load_attributes()

    def get_userpage(self):
        """Return a Page object representing the user's userpage.

        No checks are made to see if it exists or not. Proper site namespace
        conventions are followed.
        """
        prefix = self.site.namespace_id_to_name(constants.NS_USER)
        pagename = ':'.join((prefix, self._name))
        return Page(self.site, pagename)

    def get_talkpage(self):
        """Return a Page object representing the user's talkpage.

        No checks are made to see if it exists or not. Proper site namespace
        conventions are followed.
        """
        prefix = self.site.namespace_id_to_name(constants.NS_USER_TALK)
        pagename = ':'.join((prefix, self._name))
        return Page(self.site, pagename)

+ 0
- 0
View File


+ 0
- 33
irc/base_command.py View File

@@ -1,33 +0,0 @@
# -*- coding: utf-8 -*-

# A base class for commands on IRC.

class BaseCommand(object):
    """Common parent of all IRC commands.

    The defaults here describe a command that hooks nothing, offers no help
    and never responds; subclasses override whichever methods they need.
    """

    def __init__(self, connection):
        """Store the connection object handed to this command."""
        self.connection = connection

    def get_hooks(self):
        """Return the hooks this command wants to be called on.

        Hooks are: 'msg', 'msg_private', 'msg_public', and 'join'. The base
        class subscribes to none of them.
        """
        return list()

    def get_help(self, command):
        """Return help text for ``command`` (used by !help), or None.

        A class that handles several commands can use ``command`` to return
        different help for each one. The base class has no help to offer.
        """
        return None

    def check(self, data):
        """Decide whether this command should respond to ``data``.

        Given a Data() object, return True to respond or False to ignore the
        activity. Most commands return True when data.command equals their
        own name. The base class never responds.
        """
        return False

    def process(self, data):
        """Handle an activity (usually a message) on IRC.

        check() has already accepted ``data`` by the time this runs, so a
        guard such as 'if data.command != "command_name": return' is
        (usually) unnecessary. The base class does nothing.
        """
        return None

+ 0
- 66
irc/command_handler.py View File

@@ -1,66 +0,0 @@
# -*- coding: utf-8 -*-

# A module to manage IRC commands.

import os
import traceback

# Every instantiated command object, filled in by load_commands().
commands = []

def load_commands(connection):
    """Load all valid command classes from irc/commands/ into ``commands``.

    Every ``*.py`` file in ``irc/commands/`` whose name does not start with
    an underscore is imported, in alphabetical order, and handed to
    process_module(). A module that fails to import is reported with a
    traceback and skipped. ``connection`` is passed on to each command.
    """
    # Local import: this is the only place that needs it.
    import importlib

    for f in sorted(os.listdir(os.path.join("irc", "commands"))):
        # ignore non-python files or files beginning with "_"
        if f.startswith("_") or not f.endswith(".py"):
            continue
        module_name = f[:-3]  # strip .py from end
        try:
            # Import by name instead of exec/eval-ing a string built from a
            # file name, which would run arbitrary text found on disk.
            module = importlib.import_module("irc.commands." + module_name)
        except Exception:  # importing the file failed for some reason...
            print("Couldn't load file %s:" % f)
            traceback.print_exc()
            continue
        process_module(connection, module)

    pretty_cmnds = [c.__class__.__name__ for c in commands]
    print("Found %s command classes: %s." % (len(commands), ', '.join(pretty_cmnds)))

def process_module(connection, module):
    """Add every valid command class found in ``module`` to ``commands``.

    A class counts as a command when one of its direct bases is named
    "BaseCommand". Each such class is instantiated once with ``connection``
    and the instance is appended to the module-level ``commands`` list.
    """
    for this_obj in dir(module):  # go through everything in the file
        obj = getattr(module, this_obj)

        try:
            bases = obj.__bases__
        except AttributeError:  # object isn't a valid class, so ignore it
            continue

        # Register at most once per class, however many of its bases match.
        if any(base.__name__ == "BaseCommand" for base in bases):
            commands.append(obj(connection))  # initialize a new command object
            print("Added command class %s from %s..." % (this_obj, module.__name__))

def get_commands():
    """Return the module-level list of loaded command objects.

    The list itself is returned, not a copy.
    """
    return commands

def check(hook, data):
    """Dispatch an IRC event to the first command that wants it.

    ``hook`` is the kind of event and ``data`` the Data() object describing
    it. The first command that lists ``hook`` among its hooks and whose
    check() accepts ``data`` gets to process it; later commands are not
    consulted. An exception raised while processing is printed, not
    propagated.
    """
    data.parse_args()  # parse command arguments into data.command and data.args

    for command in commands:
        if hook not in command.get_hooks():
            continue
        if not command.check(data):
            continue
        try:
            command.process(data)
        except Exception:
            # Report and carry on, but let SystemExit and KeyboardInterrupt
            # through so the bot can still be stopped.
            print("Error executing command '{}':".format(data.command))
            traceback.print_exc()
        break

+ 0
- 0
View File


+ 0
- 979
irc/commands/_old.py View File

@@ -1,979 +0,0 @@
# -*- coding: utf-8 -*-
######
###### NOTE:
###### This is an old commands file from the previous version of EarwigBot.
###### It is not used by the new EarwigBot and is simply here for reference
###### when developing new commands.
######
### EarwigBot

## Import basics.
import sys, socket, string, time, codecs, os, traceback, thread, re, urllib, web, math, unicodedata

## Import our functions.
import config

## Set up constants.
# Connection settings, copied from the config module.
HOST = config.host
PORT = config.port
NICK = config.nick
IDENT = config.ident
REALNAME = config.realname
# Channels.
CHANS = config.chans
REPORT_CHAN = config.report_chan
WELCOME_CHAN = config.welcome_chan
# Second host and channel.
HOST2 = config.host2
CHAN2 = config.chan2
# Permissions and credentials.
OWNER = config.owner
ADMINS = config.admins
ADMINS_R = config.admin_readable
PASS = config.password

def get_commandList():
    """Return a new dict mapping each trigger word to its command name.

    Most triggers map to themselves; the rest are aliases of one of those
    canonical names.
    """
    # Triggers whose command name is the trigger itself.
    canonical = (
        'quiet', 'welcome', 'linker', 'auth', 'access', 'join', 'part',
        'restart', 'quit', 'msg', 'me', 'calc', 'dice', 'tock', 'beats',
        'copyvio', 'dictionary', 'etymology', 'langcode', 'number', 'nick',
        'op', 'deop', 'voice', 'devoice', 'pending', 'submissions', 'praise',
        'leonard', 'groovedog', 'earwig', 'macmed', 'cubs197', 'sparksboy',
        'tim_song', 'blurpeace', 'sausage', 'mindstormskid', 'mcjohn',
        'fetchcomms', 'trout', 'kill', 'fish', 'report', 'commands', 'help',
        'mysql', 'reminder', 'notes', 'hash', 'lookup',
    )
    # Alternative triggers and the canonical command each one stands for.
    aliases = (
        ('greet', 'welcome'),
        ('die', 'quit'),
        ('copy', 'copyvio'),
        ('copyright', 'copyvio'),
        ('dict', 'dictionary'),
        ('ety', 'etymology'),
        ('lang', 'langcode'),
        ('num', 'number'),
        ('count', 'number'),
        ('c', 'number'),
        ('pend', 'pending'),
        ('sub', 'submissions'),
        ('tim', 'tim_song'),
        ('destroy', 'kill'),
        ('murder', 'kill'),
        ('doc', 'help'),
        ('documentation', 'help'),
        ('remind', 'reminder'),
        ('note', 'notes'),
        ('about', 'notes'),
        ('data', 'notes'),
        ('database', 'notes'),
        ('ip', 'lookup'),
    )
    table = dict((word, word) for word in canonical)
    table.update(aliases)
    return table

def main(command, line, line2, nick, chan, host, auth, notice, say, reply, s):
    """Run parse() with the given arguments and report any failure to IRC.

    If parse() raises, the full traceback is printed and a one-line summary
    is sent to ``chan`` through ``say``: the last line of the traceback
    followed, in parentheses, by the innermost 'File "/...' line (with its
    first letter lower-cased), or 'source unknown' if there is no such line.
    """
    try:
        parse(command, line, line2, nick, chan, host, auth, notice, say, reply, s)
    except Exception:
        trace = traceback.format_exc()
        print(trace)

        # Walk the traceback from the bottom up: the first entry is the
        # exception message, the first 'File' line is the innermost frame.
        bottom_up = trace.splitlines()[::-1]
        summary = bottom_up[0].strip()
        origin = 'source unknown'
        for raw in bottom_up:
            stripped = raw.strip()
            if stripped.startswith('File "/'):
                origin = stripped[0].lower() + stripped[1:]
                break
        say(summary + ' (' + origin + ')', chan)

def parse(command, line, line2, nick, chan, host, auth, notice, say, reply, s):
authy = auth(host)
if command == "access":
a = 'The bot\'s owner is "%s".' % OWNER
b = 'The bot\'s admins are "%s".' % ', '.join(ADMINS_R)
reply(a, chan, nick)
reply(b, chan, nick)
return
if command == "join":
if authy == "owner" or authy == "admin":
try:
channel = line2[4]
except Exception:
channel = chan
s.send("JOIN %s\r\n" % channel)
else:
reply("You aren't authorized to use that command.", chan, nick)
return
if command == "part":
if authy == "owner" or authy == "admin":
try:
channel = line2[4]
except Exception:
channel = chan
s.send("PART %s\r\n" % channel)
else:
reply("You aren't authorized to use that command.", chan, nick)
return
if command == "restart":
import thread
if authy == "owner":
s.send("QUIT\r\n")
time.sleep(5)
os.system("nice -15 python main.py")
exit()
else:
reply("Only the owner, %s, can stop the bot. This incident will be reported." % OWNER, chan, nick)
return
if command == "quit" or command == "die":
if authy != "owner":
if command != "suicide":
reply("Only the owner, %s, can stop the bot. This incident will be reported." % OWNER, chan, nick)
else:
say("\x01ACTION hands %s a gun... have fun :D\x01" % nick, nick)
else:
if command == "suicide":
say("\x01ACTION stabs himself with a knife.\x01", chan)
time.sleep(0.2)
try:
s.send("QUIT :%s\r\n" % ' '.join(line2[4:]))
except Exception:
s.send("QUIT\r\n")
__import__('os')._exit(0)
return
if command == "msg":
if authy == "owner" or authy == "admin":
say(' '.join(line2[5:]), line2[4])
else:
reply("You aren't authorized to use that command.", chan, nick)
return
if command == "me":
if authy == "owner" or authy == "admin":
say("\x01ACTION %s\x01" % ' '.join(line2[5:]), line2[4])
else:
reply("You aren't authorized to use that command.", chan, nick)
return
if command == "calc":
r_result = re.compile(r'(?i)<A NAME=results>(.*?)</A>')
r_tag = re.compile(r'<\S+.*?>')
subs = [
(' in ', ' -> '),
(' over ', ' / '),
(u'£', 'GBP '),
(u'€', 'EUR '),
('\$', 'USD '),
(r'\bKB\b', 'kilobytes'),
(r'\bMB\b', 'megabytes'),
(r'\bGB\b', 'kilobytes'),
('kbps', '(kilobits / second)'),
('mbps', '(megabits / second)')
]
try:
q = ' '.join(line2[4:])
except Exception:
say("0?", chan)
return
query = q[:]
for a, b in subs:
query = re.sub(a, b, query)
query = query.rstrip(' \t')

precision = 5
if query[-3:] in ('GBP', 'USD', 'EUR', 'NOK'):
precision = 2
query = web.urllib.quote(query.encode('utf-8'))

uri = 'http://futureboy.us/fsp/frink.fsp?fromVal='
bytes = web.get(uri + query)
m = r_result.search(bytes)
if m:
result = m.group(1)
result = r_tag.sub('', result) # strip span.warning tags
result = result.replace('&gt;', '>')
result = result.replace('(undefined symbol)', '(?) ')

if '.' in result:
try: result = str(round(float(result), precision))
except ValueError: pass

if not result.strip():
result = '?'
elif ' in ' in q:
result += ' ' + q.split(' in ', 1)[1]

say(q + ' = ' + result[:350], chan)
else: reply("Sorry, can't calculate that.", chan, nick)
return
if command == "dice":
import random
try:
set = range(int(line2[4]), int(line2[5]) + 1)
except Exception:
set = range(1, 7)
num = random.choice(set)
reply("You rolled a %s." % num, chan, nick)
if len(set) < 30:
say("Set consisted of %s." % set, nick)
else:
say("Set consisted of %s... and %s others." % (set[:30], len(set) - 30), nick)
return
if command == "tock":
u = urllib.urlopen('http://tycho.usno.navy.mil/cgi-bin/timer.pl')
info = u.info()
u.close()
say('"' + info['Date'] + '" - tycho.usno.navy.mil', chan)
return
if command == "beats":
beats = ((time.time() + 3600) % 86400) / 86.4
beats = int(math.floor(beats))
say('@%03i' % beats, chan)
return
if command == "copyvio" or command == "copy" or command == "copyright":
url = "http://en.wikipedia.org/wiki/User:EarwigBot/AfC copyvios"
query = urllib.urlopen(url)
data = query.read()
url = "http://toolserver.org/~earwig/earwigbot/pywikipedia/error.txt"
query = urllib.urlopen(url)
data2 = query.read()
if "critical" in data2:
text = "AfC copyvio situation is CRITICAL: Major disaster."
elif "exceed" in data2:
text = "AfC copyvio situation is CRITICAL: Queries exceeded error."
elif "spam" in data2:
text = "AfC copyvio situation is CRITICAL: Spamfilter error."
elif "<h3>" in data:
text = "AfC copyvio situation is BAD: Unsolved copyvios at [[User:EarwigBot/AfC copyvios]]"
else:
text = "AfC copyvio situation is OK: OK."
reply(text, chan, nick)
return
if command == "dict" or command == "dictionary":
def trim(thing):
if thing.endswith('&nbsp;'):
thing = thing[:-6]
return thing.strip(' :.')
r_li = re.compile(r'(?ims)<li>.*?</li>')
r_tag = re.compile(r'<[^>]+>')
r_parens = re.compile(r'(?<=\()(?:[^()]+|\([^)]+\))*(?=\))')
r_word = re.compile(r'^[A-Za-z0-9\' -]+$')
uri = 'http://encarta.msn.com/dictionary_/%s.html'
r_info = re.compile(r'(?:ResultBody"><br /><br />(.*?)&nbsp;)|(?:<b>(.*?)</b>)')
try:
word = line2[4]
except Exception:
reply("Please enter a word.", chan, nick)
return
word = urllib.quote(word.encode('utf-8'))
bytes = web.get(uri % word)
results = {}
wordkind = None
for kind, sense in r_info.findall(bytes):
kind, sense = trim(kind), trim(sense)
if kind: wordkind = kind
elif sense:
results.setdefault(wordkind, []).append(sense)
result = word.encode('utf-8') + ' - '
for key in sorted(results.keys()):
if results[key]:
result += (key or '') + ' 1. ' + results[key][0]
if len(results[key]) > 1:
result += ', 2. ' + results[key][1]
result += '; '
result = result.rstrip('; ')
if result.endswith('-') and (len(result) < 30):
reply('Sorry, no definition found.', chan, nick)
else: say(result, chan)
return
if command == "ety" or command == "etymology":
etyuri = 'http://etymonline.com/?term=%s'
etysearch = 'http://etymonline.com/?search=%s'
r_definition = re.compile(r'(?ims)<dd[^>]*>.*?</dd>')
r_tag = re.compile(r'<(?!!)[^>]+>')
r_whitespace = re.compile(r'[\t\r\n ]+')
abbrs = [
'cf', 'lit', 'etc', 'Ger', 'Du', 'Skt', 'Rus', 'Eng', 'Amer.Eng', 'Sp',
'Fr', 'N', 'E', 'S', 'W', 'L', 'Gen', 'J.C', 'dial', 'Gk',
'19c', '18c', '17c', '16c', 'St', 'Capt', 'obs', 'Jan', 'Feb', 'Mar',
'Apr', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'c', 'tr', 'e', 'g'
]
t_sentence = r'^.*?(?<!%s)(?:\.(?= [A-Z0-9]|\Z)|\Z)'
r_sentence = re.compile(t_sentence % ')(?<!'.join(abbrs))
def unescape(s):
s = s.replace('&gt;', '>')
s = s.replace('&lt;', '<')
s = s.replace('&amp;', '&')
return s
def text(html):
html = r_tag.sub('', html)
html = r_whitespace.sub(' ', html)
return unescape(html).strip()
try:
word = line2[4]
except Exception:
reply("Please enter a word.", chan, nick)
return
def ety(word):
if len(word) > 25:
raise ValueError("Word too long: %s[...]" % word[:10])
word = {'axe': 'ax/axe'}.get(word, word)
bytes = web.get(etyuri % word)
definitions = r_definition.findall(bytes)
if not definitions:
return None
defn = text(definitions[0])
m = r_sentence.match(defn)
if not m:
return None
sentence = m.group(0)
try:
sentence = unicode(sentence, 'iso-8859-1')
sentence = sentence.encode('utf-8')
except: pass
maxlength = 275
if len(sentence) > maxlength:
sentence = sentence[:maxlength]
words = sentence[:-5].split(' ')
words.pop()
sentence = ' '.join(words) + ' [...]'
sentence = '"' + sentence.replace('"', "'") + '"'
return sentence + ' - ' + (etyuri % word)
try:
result = ety(word.encode('utf-8'))
except IOError:
msg = "Can't connect to etymonline.com (%s)" % (etyuri % word)
reply(msg, chan, nick)
return
except AttributeError:
result = None
if result is not None:
reply(result, chan, nick)
else:
uri = etysearch % word
msg = 'Can\'t find the etymology for "%s". Try %s' % (word, uri)
reply(msg, chan, nick)
return
if command == "num" or command == "number" or command == "count" or command == "c":
try:
params = string.lower(line2[4])
except Exception:
params = False
if params == "old" or params == "afc" or params == "a":
number = unicode(int(len(re.findall("title=", urllib.urlopen("http://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Pending_AfC_submissions&cmlimit=500").read()))) - 2)
reply("There are currently %s pending AfC submissions." % number, chan, nick)
elif params == "redirect" or params == "redir" or params == "redirs" or params == "redirects" or params == "r":
redir_data = urllib.urlopen("http://en.wikipedia.org/w/index.php?title=Wikipedia:Articles_for_creation/Redirects").read()
redirs = (string.count(redir_data, "<h2>") - 1) - (string.count(redir_data, '<table class="navbox collapsible collapsed" style="text-align: left; border: 0px; margin-top: 0.2em;">'))
reply("There are currently %s open redirect requests." % redirs, chan, nick)
elif params == "files" or params == "ffu" or params == "file" or params == "image" or params == "images" or params == "ifu" or params == "f":
file_data = re.sub("<h2>Contents</h2>", "", urllib.urlopen("http://en.wikipedia.org/w/index.php?title=Wikipedia:Files_for_upload").read())
files = (string.count(file_data, "<h2>") - 1) - (string.count(file_data, '<table class="navbox collapsible collapsed" style="text-align: left; border: 0px; margin-top: 0.2em;">'))
reply("There are currently %s open file upload requests." % files, chan, nick)
elif params == "aggregate" or params == "agg":
subs = unicode(int(len(re.findall("title=", urllib.urlopen("http://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Pending_AfC_submissions&cmlimit=500").read()))) - 2)
redir_data = urllib.urlopen("http://en.wikipedia.org/w/index.php?title=Wikipedia:Articles_for_creation/Redirects").read()
file_data = re.sub("<h2>Contents</h2>", "", urllib.urlopen("http://en.wikipedia.org/w/index.php?title=Wikipedia:Files_for_upload").read())
redirs = (string.count(redir_data, "<h2><span class=\"editsection\">")) - (string.count(redir_data, '<table class="navbox collapsible collapsed" style="text-align: left; border: 0px; margin-top: 0.2em;">'))
files = (string.count(file_data, "<h2>") - 1) - (string.count(file_data, '<table class="navbox collapsible collapsed" style="text-align: left; border: 0px; margin-top: 0.2em;">'))
aggregate = (int(subs) * 5) + (int(redirs) * 2) + (int(files) * 2)
if aggregate == 0:
stat = "clear"
elif aggregate < 60:
stat = "almost clear"
elif aggregate < 125:
stat = "small backlog"
elif aggregate < 175:
stat = "average backlog"
elif aggregate < 250:
stat = "backlogged"
elif aggregate < 300:
stat = "heavily backlogged"
else:
stat = "severely backlogged"
reply("Aggregate is currently %s (%s)." % (aggregate, stat), chan, nick)
else:
subs = unicode(int(len(re.findall("title=", urllib.urlopen("http://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Pending_AfC_submissions&cmlimit=500").read()))) - 2)
redir_data = urllib.urlopen("http://en.wikipedia.org/w/index.php?title=Wikipedia:Articles_for_creation/Redirects").read()
file_data = re.sub("<h2>Contents</h2>", "", urllib.urlopen("http://en.wikipedia.org/w/index.php?title=Wikipedia:Files_for_upload").read())
redirs = (string.count(redir_data, "<h2><span class=\"editsection\">")) - (string.count(redir_data, '<table class="navbox collapsible collapsed" style="text-align: left; border: 0px; margin-top: 0.2em;">'))
files = (string.count(file_data, "<h2>") - 1) - (string.count(file_data, '<table class="navbox collapsible collapsed" style="text-align: left; border: 0px; margin-top: 0.2em;">'))
reply("There are currently %s pending submissions, %s open redirect requests, and %s open file upload requests." % (subs, redirs, files), chan, nick)
return
if command == "nick":
if authy == "owner":
try:
new_nick = line2[4]
except Exception:
reply("Please specify a nick to change to.", chan, nick)
return
s.send("NICK %s\r\n" % new_nick)
else:
reply("You aren't authorized to use that command.", chan, nick)
return
if command == "op" or command == "deop" or command == "voice" or command == "devoice":
if authy == "owner" or authy == "admin":
try:
user = line2[4]
except Exception:
user = nick
say("%s %s %s" % (command, chan, user), "ChanServ")
else:
reply("You aren't authorized to use that command.", chan, nick)
return
if command == "pend" or command == "pending":
say("Pending submissions status page: <http://en.wikipedia.org/wiki/WP:AFC/S>.", chan)
say("Pending submissions category: <http://en.wikipedia.org/wiki/Category:Pending_AfC_submissions>.", chan)
return
if command == "sub" or command == "submissions":
try:
number = int(line2[4])
except Exception:
reply("Please enter a number.", chan, nick)
return
do_url = False
try:
if "url" in line2[5:]: do_url = True
except Exception:
pass
url = "http://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Pending_AfC_submissions&cmlimit=500&cmsort=timestamp"
query = urllib.urlopen(url)
data = query.read()
pages = re.findall("title=&quot;(.*?)&quot;", data)
try:
pages.remove("Wikipedia:Articles for creation/Redirects")
except Exception:
pass
try:
pages.remove("Wikipedia:Files for upload")
except Exception:
pass
pages.reverse()
pages = pages[:number]
if not do_url:
s = string.join(pages, "]], [[")
s = "[[%s]]" % s
else:
s = string.join(pages, ">, <http://en.wikipedia.org/wiki/")
s = "<http://en.wikipedia.org/wiki/%s>" % s
s = re.sub(" ", "_", s)
s = re.sub(">,_<", ">, <", s)
report = "\x02First %s pending AfC submissions:\x0F %s" % (number, s)
say(report, chan)
return
if command == "praise" or command == "leonard" or command == "groovedog" or command == "earwig" or command == "macmed" or command == "cubs197" or command == "sparksboy" or command == "tim_song" or command == "tim" or command == "sausage" or command == "mindstormskid" or command == "mcjohn" or command == "fetchcomms" or command == "blurpeace":
bad = False
if command == "leonard":
special = "AfC redirect reviewer"
user = "Leonard^Bloom"
elif command == "groovedog":
special = "heh"
user = "GrooveDog"
elif command == "earwig":
special = "Python programmer"
user = "Earwig"
elif command == "macmed":
special = "CSD tagger"
user = "MacMed"
elif command == "mindstormskid":
special = "Lego fanatic"
user = "MindstormsKid"
elif command == "cubs197":
special = "IRC dude"
user = "Cubs197"
elif command == "sparksboy":
special = "pet owner"
user = "SparksBoy"
elif command == "tim_song" or command == "tim":
special = "JavaScript programmer"
user = "Tim_Song"
elif command == "sausage":
special = "helper"
user = "chzz"
elif command == "mcjohn":
special = "edit summary writer"
user = "McJohn"
elif command == "fetchcomms":
special = "n00b"
user = "Fetchcomms"
elif command == "blurpeace":
special = "Commons admin"
user = "Blurpeace"
else:
say("Only a true fool would use that command, %s." % nick, chan)
# say("The users who you can praise are: Leonard^Bloom, GrooveDog, Earwig, MacMed, Cubs197, SparksBoy, MindstormsKid, Chzz, McJohn, Tim_Song, Fetchcomms, and Blurpeace.", chan)
return
if not bad:
say("\x02%s\x0F is the bestest %s evah!" % (user, special), chan)
if bad:
say("\x02%s\x0F is worstest %s evah!" % (user, special), chan)
return
if command == "trout":
try:
user = line2[4]
user = ' '.join(line2[4:])
except Exception:
reply("Hahahahahahahaha...", chan, nick)
return
normal = unicodedata.normalize('NFKD', unicode(string.lower(user)))
if "itself" in normal:
reply("I'm not that stupid ;)", chan, nick)
return
elif "earwigbot" in normal:
reply("I'm not that stupid ;)", chan, nick)
elif "earwig" not in normal and "ear wig" not in normal:
text = 'slaps %s around a bit with a large trout.' % user
msg = '\x01ACTION %s\x01' % text
say(msg, chan)
else:
reply("I refuse to hurt anything with \"Earwig\" in its name :P", chan, nick)
return
if command == "kill" or command == "destroy" or command == "murder":
reply("Who do you think I am? The Mafia?", chan, nick)
return
if command == "fish":
try:
user = line2[4]
fish = ' '.join(line2[5:])
except Exception:
reply("Hahahahahahahaha...", chan, nick)
return
normal = unicodedata.normalize('NFKD', unicode(string.lower(user)))
if "itself" in normal:
reply("I'm not that stupid ;)", chan, nick)
return
elif "earwigbot" in normal:
reply("I'm not that stupid ;)", chan, nick)
elif "earwig" not in normal and "ear wig" not in normal:
text = 'slaps %s around a bit with a %s.' % (user, fish)
msg = '\x01ACTION %s\x01' % text
say(msg, chan)
else:
reply("I refuse to hurt anything with \"Earwig\" in its name :P", chan, nick)
return
if command == "report":
def find_status(name="", talk=False):
enname = re.sub(" ", "_", name)
if talk == True:
enname = "Wikipedia_talk:Articles_for_creation/%s" % enname
if talk == False:
enname = "Wikipedia:Articles_for_creation/%s" % enname
url = "http://en.wikipedia.org/w/api.php?action=query&titles=%s&prop=revisions&rvprop=content" % enname
query = urllib.urlopen(url)
data = query.read()
status = ""
if "{{AFC submission|D" in data or "{{AFC submission|d" in data:
reason = re.findall("(D|d)\|(.*?)\|", data)
if reason[0][1] != "reason":
status = "Declined, reason is '%s'" % reason[0][1]
if reason[0][1] == "reason":
status = "Declined, reason is a custom reason"
if "{{AFC submission|H" in data or "{{AFC submission|h" in data:
reason = re.findall("(H|h)\|(.*?)\|", data)
if reason[0][1] != "reason":
status = "Held, reason is '%s'" % reason[0][1]
if reason[0][1] == "reason":
status = "Held, reason is a custom reason"
if "{{AFC submission||" in data:
status = "Pending"
if "{{AFC submission|R" in data or "{{AFC submission|r" in data:
status = "Reviewing"
if not status:
exist = exists(name=enname)
if exist == True:
status = "Accepted"
if exist == False:
status = "Not found"
return status
def exists(name=""):
url = "http://en.wikipedia.org/wiki/%s" % name
query = urllib.urlopen(url)
data = query.read()
if "Wikipedia does not have a" in data:
return False
return True
def get_submitter(name="", talk=False):
    """Look up the author of an AfC submission page via the Wikipedia API.

    Args:
        name: Submission title without the "Articles_for_creation/" prefix;
            spaces are converted to underscores.
        talk: When true, query the "Wikipedia_talk:" page instead of the
            "Wikipedia:" one.

    Returns:
        A ``(username, anon)`` tuple.  ``username`` is "" when no user could
        be extracted from the response; ``anon`` is True when the response
        contains ``anon=``.
    """
    enname = re.sub(" ", "_", name)
    if talk:
        enname = "Wikipedia_talk:Articles_for_creation/%s" % enname
    else:
        enname = "Wikipedia:Articles_for_creation/%s" % enname

    # rvdir=newer + rvlimit=1 limits the result to a single revision,
    # ordered oldest-first.
    url = "http://en.wikipedia.org/w/api.php?action=query&titles=%s&prop=revisions&rvprop=user&rvdir=newer&rvlimit=1" % enname
    query = urllib.urlopen(url)
    try:
        data = query.read()
    finally:
        # Always release the connection, even if the read fails.
        query.close()

    # The response is scanned as text with HTML-escaped quotes, hence &quot;.
    extract = re.findall("user=&quot;(.*?)&quot;", data)
    anon = "anon=" in data

    # Explicit emptiness check instead of catching BaseException around the
    # index, which would also have swallowed KeyboardInterrupt/SystemExit.
    if not extract:
        return "", anon
    return extract[0], anon
try:
rawSub = line2[4]
rawSub = ' '.join(line2[4:])
except Exception:
reply("You need to specify a submission name in order to use %s!" % command, chan, nick)
return
talk = False
if "[[" in rawSub and "]]" in rawSub:
name = re.sub("\[\[(.*)\]\]", "\\1", rawSub)
name = re.sub(" ", "_", name)
name = urllib.quote(name, ":/")
name = "http://en.wikipedia.org/wiki/%s" % name
if "talk:" in name:
talk = True
elif "http://" in rawSub:
name = rawSub
if "talk:" in name:
talk = True
elif "en.wikipedia.org" in rawSub:
name = "http://%s" % rawSub
if "talk:" in name:
talk = True
elif "Wikipedia:" in rawSub or "Wikipedia_talk:" in rawSub or "Wikipedia talk:" in rawSub:
name = re.sub(" ", "_", rawSub)
name = urllib.quote(name, ":/")
name = "http://en.wikipedia.org/wiki/%s" % name
if "talk:" in name:
talk = True
else:
url = "http://en.wikipedia.org/wiki/"
pagename = re.sub(" ", "_", rawSub)
pagename = urllib.quote(pagename, ":/")
pagename = "Wikipedia:Articles_for_creation/%s" % pagename
page = urllib.urlopen("%s%s" % (url, pagename))
text = page.read()
name = "http://en.wikipedia.org/wiki/%s" % pagename
if "Wikipedia does not have a" in text:
pagename = re.sub(" ", "_", rawSub)
pagename = urllib.quote(pagename, ":/")
pagename = "Wikipedia_talk:Articles_for_creation/%s" % pagename
page = urllib.urlopen("%s%s" % (url, pagename))
name = "http://en.wikipedia.org/wiki/%s" % pagename
talk = True
unname = re.sub("http://en.wikipedia.org/wiki/Wikipedia:Articles_for_creation/", "", name)
unname = re.sub("http://en.wikipedia.org/wiki/Wikipedia_talk:Articles_for_creation/", "", unname)
unname = re.sub("_", " ", unname)
if "talk" in unname:
talk = True
submitter, anon = get_submitter(name=unname, talk=talk)
status = find_status(name=unname, talk=talk)
if submitter != "":
if anon == True:
submitter_page = "Special:Contributions/%s" % submitter
if anon == False:
unsubmit = re.sub(" ", "_", submitter)
unsubmit = urllib.quote(unsubmit, ":/")
submitter_page = "User:%s" % unsubmit
if status == "Accepted":
submitterm = "Reviewer"
else:
submitterm = "Submitter"
line1 = "\x02AfC submission report for %s:" % unname
line2 = "\x02URL: \x0301\x0F%s" % name
if submitter != "":
line3 = "\x02%s: \x0F\x0302%s (\x0301\x0Fhttp://en.wikipedia.org/wiki/%s)." % (submitterm, submitter, submitter_page)
line4 = "\x02Status: \x0F\x0302%s." % status
say(line1, chan)
time.sleep(0.1)
say(line2, chan)
time.sleep(0.1)
if submitter != "":
say(line3, chan)
time.sleep(0.1)
say(line4, chan)
return
if command == "commands":
if chan.startswith("#"):
reply("Please use that command in a private message.", chan, nick)
return
others2 = get_commandList().values()
others = []
for com in others2:
if com == "copyvio" or com == "number" or com == "pending" or com == "report" or com == "submissions" or com == "access" or com == "help" or com == "join" or com == "linker" or com == "nick" or com == "op" or com == "part" or com == "quiet" or com == "quit" or com == "restart" or com == "voice" or com == "welcome" or com == "fish" or com == "praise" or com == "trout" or com == "notes":
continue
if com in others: continue
others.append(com)
others.sort()
say("\x02AFC commands:\x0F copyvio, number, pending, report, submissions.", chan)
time.sleep(0.1)
say("\x02Bot operation and channel maintaince commands:\x0F access, help, join, linker, nick, op, part, quiet, quit, restart, voice, welcome.", chan)
time.sleep(0.1)
say("\x02Fun commands:\x0F fish, praise, trout, and numerous easter eggs", chan)
time.sleep(0.1)
say("\x02Other commands:\x0F %s" % ', '.join(others), chan)
time.sleep(0.1)
say("The bot maintains a mini-wiki. Type \"!notes help\" for more information.", chan)
time.sleep(0.1)
say("See http://enwp.org/User:The_Earwig/Bots/IRC for details. For help on a specific command, type '!help command'.", chan)
return
if command == "help" or command == "doc" or command == "documentation":
try:
com = line2[4]
except Exception:
reply("Hi, I'm a bot that does work for Articles for Creation. You can find information about me at http://enwp.org/User:The_Earwig/Bots/IRC. Say \"!commands\" to me in a private message for some of my abilities. Earwig is my owner and creator, and you can contact him at http://enwp.org/User_talk:The_Earwig.", chan, nick)
return
say("Sorry, command documentation has not been implemented yet.", chan)
return
if command == "mysql":
if authy != "owner":
reply("You aren't authorized to use this command.", chan, nick)
return
import MySQLdb
try:
strings = line2[4]
strings = ' '.join(line2[4:])
if "db:" in strings:
database = re.findall("db\:(.*?)\s", strings)[0]
else:
database = "enwiki_p"
if "time:" in strings:
times = int(re.findall("time\:(.*?)\s", strings)[0])
else:
times = 60
file = re.findall("file\:(.*?)\s", strings)[0]
sqlquery = re.findall("query\:(.*?)\Z", strings)[0]
except Exception:
reply("You did not specify enough data for the bot to continue.", chan, nick)
return
database2 = database[:-2] + "-p"
db = MySQLdb.connect(db=database, host="%s.rrdb.toolserver.org" % database2, read_default_file="/home/earwig/.my.cnf")
db.query(sqlquery)
r = db.use_result()
data = r.fetch_row(0)
try:
f = codecs.open("/home/earwig/public_html/reports/%s/%s" % (database[:-2], file), 'r')
reply("A file already exists with that name.", chan, nick)
return
except Exception:
pass
f = codecs.open("/home/earwig/public_html/reports/%s/%s" % (database[:-2], file), 'a', 'utf-8')
for line in data:
new_line = []
for l in line:
new_line.append(str(l))
f.write(' '.join(new_line) + "\n")
f.close()
reply("Query completed successfully. See http://toolserver.org/~earwig/reports/%s/%s. I will delete the report in %s seconds." % (database[:-2], file, times), chan, nick)
time.sleep(times)
os.remove("/home/earwig/public_html/reports/%s/%s" % (database[:-2], file))
return
if command == "remind" or command == "reminder":
try:
times = int(line2[4])
content = ' '.join(line2[5:])
except Exception:
reply("Please specify a time and a note in the following format: !remind <time> <note>.", chan, nick)
return
reply("Set reminder for \"%s\" in %s seconds." % (content, times), chan, nick)
time.sleep(times)
reply(content, chan, nick)
return
if command == "notes" or command == "note" or command == "about" or command == "data" or command == "database":
try:
action = line2[4]
except BaseException:
reply("What do you want me to do? Type \"!notes help\" for more information.", chan, nick)
return
import MySQLdb
db = MySQLdb.connect(db="u_earwig_ircbot", host="sql", read_default_file="/home/earwig/.my.cnf")
specify = ' '.join(line2[5:])
if action == "help" or action == "manual":
shortCommandList = "read, write, change, undo, delete, move, author, category, list, report, developer"
if specify == "read":
say("To read an entry, type \"!notes read <entry>\".", chan)
elif specify == "write":
say("To write a new entry, type \"!notes write <entry> <content>\". This will create a new entry only if one does not exist, see the below command...", chan)
elif specify == "change":
say("To change an entry, type \"!notes change <entry> <new content>\". The old entry will be stored in the database, so it can be undone later.", chan)
elif specify == "undo":
say("To undo a change, type \"!notes undo <entry>\".", chan)
elif specify == "delete":
say("To delete an entry, type \"!notes delete <entry>\". For security reasons, only bot admins can do this.", chan)
elif specify == "move":
say("To move an entry, type \"!notes move <old_title> <new_title>\".", chan)
elif specify == "author":
say("To return the author of an entry, type \"!notes author <entry>\".", chan)
elif specify == "category" or specify == "cat":
say("To change an entry's category, type \"!notes category <entry> <category>\".", chan)
elif specify == "list":
say("To list all categories in the database, type \"!notes list\". Type \"!notes list <category>\" to get all entries in a certain category.", chan)
elif specify == "report":
say("To give some statistics about the mini-wiki, including some debugging information, type \"!notes report\" in a PM.", chan)
elif specify == "developer":
say("To do developer work, such as writing to the database directly, type \"!notes developer <command>\". This can only be done by the bot owner.", chan)
else:
db.query("SELECT * FROM version;")
r = db.use_result()
data = r.fetch_row(0)
version = data[0]
reply("The Earwig Mini-Wiki: running v%s." % version, chan, nick)
reply("The full list of commands, for reference, are: %s." % shortCommandList, chan, nick)
reply("For an explaination of a certain command, type \"!notes help <command>\".", chan, nick)
reply("You can also access the database from the Toolserver: http://toolserver.org/~earwig/cgi-bin/irc_database.py", chan, nick)
time.sleep(0.4)
return
elif action == "read":
specify = string.lower(specify)
if " " in specify: specify = string.split(specify, " ")[0]
if not specify or "\"" in specify:
reply("Please include the name of the entry you would like to read after the command, e.g. !notes read earwig", chan, nick)
return
try:
db.query("SELECT entry_content FROM entries WHERE entry_title = \"%s\";" % specify)
r = db.use_result()
data = r.fetch_row(0)
entry = data[0][0]
say("Entry \"\x02%s\x0F\": %s" % (specify, entry), chan)
except Exception:
reply("There is no entry titled \"\x02%s\x0F\"." % specify, chan, nick)
return
elif action == "delete" or action == "remove":
specify = string.lower(specify)
if " " in specify: specify = string.split(specify, " ")[0]
if not specify or "\"" in specify:
reply("Please include the name of the entry you would like to delete after the command, e.g. !notes delete earwig", chan, nick)
return
if authy == "owner" or authy == "admin":
try:
db.query("DELETE from entries where entry_title = \"%s\";" % specify)
r = db.use_result()
db.commit()
reply("The entry on \"\x02%s\x0F\" has been removed." % specify, chan, nick)
except Exception:
phenny.reply("Unable to remove the entry on \"\x02%s\x0F\", because it doesn't exist." % specify, chan, nick)
else:
reply("Only bot admins can remove entries.", chan, nick)
return
elif action == "developer":
if authy == "owner":
db.query(specify)
r = db.use_result()
try:
print r.fetch_row(0)
except Exception:
pass
db.commit()
reply("Done.", chan, nick)
else:
reply("Only the bot owner can modify the raw database.", chan, nick)
return
elif action == "write":
try:
write = line2[5]
content = ' '.join(line2[6:])
except Exception:
reply("Please include some content in your entry.", chan, nick)
return
db.query("SELECT * from entries WHERE entry_title = \"%s\";" % write)
r = db.use_result()
data = r.fetch_row(0)
if data:
reply("An entry on %s already exists; please use \"!notes change %s %s\"." % (write, write, content), chan, nick)
return
content2 = content.replace('"', '\\' + '"')
db.query("INSERT INTO entries (entry_title, entry_author, entry_category, entry_content, entry_content_old) VALUES (\"%s\", \"%s\", \"uncategorized\", \"%s\", NULL);" % (write, nick, content2))
db.commit()
reply("You have written an entry titled \"\x02%s\x0F\", with the following content: \"%s\"" % (write, content), chan, nick)
return
elif action == "change":
reply("NotImplementedError", chan, nick)
elif action == "undo":
reply("NotImplementedError", chan, nick)
elif action == "move":
reply("NotImplementedError", chan, nick)
elif action == "author":
try:
entry = line2[5]
except Exception:
reply("Please include the name of the entry you would like to get information for after the command, e.g. !notes author earwig", chan, nick)
return
db.query("SELECT entry_author from entries WHERE entry_title = \"%s\";" % entry)
r = db.use_result()
data = r.fetch_row(0)
if data:
say("The author of \"\x02%s\x0F\" is \x02%s\x0F." % (entry, data[0][0]), chan)
return
reply("There is no entry titled \"\x02%s\x0F\"." % entry, chan, nick)
return
elif action == "cat" or action == "category":
reply("NotImplementedError", chan, nick)
elif action == "list":
reply("NotImplementedError", chan, nick)
elif action == "report":
reply("NotImplementedError", chan, nick)
if command == "hash":
import hashlib
try:
hashVia = line2[4]
hashText = line2[5]
hashText = ' '.join(line2[5:])
except Exception:
reply("Please provide a string and method to hash by.", chan, nick)
return
try:
hashed = eval("hashlib.%s(\"%s\").hexdigest()" % (hashVia, hashText))
reply(hashed, chan, nick)
except Exception:
try:
hashing = hashlib.new(hashVia)
hashing.update(hashText)
hashed = hashing.hexdigest()
reply(hashed, chan, nick)
except Exception:
reply("Error.", chan, nick)
if command == "langcode" or command == "lang" or command == "language":
try:
lang = line2[4]
except Exception:
reply("Please specify an ISO code.", chan, nick)
return
data = urllib.urlopen("http://toolserver.org/~earwig/cgi-bin/swmt.py?action=iso").read()
data = string.split(data, "\n")
result = False
for datum in data:
if datum.startswith(lang):
result = re.findall(".*? (.*)", datum)[0]
break
if result:
reply(result, chan, nick)
return
reply("Not found.", chan, nick)
return
if command == "lookup" or command == "ip":
try:
hexIP = line2[4]
except Exception:
reply("Please specify a hex IP address.", chan, nick)
return
hexes = [hexIP[:2], hexIP[2:4], hexIP[4:6], hexIP[6:8]]
hashes = []
for hexHash in hexes:
newHex = int(hexHash, 16)
hashes.append(newHex)
normalizedIP = "%s.%s.%s.%s" % (hashes[0], hashes[1], hashes[2], hashes[3])
reply(normalizedIP, chan, nick)
return

+ 0
- 138
irc/commands/afc_status.py View File

@@ -1,138 +0,0 @@
# -*- coding: utf-8 -*-

# Report the status of AFC submissions, either as an automatic message on join or a request via !status.

import json
import re
import urllib

from config.watcher import *
from irc.base_command import BaseCommand

class AFCStatus(BaseCommand):
    """Report the state of the Articles for Creation backlog.

    Handles the ``status``/``count``/``num``/``number``/``afc_status``
    commands and also sends a status notice to anyone joining a channel
    listed in ``AFC_CHANS``.
    """

    def get_hooks(self):
        """Return the hook names this command listens on."""
        return ["join", "msg"]

    def get_help(self, command):
        """Return the help text shown for this command."""
        return "Get the number of pending AfC submissions, open redirect requests, and open file upload requests."

    def check(self, data):
        """Return True for a status command or a JOIN in an AfC channel."""
        commands = ("status", "count", "num", "number", "afc_status")
        if data.is_command and data.command in commands:
            return True
        try:
            if data.line[1] == "JOIN" and data.chan in AFC_CHANS:
                return True
        except IndexError:
            # Line too short to carry an IRC command word.
            pass
        return False

    def process(self, data):
        """Answer a status request, or greet a joining user with a notice.

        The first command argument selects what is reported: ``sub*``/``s``,
        ``redir*``/``r``, ``file*``/``f``, ``agg*``/``a`` (optionally
        followed by a number to classify) or ``join*``/``j``.  With no
        argument all three counts are reported.
        """
        if data.line[1] == "JOIN":
            notice = self.get_join_notice()
            self.connection.notice(data.nick, notice)
            return

        if not data.args:
            subs = self.count_submissions()
            redirs = self.count_redirects()
            files = self.count_files()
            self.connection.reply(data, "there are currently %s pending submissions, %s open redirect requests, and %s open file upload requests."
                                  % (subs, redirs, files))
            return

        arg = data.args[0]

        if arg.startswith("sub") or arg == "s":
            subs = self.count_submissions()
            self.connection.reply(data, "there are currently %s pending AfC submissions." % subs)

        elif arg.startswith("redir") or arg == "r":
            redirs = self.count_redirects()
            self.connection.reply(data, "there are currently %s open redirect requests." % redirs)

        elif arg.startswith("file") or arg == "f":
            # Must count file requests here, not redirect requests.
            files = self.count_files()
            self.connection.reply(data, "there are currently %s open file upload requests." % files)

        elif arg.startswith("agg") or arg == "a":
            try:
                agg_num = int(data.args[1])
            except IndexError:
                # No explicit number given: compute it from the live counts.
                agg_data = (self.count_submissions(), self.count_redirects(), self.count_files())
                agg_num = self.get_aggregate_number(agg_data)
            except ValueError:
                self.connection.reply(data, "\x0303%s\x0301 isn't a number!" % data.args[1])
                return
            aggregate = self.get_aggregate(agg_num)
            self.connection.reply(data, "aggregate is currently %s (AfC %s)." % (agg_num, aggregate))

        elif arg.startswith("join") or arg == "j":
            notice = self.get_join_notice()
            self.connection.reply(data, notice)

        else:
            self.connection.reply(data, "unknown argument: \x0303%s\x0301. Valid args are 'subs', 'redirs', 'files', 'agg', and 'join'." % arg)

    def get_join_notice(self):
        """Build the one-line status summary sent to joining users."""
        subs = self.count_submissions()
        redirs = self.count_redirects()
        files = self.count_files()
        agg_num = self.get_aggregate_number((subs, redirs, files))
        aggregate = self.get_aggregate(agg_num)
        return ("\x02Current status:\x0F Articles for Creation %s (\x0302AFC\x0301: \x0305%s\x0301; \x0302AFC/R\x0301: \x0305%s\x0301; \x0302FFU\x0301: \x0305%s\x0301)"
                % (aggregate, subs, redirs, files))

    def count_submissions(self):
        """Return the number of pages in the pending-submissions category.

        The API query requests at most 500 category members, so the result
        is capped accordingly.
        """
        params = {'action': 'query', 'list': 'categorymembers', 'cmlimit': '500', 'format': 'json'}
        params['cmtitle'] = "Category:Pending_AfC_submissions"
        data = urllib.urlencode(params)
        raw = urllib.urlopen("http://en.wikipedia.org/w/api.php", data).read()
        res = json.loads(raw)
        subs = len(res['query']['categorymembers'])
        subs -= 2  # remove [[Wikipedia:Articles for creation/Redirects]] and [[Wikipedia:Files for upload]], which aren't real submissions
        return subs

    def count_redirects(self):
        """Return the number of open requests on the AfC redirects page."""
        return self._count_open_sections("Wikipedia:Articles_for_creation/Redirects", "{{afc-c|b}}")

    def count_files(self):
        """Return the number of open requests on the Files for upload page."""
        return self._count_open_sections("Wikipedia:Files_for_upload", "{{ifu-c|b}}")

    def _count_open_sections(self, pagename, closed_marker):
        """Count ``==heading==`` lines on a page minus closed-request markers.

        ``closed_marker`` is matched case-insensitively (the page text is
        lower-cased first), so it must be given in lower case.
        """
        content = self.get_page(pagename)
        total = len(re.findall(r"^\s*==(.*?)==\s*$", content, re.MULTILINE))
        closed = content.lower().count(closed_marker)
        return total - closed

    def get_page(self, pagename):
        """Return the wikitext of the latest revision of *pagename*."""
        params = {'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'rvlimit': '1', 'format': 'json'}
        params['titles'] = pagename
        data = urllib.urlencode(params)
        raw = urllib.urlopen("http://en.wikipedia.org/w/api.php", data).read()
        res = json.loads(raw)
        pages = res['query']['pages']
        # A single title was requested, so take the first (only) page entry.
        pageid = list(pages)[0]
        content = pages[pageid]['revisions'][0]['*']
        return content

    def get_aggregate(self, num):
        """Map an aggregate backlog number to a formatted description.

        The returned text embeds IRC formatting control codes.
        """
        if num == 0:
            agg = "is \x02\x0303clear\x0301\x0F"
        elif num < 60:
            agg = "is \x0303almost clear\x0301"
        elif num < 125:
            agg = "has a \x0312small backlog\x0301"
        elif num < 175:
            agg = "has an \x0307average backlog\x0301"
        elif num < 250:
            agg = "is \x0304backlogged\x0301"
        elif num < 300:
            agg = "is \x02\x0304heavily backlogged\x0301\x0F"
        else:
            agg = "is \x02\x1F\x0304severely backlogged\x0301\x0F"
        return agg

    def get_aggregate_number(self, counts):
        """Combine the three counts into one weighted backlog number.

        Args:
            counts: A ``(subs, redirs, files)`` tuple.

        Returns:
            ``subs * 5 + redirs * 2 + files * 2``.
        """
        # Unpacked here rather than in the signature: tuple parameters were
        # removed from the language by PEP 3113.
        subs, redirs, files = counts
        num = (subs * 5) + (redirs * 2) + (files * 2)
        return num

+ 0
- 71
irc/commands/calc.py View File

@@ -1,71 +0,0 @@
# -*- coding: utf-8 -*-

# A somewhat advanced calculator: http://futureboy.us/fsp/frink.fsp.

import re
import urllib

from irc.base_command import BaseCommand

class Calc(BaseCommand):
    """Calculator command backed by the Frink web interface."""

    # Extracts the result anchor from the returned HTML page.
    _R_RESULT = re.compile(r'(?i)<A NAME=results>(.*?)</A>')
    # Matches any HTML tag so it can be stripped from the result.
    _R_TAG = re.compile(r'<\S+.*?>')

    def get_hooks(self):
        """Return the hook names this command listens on."""
        return ["msg"]

    def get_help(self, command):
        """Return the help text shown for this command."""
        return "A somewhat advanced calculator: see http://futureboy.us/fsp/frink.fsp for details."

    def check(self, data):
        """Return True when the message is the ``calc`` command."""
        if data.is_command and data.command == "calc":
            return True
        return False

    def process(self, data):
        """Send the expression to Frink and reply with ``query = result``."""
        if not data.args:
            self.connection.reply(data, "What do you want me to calculate?")
            return

        raw_query = ' '.join(data.args)
        query = self.cleanup(raw_query)

        url = "http://futureboy.us/fsp/frink.fsp?fromVal=%s" % urllib.quote(query)
        result = urllib.urlopen(url).read()

        match = self._R_RESULT.search(result)
        if not match:
            self.connection.reply(data, "Calculation error.")
            return

        result = match.group(1)
        result = self._R_TAG.sub("", result)  # strip span.warning tags
        result = result.replace("&gt;", ">")
        result = result.replace("(undefined symbol)", "(?) ")
        result = result.strip()

        if not result:
            result = '?'
        elif " in " in raw_query:
            # Append the requested target unit.  This has to look at the raw
            # query: cleanup() has already rewritten " in " to " -> ".
            result += " " + raw_query.split(" in ", 1)[1]

        res = "%s = %s" % (query, result)
        self.connection.reply(data, res)

    def cleanup(self, query):
        """Rewrite user-friendly notation into syntax Frink understands.

        Each entry is a ``(regex, replacement)`` pair applied in order.
        """
        fixes = [
            (' in ', ' -> '),
            (' over ', ' / '),
            (u'£', 'GBP '),
            (u'€', 'EUR '),
            (r'\$', 'USD '),
            (r'\bKB\b', 'kilobytes'),
            (r'\bMB\b', 'megabytes'),
            (r'\bGB\b', 'gigabytes'),
            ('kbps', '(kilobits / second)'),
            ('mbps', '(megabits / second)'),
        ]

        for original, fix in fixes:
            query = re.sub(original, fix, query)
        return query.strip()

+ 0
- 31
irc/commands/chanops.py View File

@@ -1,31 +0,0 @@
# -*- coding: utf-8 -*-

# Voice/devoice/op/deop users in the channel.

from irc.base_command import BaseCommand
from config.irc import *

class ChanOps(BaseCommand):
    """Relay voice/devoice/op/deop requests from bot admins to ChanServ."""

    def get_hooks(self):
        """Return the hook names this command listens on."""
        hooks = ["msg"]
        return hooks

    def get_help(self, command):
        """Return help text, led by the capitalised command name."""
        return "%s users in the channel." % command.capitalize()

    def check(self, data):
        """Return True when the message is one of the four channel-op commands."""
        handled = ("voice", "devoice", "op", "deop")
        return bool(data.is_command and data.command in handled)

    def process(self, data):
        """Forward the request to ChanServ if the sender's host is in ADMINS."""
        if data.host not in ADMINS:
            self.connection.reply(data, "you must be a bot admin to use this command.")
            return

        # A bare !op/!devoice/etc. is taken to mean "apply it to me".
        target = data.args[0] if data.args else data.nick

        request = "%s %s %s" % (data.command, data.chan, target)
        self.connection.say("ChanServ", request)

+ 0
- 160
irc/commands/git.py View File

@@ -1,160 +0,0 @@
# -*- coding: utf-8 -*-

# Commands to interface with the bot's git repository; use '!git help' for sub-command list.

import shlex, subprocess, re

from config.irc import *
from irc.base_command import BaseCommand

class Git(BaseCommand):
    """Owner-only commands that drive the bot's own git repository.

    ``process`` stores the triggering message on ``self.data`` so that the
    ``do_*`` sub-command handlers can reply to it.
    """

    def get_hooks(self):
        """Return the hook names this command listens on."""
        return ["msg"]

    def get_help(self, command):
        """Return the help text shown for this command."""
        return "Commands to interface with the bot's git repository; use '!git help' for sub-command list."

    def check(self, data):
        """Return True when the message is the ``git`` command."""
        if data.is_command and data.command == "git":
            return True
        return False

    def process(self, data):
        """Validate the caller and dispatch to the requested sub-command."""
        self.data = data
        if data.host not in OWNERS:
            self.connection.reply(data, "you must be a bot owner to use this command.")
            return

        if not data.args:
            self.connection.reply(data, "no arguments provided. Maybe you wanted '!git help'?")
            return

        subcommands = {
            "help": self.do_help,
            "branch": self.do_branch,
            "branches": self.do_branches,
            "checkout": self.do_checkout,
            "delete": self.do_delete,
            "pull": self.do_pull,
            "status": self.do_status,
        }
        handler = subcommands.get(data.args[0])
        if handler is None:  # they asked us to do something we don't know
            self.connection.reply(data, "unknown argument: \x0303%s\x0301." % data.args[0])
            return
        handler()

    def exec_shell(self, command):
        """Run *command* (split with shlex, no shell) and return its output.

        stderr is merged into the output.  A single trailing newline is
        removed.

        Raises:
            subprocess.CalledProcessError: if the command exits non-zero.
        """
        command = shlex.split(command)
        result = subprocess.check_output(command, stderr=subprocess.STDOUT)
        # Only strip when there really is a newline, so output that lacks
        # one does not lose its last character.
        if result.endswith("\n"):
            result = result[:-1]
        return result

    def do_help(self):
        """Reply with the list of sub-commands and what each one does."""
        help_dict = {
            "branch": "get current branch",
            "branches": "get all branches",
            "checkout": "switch branches",
            "delete": "delete an old branch",
            "pull": "update everything from the remote server",
            "status": "check if we are up-to-date",
        }
        entries = ["\x0303%s\x0301 (%s)" % (key, help_dict[key])
                   for key in sorted(help_dict)]
        self.connection.reply(self.data, "sub-commands are: %s." % ", ".join(entries))

    def do_branch(self):
        """Reply with the name of the currently checked-out branch."""
        branch = self.exec_shell("git name-rev --name-only HEAD")
        self.connection.reply(self.data, "currently on branch \x0302%s\x0301." % branch)

    def do_branches(self):
        """Reply with a comma-separated list of local branches."""
        branches = self.exec_shell("git branch")
        branches = branches.replace('\n* ', ', ')  # cleanup extraneous characters
        branches = branches.replace('* ', ' ')
        branches = branches.replace('\n ', ', ')
        branches = branches.strip()
        self.connection.reply(self.data, "branches: \x0302%s\x0301." % branches)

    def do_checkout(self):
        """Switch to the branch named in the second argument."""
        try:
            branch = self.data.args[1]
        except IndexError:  # no branch name provided
            self.connection.reply(self.data, "switch to which branch?")
            return

        # Record where we are *before* switching; asking afterwards would
        # just return the branch we switched to.
        previous_branch = self.exec_shell("git name-rev --name-only HEAD")

        try:
            result = self.exec_shell("git checkout %s" % branch)
        except subprocess.CalledProcessError:  # git couldn't switch branches
            self.connection.reply(self.data, "branch \x0302%s\x0301 doesn't exist!" % branch)
            return

        if "Already on" in result:
            self.connection.reply(self.data, "already on \x0302%s\x0301!" % branch)
        else:
            self.connection.reply(self.data, "switched from branch \x0302%s\x0301 to \x0302%s\x0301." % (previous_branch, branch))

    def do_delete(self):
        """Delete a local branch, refusing if it is the current one."""
        try:
            delete_branch = self.data.args[1]
        except IndexError:  # no branch name provided
            self.connection.reply(self.data, "delete which branch?")
            return

        current_branch = self.exec_shell("git name-rev --name-only HEAD")

        if current_branch == delete_branch:
            self.connection.reply(self.data, "you're currently on this branch; please checkout to a different branch before deleting.")
            return

        try:
            self.exec_shell("git branch -d %s" % delete_branch)
            self.connection.reply(self.data, "branch \x0302%s\x0301 has been deleted locally." % delete_branch)
        except subprocess.CalledProcessError:  # git couldn't delete
            self.connection.reply(self.data, "branch \x0302%s\x0301 doesn't exist!" % delete_branch)

    def do_pull(self):
        """Pull from the remote and reply with a summary of what changed."""
        branch = self.exec_shell("git name-rev --name-only HEAD")
        self.connection.reply(self.data, "pulling from remote (currently on \x0302%s\x0301)..." % branch)

        result = self.exec_shell("git pull")

        if "Already up-to-date." in result:
            self.connection.reply(self.data, "done; no new changes.")
            return

        # The pattern only matches a summary line that ends in "(-)"; fall
        # back to a generic phrase instead of raising IndexError otherwise
        # (for example when the pull contained insertions only).
        summary = re.findall(r"\s*((.*?)\sfile(.*?)tions?\(-\))", result)
        if summary:
            changes = summary[0][0]
        else:
            changes = "new changes were pulled"

        try:
            remote = self.exec_shell("git config --get branch.%s.remote" % branch)
            url = self.exec_shell("git config --get remote.%s.url" % remote)
            self.connection.reply(self.data, "done; %s [from %s]." % (changes, url))
        except subprocess.CalledProcessError:  # something in .git/config is not specified correctly, so we cannot get the remote's url
            self.connection.reply(self.data, "done; %s." % changes)

    def do_status(self):
        """Reply whether the remote has anything we have not fetched yet."""
        last = self.exec_shell("git log -n 1 --pretty=\"%ar\"")
        result = self.exec_shell("git fetch --dry-run")
        if not result:  # nothing was fetched, so remote and local are equal
            self.connection.reply(self.data, "last commit was %s. Local copy is \x02up-to-date\x0F with remote." % last)
        else:
            self.connection.reply(self.data, "last local commit was %s. Remote is \x02ahead\x0F of local copy." % last)

+ 0
- 55
irc/commands/help.py View File

@@ -1,55 +0,0 @@
# -*- coding: utf-8 -*-

# Generates help information.

from irc.base_command import BaseCommand
from irc.data import Data
from irc import command_handler

class Help(BaseCommand):
    """Answer ``!help`` requests: general help, class list, or per-command help."""

    def get_hooks(self):
        """Return the hook names this command listens on."""
        return ["msg"]

    def get_help(self, command):
        """Return the help text shown for this command."""
        return "Generates help information."

    def check(self, data):
        """Return True when the message is the ``help`` command."""
        if data.is_command and data.command == "help":
            return True
        return False

    def process(self, data):
        """Pick the kind of help to give based on the first argument."""
        if not data.args:
            self.do_general_help(data)
        elif data.args[0] == "list":
            self.do_list_help(data)
        else:
            self.do_command_help(data)

    def do_general_help(self, data):
        """Reply with a short introduction on how to use ``!help``."""
        self.connection.reply(data, "I am a bot! You can get help for any command with '!help <command>', or a list of all loaded modules with '!help list'.")

    def do_list_help(self, data):
        """Reply with the class names of all loaded commands."""
        commands = command_handler.get_commands()
        cmnds = [c.__class__.__name__ for c in commands]
        pretty_cmnds = ', '.join(cmnds)
        self.connection.reply(data, "%s command classes loaded: %s." % (len(cmnds), pretty_cmnds))

    def do_command_help(self, data):
        """Reply with the help text of whichever command claims the name.

        A dummy message carrying the requested command name is offered to
        each loaded command's ``check``; the first one that accepts it
        supplies the help text.
        """
        command = data.args[0]
        commands = command_handler.get_commands()

        dummy = Data()  # dummy message to test which command classes pick up this command
        dummy.command = command.lower()  # lowercase command name
        dummy.is_command = True

        for cmnd in commands:
            if cmnd.check(dummy):
                help_text = cmnd.get_help(command)
                break
        else:
            # No loaded command recognised the name.
            self.connection.reply(data, "sorry, no help for \x0303%s\x0301." % command)
            return

        self.connection.reply(data, "info for command \x0303%s\x0301: \"%s\"" % (command, help_text))

+ 0
- 65
irc/commands/link.py View File

@@ -1,65 +0,0 @@
# -*- coding: utf-8 -*-

# Convert a Wikipedia page name into a URL.

import re

from irc.base_command import BaseCommand

class Link(BaseCommand):
    """Turn wiki page names, [[links]] and {{templates}} into URLs."""

    def get_hooks(self):
        """Return the hook names this command listens on."""
        hooks = ["msg"]
        return hooks

    def get_help(self, command):
        """Return the help text shown for this command."""
        return "Convert a Wikipedia page name into a URL."

    def check(self, data):
        """Return True for ``!link`` or any message containing [[..]] / {{..}}."""
        if data.is_command and data.command == "link":
            return True
        text = data.msg
        if "[[" in text and "]]" in text:
            return True
        if "{{" in text and "}}" in text:
            return True
        return False

    def process(self, data):
        """Reply with URLs for inline links, else for the ``!link`` argument."""
        text = data.msg

        # Inline [[links]] / {{templates}} take priority over the command form.
        if re.search(r"(\[\[(.*?)\]\])|(\{\{(.*?)\}\})", text):
            self.connection.reply(data, " , ".join(self.parse_line(text)))
            return

        if data.command != "link":
            return

        if not data.args:
            self.connection.reply(data, "what do you want me to link to?")
            return

        self.connection.reply(data, self.parse_link(' '.join(data.args)))

    def parse_line(self, line):
        """Return URLs for every [[link]] and then every {{template}} in *line*."""
        urls = []

        # destroy {{{template parameters}}} so they aren't read as templates
        line = re.sub(r"\{\{\{(.*?)\}\}\}", "", line)

        # findall() yields tuples; index 1 is the page name up to "|" or "]]"
        for groups in re.findall(r"(\[\[(.*?)(\||\]\]))", line):
            urls.append(self.parse_link(groups[1]))

        # same idea for templates: index 1 is the name up to "|" or "}}"
        for groups in re.findall(r"(\{\{(.*?)(\||\}\}))", line):
            urls.append(self.parse_template(groups[1]))

        return urls

    def parse_link(self, pagename):
        """Return the en.wikipedia.org URL for *pagename* (spaces become "_")."""
        url = "http://en.wikipedia.org/wiki/" + pagename.strip()
        return url.replace(" ", "_")

    def parse_template(self, pagename):
        """Return the URL of the template called *pagename*."""
        # TODO: implement an actual namespace check
        return self.parse_link("Template:%s" % pagename)

+ 0
- 124
irc/commands/tasks.py View File

@@ -1,124 +0,0 @@
# -*- coding: utf-8 -*-

# Manage wiki tasks from IRC, and check on thread status.

import threading, re

from irc.base_command import BaseCommand
from irc.data import *
from wiki import task_manager
from config.main import *
from config.irc import *

class Tasks(BaseCommand):
    """Owner-only command to list threads and start wiki tasks from IRC.

    ``process`` stores the triggering message on ``self.data`` so that the
    ``do_*`` handlers can reply to it.
    """

    def get_hooks(self):
        """Return the hook names this command listens on."""
        return ["msg"]

    def get_help(self, command):
        """Return the help text shown for this command."""
        return "Manage wiki tasks from IRC, and check on thread status."

    def check(self, data):
        """Return True for the ``tasks``, ``threads`` or ``tasklist`` command."""
        if data.is_command and data.command in ["tasks", "threads", "tasklist"]:
            return True
        return False

    def process(self, data):
        """Validate the caller and dispatch on the first argument."""
        self.data = data
        if data.host not in OWNERS:
            self.connection.reply(data, "at this time, you must be a bot owner to use this command.")
            return

        if not data.args:
            # A bare "!tasklist" is shorthand for "list".
            if data.command == "tasklist":
                self.do_list()
            else:
                self.connection.reply(data, "no arguments provided. Maybe you wanted '!{cmnd} list', '!{cmnd} start', or '!{cmnd} listall'?".format(cmnd=data.command))
            return

        if data.args[0] == "list":
            self.do_list()
        elif data.args[0] == "start":
            self.do_start()
        elif data.args[0] in ["listall", "all"]:
            self.do_listall()
        else:  # they asked us to do something we don't know
            self.connection.reply(data, "unknown argument: \x0303{}\x0301.".format(data.args[0]))

    def do_list(self):
        """Reply with all live threads, split into core and task threads."""
        threads = threading.enumerate()
        normal_threads = []
        task_threads = []

        for thread in threads:
            tname = thread.name
            if tname == "MainThread":
                tname = self.get_main_thread_name()
                normal_threads.append("\x0302{}\x0301 (as main thread, id {})".format(tname, thread.ident))
            elif tname in ["irc-frontend", "irc-watcher", "wiki-scheduler"]:
                normal_threads.append("\x0302{}\x0301 (id {})".format(tname, thread.ident))
            else:
                # Task threads are expected to be named "<task> (<start time>)".
                parsed = re.findall(r"^(.*?) \((.*?)\)$", tname)
                if parsed:
                    tname, start_time = parsed[0]
                    task_threads.append("\x0302{}\x0301 (id {}, since {})".format(tname, thread.ident, start_time))
                else:
                    # Any other thread (e.g. a default "Thread-N" name) is
                    # listed as an ordinary thread instead of crashing.
                    normal_threads.append("\x0302{}\x0301 (id {})".format(tname, thread.ident))

        if task_threads:
            msg = "\x02{}\x0F threads active: {}, and \x02{}\x0F task threads: {}.".format(len(threads), ', '.join(normal_threads), len(task_threads), ', '.join(task_threads))
        else:
            msg = "\x02{}\x0F threads active: {}, and \x020\x0F task threads.".format(len(threads), ', '.join(normal_threads))
        self.connection.reply(self.data, msg)

    def do_listall(self):
        """Reply with every loaded task and the threads currently running it."""
        tasks = sorted(task_manager.task_list.keys())
        threads = threading.enumerate()
        tasklist = []

        for task in tasks:
            # A thread counts as running a task when its name starts with
            # the task's name.
            ids = [str(t.ident) for t in threads if t.name.startswith(task)]
            if not ids:
                tasklist.append("\x0302{}\x0301 (idle)".format(task))
            elif len(ids) == 1:
                tasklist.append("\x0302{}\x0301 (\x02active\x0F as id {})".format(task, ids[0]))
            else:
                tasklist.append("\x0302{}\x0301 (\x02active\x0F as ids {})".format(task, ', '.join(ids)))

        tasklist = ", ".join(tasklist)
        msg = "{} tasks loaded: {}.".format(len(tasks), tasklist)
        self.connection.reply(self.data, msg)

    def do_start(self):
        """Start the task named in the second argument with parsed kwargs."""
        data = self.data

        try:
            task_name = data.args[1]
        except IndexError:  # no task name given
            self.connection.reply(data, "what task do you want me to start?")
            return

        try:
            data.parse_kwargs()
        except KwargParseException as arg:
            self.connection.reply(data, "error parsing argument: \x0303{}\x0301.".format(arg))
            return

        if task_name not in task_manager.task_list:  # this task does not exist or hasn't been loaded
            self.connection.reply(data, "task could not be found; either wiki/tasks/{}.py doesn't exist, or it wasn't loaded correctly.".format(task_name))
            return

        task_manager.start_task(task_name, **data.kwargs)
        self.connection.reply(data, "task \x0302{}\x0301 started.".format(task_name))

    def get_main_thread_name(self):
        """Return the "proper" name of the MainThread; e.g. "irc-frontend" or "irc-watcher"."""
        if enable_irc_frontend:
            return "irc-frontend"
        elif enable_wiki_schedule:
            return "wiki-scheduler"
        else:
            return "irc-watcher"

+ 0
- 26
irc/commands/test.py View File

@@ -1,26 +0,0 @@
# -*- coding: utf-8 -*-

# A very simple command to test the bot.

import random

from irc.base_command import BaseCommand

class Test(BaseCommand):
    """A very simple command used to check that the bot is responding."""

    def get_hooks(self):
        """Return the hooks this command is attached to."""
        return ["msg"]

    def get_help(self, command):
        """Return the help text for this command (`command` is not used)."""
        return "Test the bot!"

    def check(self, data):
        """Return True only for the ``test`` command."""
        return bool(data.is_command and data.command == "test")

    def process(self, data):
        """Greet the sender in the channel with one of two random messages."""
        if random.randint(0, 1):
            greeting = "Hey \x02%s\x0F!" % data.nick
        else:
            greeting = "'sup \x02%s\x0F?" % data.nick
        self.connection.say(data.chan, greeting)

+ 0
- 75
irc/connection.py View File

@@ -1,75 +0,0 @@
# -*- coding: utf-8 -*-

# A class to interface with IRC.

import socket
import threading

class BrokenSocketException(Exception):
    """Raised when a socket stops delivering data and is considered broken."""

class Connection(object):
    """A class to interface with IRC over a plain TCP socket."""

    def __init__(self, host=None, port=None, nick=None, ident=None, realname=None):
        """Store the connection settings; no socket is opened until connect()."""
        self.host = host
        self.port = port
        self.nick = nick
        self.ident = ident
        self.realname = realname
        # One lock per connection, shared by every send() call. A lock created
        # inside send() would be private to that call and exclude nobody.
        self._send_lock = threading.Lock()

    def connect(self):
        """Open the socket and register with the server via NICK and USER."""
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.sock.connect((self.host, self.port))
        self.send("NICK %s" % self.nick)
        self.send("USER %s %s * :%s" % (self.ident, self.host, self.realname))

    def close(self):
        """Close our connection with IRC."""
        try:
            self.sock.shutdown(socket.SHUT_RDWR)  # shut down connection first
        except socket.error:
            pass  # ignore if the socket is already down
        self.sock.close()

    def get(self, size=4096):
        """Receive up to `size` bytes from the server and return them.

        Raises BrokenSocketException when the socket returns no data, which
        is treated as a dead/broken connection.
        """
        data = self.sock.recv(size)
        if not data:  # socket giving us no data, so it is dead/broken
            raise BrokenSocketException()
        return data

    def send(self, msg):
        """Send `msg` plus a trailing CRLF to the server and echo it to stdout.

        The per-connection lock ensures only one message is written at a time.
        """
        with self._send_lock:
            self.sock.sendall(msg + "\r\n")
            print(" %s" % msg)

    def say(self, target, msg):
        """Send a PRIVMSG to `target`."""
        self.send("PRIVMSG %s :%s" % (target, msg))

    def reply(self, data, msg):
        """Send `msg` to data.chan, prefixed with the bolded data.nick."""
        self.say(data.chan, "%s%s%s: %s" % (chr(2), data.nick, chr(0x0f), msg))

    def action(self, target, msg):
        """Send `msg` to `target` as a CTCP ACTION."""
        self.say(target, "%sACTION %s%s" % (chr(1), msg, chr(1)))

    def notice(self, target, msg):
        """Send a NOTICE to `target`."""
        self.send("NOTICE %s :%s" % (target, msg))

    def join(self, chan):
        """Join a channel."""
        self.send("JOIN %s" % chan)

    def mode(self, chan, level, msg):
        """Send a MODE message."""
        self.send("MODE %s %s %s" % (chan, level, msg))

+ 0
- 55
irc/data.py View File

@@ -1,55 +0,0 @@
# -*- coding: utf-8 -*-

# A class to store data from an individual line received on IRC.

import re

class KwargParseException(Exception):
    """A keyword argument in self.args could not be parsed.

    Usually this means it was given incorrectly: no value (abc), only a value
    (=xyz) or a lone equal sign (=), rather than the expected form (abc=xyz).
    """

class Data(object):
    """Store data from an individual line received on IRC."""

    def __init__(self):
        """Create an empty record; fields are filled in by the caller."""
        self.line = str()
        self.chan = str()
        self.nick = str()
        self.ident = str()
        self.host = str()
        self.msg = str()
        # Defaults for the values the parse methods compute, so they can be
        # read safely before parse_args()/parse_kwargs() has run.
        self.args = []
        self.command = None
        self.is_command = False
        self.kwargs = {}

    def parse_args(self):
        """Parse self.msg into self.command, self.args and self.is_command.

        The message is split on spaces and empty pieces are dropped. The
        first word becomes self.command (None for an empty message) and the
        rest become self.args. If the first word starts with '!' or '.', that
        character is stripped, the name is lowercased and self.is_command is
        set to True.
        """
        words = [word for word in self.msg.strip().split(' ') if word]

        self.args = words[1:]  # the command arguments
        self.is_command = False  # whether this is a real command or not
        self.command = words[0] if words else None  # the command itself

        if self.command is not None and self.command.startswith(('!', '.')):
            self.is_command = True
            self.command = self.command[1:].lower()  # strip '!' or '.', lowercase

    def parse_kwargs(self):
        """Parse ``key=value`` arguments from self.args into self.kwargs.

        For example, args ending in ``key1=value1 key2=value2`` produce
        ``{'key1': 'value1', 'key2': 'value2'}``. The first two entries of
        self.args are skipped.
        NOTE(review): presumably those are a subcommand and its target, as in
        ``!tasks start <task> key=value`` -- confirm against callers.

        Raises KwargParseException (carrying the offending argument) when an
        argument has no '=', an empty key or an empty value.
        """
        self.kwargs = {}
        for arg in self.args[2:]:
            try:
                key, value = re.findall(r"^(.*?)=(.*?)$", arg)[0]
            except IndexError:
                raise KwargParseException(arg)
            if not key or not value:
                raise KwargParseException(arg)
            self.kwargs[key] = value

+ 0
- 75
irc/frontend.py View File

@@ -1,75 +0,0 @@
# -*- coding: utf-8 -*-

## Imports
import re, time

from config.irc import *
from config.secure import *

from irc import command_handler
from irc.connection import *
from irc.data import Data

connection = None

def get_connection():
    """Return a new, not yet connected Connection built from the HOST, PORT, NICK, IDENT and REALNAME settings."""
    return Connection(HOST, PORT, NICK, IDENT, REALNAME)

def startup(conn):
    """Install `conn` as the module-level connection, load the commands for it, then connect."""
    global connection
    connection = conn
    command_handler.load_commands(conn)
    conn.connect()

def main():
    """Run the front-end read loop until the socket breaks or an owner restarts.

    Reads from the module-level `connection`, splits the stream into lines
    and handles JOIN, PRIVMSG, PING and numeric 376. On 376 it optionally
    identifies to NickServ and then joins the channels in CHANS. Returns when
    the socket breaks or when a user whose host is in OWNERS sends
    ``!restart``.
    """
    read_buffer = str()
    sender_pattern = r":(.*?)!(.*?)@(.*?)\Z"  # nick!ident@host prefix

    while 1:
        try:
            read_buffer = read_buffer + connection.get()
        except BrokenSocketException:
            print("Socket has broken on front-end; restarting bot...")
            return

        lines = read_buffer.split("\n")
        read_buffer = lines.pop()  # keep the trailing partial line for the next read

        for line in lines:
            line = line.strip().split()
            if len(line) < 2:
                # Blank or one-word line: nothing below can handle it, and
                # indexing line[1] would raise IndexError and kill the loop.
                continue

            data = Data()
            data.line = line

            if line[1] == "JOIN" and len(line) > 2:
                data.nick, data.ident, data.host = re.findall(sender_pattern, line[0])[0]
                # NOTE(review): this drops the first character unconditionally,
                # presumably a leading ':' -- confirm the server always sends one.
                data.chan = line[2][1:]

                command_handler.check("join", data)  # check if there's anything we can respond to, and if so, respond

            if line[1] == "PRIVMSG" and len(line) > 2:
                data.nick, data.ident, data.host = re.findall(sender_pattern, line[0])[0]
                data.msg = ' '.join(line[3:])[1:]
                data.chan = line[2]

                if data.chan == NICK:  # this is a privmsg to us, so set 'chan' as the nick of the sender
                    data.chan = data.nick
                    command_handler.check("msg_private", data)  # only respond if it's a private message
                else:
                    command_handler.check("msg_public", data)  # only respond if it's a public (channel) message

                command_handler.check("msg", data)  # check for general messages

                # hardcode the !restart command (we can't restart from within an ordinary command)
                if data.msg.startswith("!restart") and data.host in OWNERS:
                    print("Restarting bot per owner request...")
                    return

            if line[0] == "PING":  # if we are pinged, pong back to the server
                connection.send("PONG %s" % line[1])

            if line[1] == "376":
                if NS_AUTH:  # if we're supposed to auth to nickserv, do that
                    connection.say("NickServ", "IDENTIFY %s %s" % (NS_USER, NS_PASS))
                    time.sleep(3)  # sleep for a bit so we don't join channels un-authed
                for chan in CHANS:  # join all of our startup channels
                    connection.join(chan)

+ 0
- 57
irc/rc.py View File

@@ -1,57 +0,0 @@
# -*- coding: utf-8 -*-

# A class to store data on an individual event received from our IRC watcher.

import re

class RC(object):
    """Store data on an individual event received from our IRC watcher."""

    def __init__(self, msg):
        """Keep the raw message; call parse() before get_pretty()."""
        self.msg = msg

    def parse(self):
        """Parse the recent-changes line into page, flags, url, user and comment.

        IRC color codes are stripped first. A line containing an ``http://``
        URL is treated as an edit. Otherwise it is treated as a log entry,
        is_edit is set to False and an en.wikipedia.org URL is built from the
        page title. Raises IndexError if the line matches neither layout.
        """
        msg = self.msg
        msg = re.sub(r"\x03([0-9]{1,2}(,[0-9]{1,2})?)?", "", msg)  # strip IRC color codes; we don't want/need 'em
        msg = msg.strip()
        self.msg = msg
        self.is_edit = True

        # flags: 'M' for minor edit, 'B' for bot edit, 'create' for a user creation log entry...
        try:
            page, flags, url, user, comment = re.findall(r"\A\[\[(.*?)\]\]\s(.*?)\s(http://.*?)\s\*\s(.*?)\s\*\s(.*?)\Z", msg)[0]
        except IndexError:  # we're probably missing the http:// part, because it's a log entry, which lacks a url
            page, flags, user, comment = re.findall(r"\A\[\[(.*?)\]\]\s(.*?)\s\*\s(.*?)\s\*\s(.*?)\Z", msg)[0]
            url = "http://en.wikipedia.org/wiki/{}".format(page)
            flags = flags.strip()  # flag tends to have a extraneous whitespace character at the end when it's a log entry
            self.is_edit = False  # this is a log entry, not edit
        self.page, self.flags, self.url, self.user, self.comment = page, flags, url, user, comment

    def get_pretty(self):
        """Return a colorful one-line summary of the event for the front-end.

        Requires parse() to have been called. Edits are labelled "edit", or
        "page" when the N flag is set, prefixed with "bot" for B and "minor"
        for M. Log entries are labelled "deletion", "protection", "user" or
        "move", and any other log type falls back to its raw flags.
        """
        flags = self.flags

        if self.is_edit:
            event_type = "page" if "N" in flags else "edit"  # "New page:" / "New edit:"
            if "B" in flags:
                event_type = "bot {}".format(event_type)  # "New bot edit:"
            if "M" in flags:
                event_type = "minor {}".format(event_type)  # "New minor edit:" OR "New minor bot edit:"
            return "\x02New {0}\x0F: \x0314[[\x0307{1}\x0314]]\x0306 *\x0303 {2}\x0306 *\x0302 {3}\x0306 *\x0310 {4}".format(event_type, self.page, self.user, self.url, self.comment)

        log_events = {
            "delete": "deletion",  # "New deletion:"
            "protect": "protection",  # "New protection:"
            "create": "user",  # "New user:"
        }
        if flags in log_events:
            event_type = log_events[flags]
        elif self.page == "Special:Log/move":
            event_type = "move"  # "New move:"
        else:
            event_type = flags  # "New <event>:" if we don't know exactly what happened
        return "\x02New {0}\x0F: \x0303{1}\x0306 *\x0302 {2}\x0306 *\x0310 {3}".format(event_type, self.user, self.url, self.comment)

Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save