@@ -19,3 +19,12 @@ made over 45,000 edits. | |||
A project to rewrite it from scratch began in early April 2011, thus moving | |||
away from the Pywikipedia framework and allowing for less overall code, better | |||
integration between bot parts, and easier maintenance. | |||
# Installation | |||
## Dependencies | |||
EarwigBot uses the MySQL library | |||
[oursql](http://packages.python.org/oursql/) (>= 0.9.2) for communicating with | |||
MediaWiki databases, and some tasks use their own tables for storage. oursql is | |||
optional, but SQL-based features will not work without it. |
@@ -64,3 +64,6 @@ class SpamDetectedError(EditError): | |||
class FilteredError(EditError):
    """Our edit was rejected by the edit filter."""
class SQLError(WikiToolsetError):
    """An error related to SQL querying occurred."""
@@ -32,7 +32,7 @@ def _load_config(): | |||
earwigbot.py or core/main.py will already call these functions. | |||
""" | |||
is_encrypted = config.load() | |||
if is_encrypted: # passwords in the config file are encrypted | |||
if is_encrypted: # Passwords in the config file are encrypted | |||
key = getpass("Enter key to unencrypt bot passwords: ") | |||
config.decrypt(key) | |||
@@ -60,12 +60,11 @@ def _get_cookiejar(): | |||
try: | |||
_cookiejar.load() | |||
except LoadError: | |||
# file contains bad data, so ignore it completely | |||
pass | |||
pass # File contains bad data, so ignore it completely | |||
except IOError as e: | |||
if e.errno == errno.ENOENT: # "No such file or directory" | |||
# create the file and restrict reading/writing only to the owner, | |||
# so others can't peek at our cookies | |||
# Create the file and restrict reading/writing only to the owner, | |||
# so others can't peek at our cookies: | |||
open(cookie_file, "w").close() | |||
chmod(cookie_file, stat.S_IRUSR|stat.S_IWUSR) | |||
else: | |||
@@ -82,7 +81,7 @@ def _get_site_object_from_dict(name, d): | |||
base_url = d.get("baseURL") | |||
article_path = d.get("articlePath") | |||
script_path = d.get("scriptPath") | |||
sql = (d.get("sqlServer"), d.get("sqlDB")) | |||
sql = d.get("sql", {}) | |||
namespaces = d.get("namespaces", {}) | |||
login = (config.wiki.get("username"), config.wiki.get("password")) | |||
cookiejar = _get_cookiejar() | |||
@@ -129,18 +128,18 @@ def get_site(name=None, project=None, lang=None): | |||
then `project` and `lang`. If, with any number of args, a site cannot be | |||
found in the config, SiteNotFoundError is raised. | |||
""" | |||
# check if config has been loaded, and load it if it hasn't | |||
# Check if config has been loaded, and load it if it hasn't: | |||
if not config.is_loaded(): | |||
_load_config() | |||
# someone specified a project without a lang (or a lang without a project)! | |||
# Someone specified a project without a lang (or a lang without a project)! | |||
if (project is None and lang is not None) or (project is not None and | |||
lang is None): | |||
e = "Keyword arguments 'lang' and 'project' must be specified together." | |||
raise TypeError(e) | |||
# no args given, so return our default site (project is None implies lang | |||
# is None, so we don't need to add that in) | |||
# No args given, so return our default site (project is None implies lang | |||
# is None, so we don't need to add that in): | |||
if name is None and project is None: | |||
try: | |||
default = config.wiki["defaultSite"] | |||
@@ -154,12 +153,12 @@ def get_site(name=None, project=None, lang=None): | |||
raise SiteNotFoundError(e) | |||
return _get_site_object_from_dict(default, site) | |||
# name arg given, but don't look at others unless `name` isn't found | |||
# Name arg given, but don't look at others unless `name` isn't found: | |||
if name is not None: | |||
try: | |||
site = config.wiki["sites"][name] | |||
except KeyError: | |||
if project is None: # implies lang is None, so only name was given | |||
if project is None: # Implies lang is None, so only name was given | |||
e = "Site '{0}' not found in config.".format(name) | |||
raise SiteNotFoundError(e) | |||
for sitename, site in config.wiki["sites"].items(): | |||
@@ -171,7 +170,7 @@ def get_site(name=None, project=None, lang=None): | |||
else: | |||
return _get_site_object_from_dict(name, site) | |||
# if we end up here, then project and lang are both not None | |||
# If we end up here, then project and lang are both not None: | |||
for sitename, site in config.wiki["sites"].items(): | |||
if site["project"] == project and site["lang"] == lang: | |||
return _get_site_object_from_dict(sitename, site) | |||
@@ -10,6 +10,11 @@ from urllib import unquote_plus, urlencode | |||
from urllib2 import build_opener, HTTPCookieProcessor, URLError | |||
from urlparse import urlparse | |||
try: | |||
from oursql import connect | |||
except ImportError: | |||
connect = None | |||
from wiki.category import Category | |||
from wiki.constants import * | |||
from wiki.exceptions import * | |||
@@ -40,7 +45,7 @@ class Site(object): | |||
""" | |||
def __init__(self, name=None, project=None, lang=None, base_url=None, | |||
article_path=None, script_path=None, sql=(None, None), | |||
article_path=None, script_path=None, sql=None, | |||
namespaces=None, login=(None, None), cookiejar=None, | |||
user_agent=None, assert_edit=None, maxlag=None): | |||
"""Constructor for new Site instances. | |||
@@ -67,14 +72,17 @@ class Site(object): | |||
self._base_url = base_url | |||
self._article_path = article_path | |||
self._script_path = script_path | |||
self._sql = sql | |||
self._namespaces = namespaces | |||
# Attributes used when querying the API: | |||
# Attributes used for API queries: | |||
self._assert_edit = assert_edit | |||
self._maxlag = maxlag | |||
self._max_retries = 5 | |||
# Attributes used for SQL queries: | |||
self._sql_data = sql | |||
self._sql_conn = None | |||
# Set up cookiejar and URL opener for making API queries: | |||
if cookiejar is not None: | |||
self._cookiejar = cookiejar | |||
@@ -416,6 +424,24 @@ class Site(object): | |||
self._cookiejar.clear() | |||
self._save_cookiejar() | |||
def _sql_connect(self, **kwargs):
    """Attempt to establish a connection with this site's SQL database.

    The connection arguments are this site's stored SQL settings, overridden
    by any keyword arguments given here; they are passed on to oursql's
    connect(). If none of "read_default_file", "user", or "passwd" is
    supplied, "read_default_file" defaults to "~/.my.cnf". The resulting
    connection is stored in self._sql_conn.

    Will raise SQLError() if the module "oursql" is not available.
    """
    if not connect:
        e = "Module 'oursql' is required for SQL queries."
        raise SQLError(e)

    # Work on a copy so per-call overrides and the default below never leak
    # back into the site's stored settings, and tolerate a site that was
    # created without any SQL settings (sql=None):
    args = dict(self._sql_data or {})
    args.update(kwargs)

    credential_keys = ("read_default_file", "user", "passwd")
    if not any(key in args for key in credential_keys):
        args["read_default_file"] = "~/.my.cnf"

    self._sql_conn = connect(**args)
def api_query(self, **kwargs): | |||
"""Do an API query with `kwargs` as the parameters. | |||
@@ -423,6 +449,29 @@ class Site(object): | |||
""" | |||
return self._api_query(kwargs) | |||
def sql_query(self, query, params=(), plain_query=False, cursor_class=None,
              show_table=False):
    """Run an SQL query against this site's database, yielding each row.

    A connection is opened first if there is not one already. Example:
    >>> query = "SELECT user_name, user_registration FROM user WHERE user_name IN (?, ?)"
    >>> for row in site.sql_query(query, ("EarwigBot", "The Earwig")):
    ...     print row
    ('EarwigBot', '20090428220032')
    ('The Earwig', '20080703215134')

    SQLError() or one of oursql's own exceptions (oursql.ProgrammingError,
    oursql.InterfaceError, ...) may be raised if something goes wrong with
    the query.
    """
    # Connect on first use; this is a generator, so nothing happens until
    # the caller starts iterating:
    if not self._sql_conn:
        self._sql_connect()

    cursor_context = self._sql_conn.cursor(cursor_class, show_table=show_table)
    with cursor_context as cursor:
        cursor.execute(query, params, plain_query)
        for row in cursor:
            yield row
def name(self):
    """Return this Site's name (its "wikiid" in the API), e.g. "enwiki"."""
    site_name = self._name
    return site_name