@@ -19,3 +19,12 @@ made over 45,000 edits. | |||||
A project to rewrite it from scratch began in early April 2011, thus moving | A project to rewrite it from scratch began in early April 2011, thus moving | ||||
away from the Pywikipedia framework and allowing for less overall code, better | away from the Pywikipedia framework and allowing for less overall code, better | ||||
integration between bot parts, and easier maintenance. | integration between bot parts, and easier maintenance. | ||||
# Installation | |||||
## Dependencies | |||||
EarwigBot uses the MySQL library | |||||
[oursql](http://packages.python.org/oursql/) (>= 0.9.2) for communicating with | |||||
MediaWiki databases, and some tasks use their own tables for storage. It is | |||||
optional, but SQL queries will raise an error if it is not installed. |
@@ -64,3 +64,6 @@ class SpamDetectedError(EditError): | |||||
class FilteredError(EditError): | class FilteredError(EditError): | ||||
"""The edit filter refused our edit.""" | """The edit filter refused our edit.""" | ||||
class SQLError(WikiToolsetError):
    """An error occurred while setting up or running an SQL query."""
@@ -32,7 +32,7 @@ def _load_config(): | |||||
earwigbot.py or core/main.py will already call these functions. | earwigbot.py or core/main.py will already call these functions. | ||||
""" | """ | ||||
is_encrypted = config.load() | is_encrypted = config.load() | ||||
if is_encrypted: # passwords in the config file are encrypted | |||||
if is_encrypted: # Passwords in the config file are encrypted | |||||
key = getpass("Enter key to unencrypt bot passwords: ") | key = getpass("Enter key to unencrypt bot passwords: ") | ||||
config.decrypt(key) | config.decrypt(key) | ||||
@@ -60,12 +60,11 @@ def _get_cookiejar(): | |||||
try: | try: | ||||
_cookiejar.load() | _cookiejar.load() | ||||
except LoadError: | except LoadError: | ||||
# file contains bad data, so ignore it completely | |||||
pass | |||||
pass # File contains bad data, so ignore it completely | |||||
except IOError as e: | except IOError as e: | ||||
if e.errno == errno.ENOENT: # "No such file or directory" | if e.errno == errno.ENOENT: # "No such file or directory" | ||||
# create the file and restrict reading/writing only to the owner, | |||||
# so others can't peak at our cookies | |||||
# Create the file and restrict reading/writing only to the owner, | |||||
# so others can't peek at our cookies: | |||||
open(cookie_file, "w").close() | open(cookie_file, "w").close() | ||||
chmod(cookie_file, stat.S_IRUSR|stat.S_IWUSR) | chmod(cookie_file, stat.S_IRUSR|stat.S_IWUSR) | ||||
else: | else: | ||||
@@ -82,7 +81,7 @@ def _get_site_object_from_dict(name, d): | |||||
base_url = d.get("baseURL") | base_url = d.get("baseURL") | ||||
article_path = d.get("articlePath") | article_path = d.get("articlePath") | ||||
script_path = d.get("scriptPath") | script_path = d.get("scriptPath") | ||||
sql = (d.get("sqlServer"), d.get("sqlDB")) | |||||
sql = d.get("sql", {}) | |||||
namespaces = d.get("namespaces", {}) | namespaces = d.get("namespaces", {}) | ||||
login = (config.wiki.get("username"), config.wiki.get("password")) | login = (config.wiki.get("username"), config.wiki.get("password")) | ||||
cookiejar = _get_cookiejar() | cookiejar = _get_cookiejar() | ||||
@@ -129,18 +128,18 @@ def get_site(name=None, project=None, lang=None): | |||||
then `project` and `lang`. If, with any number of args, a site cannot be | then `project` and `lang`. If, with any number of args, a site cannot be | ||||
found in the config, SiteNotFoundError is raised. | found in the config, SiteNotFoundError is raised. | ||||
""" | """ | ||||
# check if config has been loaded, and load it if it hasn't | |||||
# Check if config has been loaded, and load it if it hasn't: | |||||
if not config.is_loaded(): | if not config.is_loaded(): | ||||
_load_config() | _load_config() | ||||
# someone specified a project without a lang (or a lang without a project)! | |||||
# Someone specified a project without a lang (or a lang without a project)! | |||||
if (project is None and lang is not None) or (project is not None and | if (project is None and lang is not None) or (project is not None and | ||||
lang is None): | lang is None): | ||||
e = "Keyword arguments 'lang' and 'project' must be specified together." | e = "Keyword arguments 'lang' and 'project' must be specified together." | ||||
raise TypeError(e) | raise TypeError(e) | ||||
# no args given, so return our default site (project is None implies lang | |||||
# is None, so we don't need to add that in) | |||||
# No args given, so return our default site (project is None implies lang | |||||
# is None, so we don't need to add that in): | |||||
if name is None and project is None: | if name is None and project is None: | ||||
try: | try: | ||||
default = config.wiki["defaultSite"] | default = config.wiki["defaultSite"] | ||||
@@ -154,12 +153,12 @@ def get_site(name=None, project=None, lang=None): | |||||
raise SiteNotFoundError(e) | raise SiteNotFoundError(e) | ||||
return _get_site_object_from_dict(default, site) | return _get_site_object_from_dict(default, site) | ||||
# name arg given, but don't look at others unless `name` isn't found | |||||
# Name arg given, but don't look at others unless `name` isn't found: | |||||
if name is not None: | if name is not None: | ||||
try: | try: | ||||
site = config.wiki["sites"][name] | site = config.wiki["sites"][name] | ||||
except KeyError: | except KeyError: | ||||
if project is None: # implies lang is None, so only name was given | |||||
if project is None: # Implies lang is None, so only name was given | |||||
e = "Site '{0}' not found in config.".format(name) | e = "Site '{0}' not found in config.".format(name) | ||||
raise SiteNotFoundError(e) | raise SiteNotFoundError(e) | ||||
for sitename, site in config.wiki["sites"].items(): | for sitename, site in config.wiki["sites"].items(): | ||||
@@ -171,7 +170,7 @@ def get_site(name=None, project=None, lang=None): | |||||
else: | else: | ||||
return _get_site_object_from_dict(name, site) | return _get_site_object_from_dict(name, site) | ||||
# if we end up here, then project and lang are both not None | |||||
# If we end up here, then project and lang are both not None: | |||||
for sitename, site in config.wiki["sites"].items(): | for sitename, site in config.wiki["sites"].items(): | ||||
if site["project"] == project and site["lang"] == lang: | if site["project"] == project and site["lang"] == lang: | ||||
return _get_site_object_from_dict(sitename, site) | return _get_site_object_from_dict(sitename, site) | ||||
@@ -10,6 +10,11 @@ from urllib import unquote_plus, urlencode | |||||
from urllib2 import build_opener, HTTPCookieProcessor, URLError | from urllib2 import build_opener, HTTPCookieProcessor, URLError | ||||
from urlparse import urlparse | from urlparse import urlparse | ||||
try: | |||||
from oursql import connect | |||||
except ImportError: | |||||
connect = None | |||||
from wiki.category import Category | from wiki.category import Category | ||||
from wiki.constants import * | from wiki.constants import * | ||||
from wiki.exceptions import * | from wiki.exceptions import * | ||||
@@ -40,7 +45,7 @@ class Site(object): | |||||
""" | """ | ||||
def __init__(self, name=None, project=None, lang=None, base_url=None, | def __init__(self, name=None, project=None, lang=None, base_url=None, | ||||
article_path=None, script_path=None, sql=(None, None), | |||||
article_path=None, script_path=None, sql=None, | |||||
namespaces=None, login=(None, None), cookiejar=None, | namespaces=None, login=(None, None), cookiejar=None, | ||||
user_agent=None, assert_edit=None, maxlag=None): | user_agent=None, assert_edit=None, maxlag=None): | ||||
"""Constructor for new Site instances. | """Constructor for new Site instances. | ||||
@@ -67,14 +72,17 @@ class Site(object): | |||||
self._base_url = base_url | self._base_url = base_url | ||||
self._article_path = article_path | self._article_path = article_path | ||||
self._script_path = script_path | self._script_path = script_path | ||||
self._sql = sql | |||||
self._namespaces = namespaces | self._namespaces = namespaces | ||||
# Attributes used when querying the API: | |||||
# Attributes used for API queries: | |||||
self._assert_edit = assert_edit | self._assert_edit = assert_edit | ||||
self._maxlag = maxlag | self._maxlag = maxlag | ||||
self._max_retries = 5 | self._max_retries = 5 | ||||
# Attributes used for SQL queries: | |||||
self._sql_data = sql | |||||
self._sql_conn = None | |||||
# Set up cookiejar and URL opener for making API queries: | # Set up cookiejar and URL opener for making API queries: | ||||
if cookiejar is not None: | if cookiejar is not None: | ||||
self._cookiejar = cookiejar | self._cookiejar = cookiejar | ||||
@@ -416,6 +424,24 @@ class Site(object): | |||||
self._cookiejar.clear() | self._cookiejar.clear() | ||||
self._save_cookiejar() | self._save_cookiejar() | ||||
def _sql_connect(self, **kwargs):
    """Attempt to establish a connection with this site's SQL database.

    Connection arguments start from self._sql_data; any keyword arguments
    given here override them for this connection only. If none of
    "read_default_file", "user", or "passwd" ends up in the arguments,
    "read_default_file" defaults to "~/.my.cnf". The new connection is
    stored in self._sql_conn.

    Will raise SQLError() if the module "oursql" is not available.
    """
    if not connect:
        e = "Module 'oursql' is required for SQL queries."
        raise SQLError(e)

    # Work on a copy: self._sql_data may be None, and when it is a dict it
    # may be the very object the caller handed us, so it must not be
    # mutated by per-call overrides or by the default added below.
    args = dict(self._sql_data or {})
    args.update(kwargs)

    credential_keys = ("read_default_file", "user", "passwd")
    if not any(key in args for key in credential_keys):
        args["read_default_file"] = "~/.my.cnf"

    self._sql_conn = connect(**args)
def api_query(self, **kwargs): | def api_query(self, **kwargs): | ||||
"""Do an API query with `kwargs` as the parameters. | """Do an API query with `kwargs` as the parameters. | ||||
@@ -423,6 +449,29 @@ class Site(object): | |||||
""" | """ | ||||
return self._api_query(kwargs) | return self._api_query(kwargs) | ||||
def sql_query(self, query, params=(), plain_query=False, cursor_class=None,
              show_table=False):
    """Run an SQL query on this site's database and yield each result row.

    A connection is opened with self._sql_connect() if there isn't one yet.
    `cursor_class` and `show_table` are handed to the connection's cursor()
    method; `query`, `params`, and `plain_query` are handed to the cursor's
    execute() method.

    Example usage:
    >>> query = "SELECT user_name, user_registration FROM user WHERE user_name IN (?, ?)"
    >>> for row in site.sql_query(query, ("EarwigBot", "The Earwig")):
    ...     print row
    ('EarwigBot', '20090428220032')
    ('The Earwig', '20080703215134')

    SQLError() or one of oursql's own exceptions (oursql.ProgrammingError,
    oursql.InterfaceError, ...) may be raised if something goes wrong with
    the query.
    """
    if not self._sql_conn:
        self._sql_connect()

    connection = self._sql_conn
    with connection.cursor(cursor_class, show_table=show_table) as cursor:
        cursor.execute(query, params, plain_query)
        for row in cursor:
            yield row
def name(self):
    """Return this Site's name ("wikiid" in API terms), e.g. "enwiki"."""
    return self._name