diff --git a/README.md b/README.md index af1cbe3..9d01bdf 100644 --- a/README.md +++ b/README.md @@ -19,3 +19,12 @@ made over 45,000 edits. A project to rewrite it from scratch began in early April 2011, thus moving away from the Pywikipedia framework and allowing for less overall code, better integration between bot parts, and easier maintenance. + +# Installation + +## Dependencies + +EarwigBot uses the MySQL library +[oursql](http://packages.python.org/oursql/) (>= 0.9.2) for communicating with +MediaWiki databases, and some tasks use their own tables for storage. oursql is +optional: the bot runs without it, but SQL queries are unavailable. diff --git a/bot/wiki/exceptions.py b/bot/wiki/exceptions.py index 5a87fda..3e964cf 100644 --- a/bot/wiki/exceptions.py +++ b/bot/wiki/exceptions.py @@ -64,3 +64,6 @@ class SpamDetectedError(EditError): class FilteredError(EditError): """The edit filter refused our edit.""" + +class SQLError(WikiToolsetError): + """Some error involving SQL querying occurred.""" diff --git a/bot/wiki/functions.py b/bot/wiki/functions.py index 7562a4e..eae18a8 100644 --- a/bot/wiki/functions.py +++ b/bot/wiki/functions.py @@ -32,7 +32,7 @@ def _load_config(): earwigbot.py or core/main.py will already call these functions. 
""" is_encrypted = config.load() - if is_encrypted: # passwords in the config file are encrypted + if is_encrypted: # Passwords in the config file are encrypted key = getpass("Enter key to unencrypt bot passwords: ") config.decrypt(key) @@ -60,12 +60,11 @@ def _get_cookiejar(): try: _cookiejar.load() except LoadError: - # file contains bad data, so ignore it completely - pass + pass # File contains bad data, so ignore it completely except IOError as e: if e.errno == errno.ENOENT: # "No such file or directory" - # create the file and restrict reading/writing only to the owner, - # so others can't peak at our cookies + # Create the file and restrict reading/writing only to the owner, + # so others can't peek at our cookies: open(cookie_file, "w").close() chmod(cookie_file, stat.S_IRUSR|stat.S_IWUSR) else: @@ -82,7 +81,7 @@ def _get_site_object_from_dict(name, d): base_url = d.get("baseURL") article_path = d.get("articlePath") script_path = d.get("scriptPath") - sql = (d.get("sqlServer"), d.get("sqlDB")) + sql = d.get("sql", {}) namespaces = d.get("namespaces", {}) login = (config.wiki.get("username"), config.wiki.get("password")) cookiejar = _get_cookiejar() @@ -129,18 +128,18 @@ def get_site(name=None, project=None, lang=None): then `project` and `lang`. If, with any number of args, a site cannot be found in the config, SiteNotFoundError is raised. """ - # check if config has been loaded, and load it if it hasn't + # Check if config has been loaded, and load it if it hasn't: if not config.is_loaded(): _load_config() - # someone specified a project without a lang (or a lang without a project)! + # Someone specified a project without a lang (or a lang without a project)! if (project is None and lang is not None) or (project is not None and lang is None): e = "Keyword arguments 'lang' and 'project' must be specified together." 
raise TypeError(e) - # no args given, so return our default site (project is None implies lang - # is None, so we don't need to add that in) + # No args given, so return our default site (project is None implies lang + # is None, so we don't need to add that in): if name is None and project is None: try: default = config.wiki["defaultSite"] @@ -154,12 +153,12 @@ def get_site(name=None, project=None, lang=None): raise SiteNotFoundError(e) return _get_site_object_from_dict(default, site) - # name arg given, but don't look at others unless `name` isn't found + # Name arg given, but don't look at others unless `name` isn't found: if name is not None: try: site = config.wiki["sites"][name] except KeyError: - if project is None: # implies lang is None, so only name was given + if project is None: # Implies lang is None, so only name was given e = "Site '{0}' not found in config.".format(name) raise SiteNotFoundError(e) for sitename, site in config.wiki["sites"].items(): @@ -171,7 +170,7 @@ def get_site(name=None, project=None, lang=None): else: return _get_site_object_from_dict(name, site) - # if we end up here, then project and lang are both not None + # If we end up here, then project and lang are both not None: for sitename, site in config.wiki["sites"].items(): if site["project"] == project and site["lang"] == lang: return _get_site_object_from_dict(sitename, site) diff --git a/bot/wiki/site.py b/bot/wiki/site.py index 476159e..d38b3bc 100644 --- a/bot/wiki/site.py +++ b/bot/wiki/site.py @@ -10,6 +10,11 @@ from urllib import unquote_plus, urlencode from urllib2 import build_opener, HTTPCookieProcessor, URLError from urlparse import urlparse +try: + from oursql import connect +except ImportError: + connect = None + from wiki.category import Category from wiki.constants import * from wiki.exceptions import * @@ -40,7 +45,7 @@ class Site(object): """ def __init__(self, name=None, project=None, lang=None, base_url=None, - article_path=None, script_path=None, sql=(None, 
None), + article_path=None, script_path=None, sql=None, namespaces=None, login=(None, None), cookiejar=None, user_agent=None, assert_edit=None, maxlag=None): """Constructor for new Site instances. @@ -67,14 +72,17 @@ class Site(object): self._base_url = base_url self._article_path = article_path self._script_path = script_path - self._sql = sql self._namespaces = namespaces - # Attributes used when querying the API: + # Attributes used for API queries: self._assert_edit = assert_edit self._maxlag = maxlag self._max_retries = 5 + # Attributes used for SQL queries: + self._sql_data = sql + self._sql_conn = None + # Set up cookiejar and URL opener for making API queries: if cookiejar is not None: self._cookiejar = cookiejar @@ -416,6 +424,24 @@ class Site(object): self._cookiejar.clear() self._save_cookiejar() + def _sql_connect(self, **kwargs): + """Attempt to establish a connection with this site's SQL database. + + Will raise SQLError() if the module "oursql" is not available. + """ + if not connect: + e = "Module 'oursql' is required for SQL queries." + raise SQLError(e) + + args = self._sql_data + for key, value in kwargs.iteritems(): + args[key] = value + + if "read_default_file" not in args and "user" not in args and "passwd" not in args: + args["read_default_file"] = "~/.my.cnf" + + self._sql_conn = connect(**args) + def api_query(self, **kwargs): """Do an API query with `kwargs` as the parameters. @@ -423,6 +449,29 @@ class Site(object): """ return self._api_query(kwargs) + def sql_query(self, query, params=(), plain_query=False, cursor_class=None, + show_table=False): + """Do an SQL query and yield its results. + + For example: + >>> query = "SELECT user_name, user_registration FROM user WHERE user_name IN (?, ?)" + >>> for row in site.sql_query(query, ("EarwigBot", "The Earwig")): + ... 
print row + ('EarwigBot', '20090428220032') + ('The Earwig', '20080703215134') + + May raise SQLError() or one of oursql's exceptions + (oursql.ProgrammingError, oursql.InterfaceError, ...) if there were + problems with the query. + """ + if not self._sql_conn: + self._sql_connect() + + with self._sql_conn.cursor(cursor_class, show_table=show_table) as cur: + cur.execute(query, params, plain_query) + for result in cur: + yield result + def name(self): """Returns the Site's name (or "wikiid" in the API), like "enwiki".""" return self._name