diff --git a/earwigbot/wiki/functions.py b/earwigbot/wiki/functions.py
index 5504306..52aa75d 100644
--- a/earwigbot/wiki/functions.py
+++ b/earwigbot/wiki/functions.py
@@ -24,7 +24,9 @@
 EarwigBot's Wiki Toolset: Misc Functions
 
 This module, a component of the wiki package, contains miscellaneous functions
-that are not methods of any class, like get_site().
+that are not methods of any class. Currently, it contains get_site(),
+add_site(), and remove_site(). These functions act as bridges between the bot's
+config files and Site objects.
 
 There's no need to import this module explicitly. All functions here are
 automatically available from earwigbot.wiki.
@@ -36,21 +38,25 @@ from getpass import getpass
 from os import chmod, path
 import platform
 import stat
+import sqlite3 as sqlite
 
-import earwigbot
+from earwigbot import __version__
 from earwigbot.config import config
 from earwigbot.wiki.exceptions import SiteNotFoundError
 from earwigbot.wiki.site import Site
 
-__all__ = ["get_site", "add_site", "del_site"]
+__all__ = ["get_site", "add_site", "remove_site"]
 
 _cookiejar = None
+_sitesdb = "sites.db"
 
 def _load_config():
-    """Called by a config-requiring function, such as get_site(), when config
+    """Load the bot's config.
+
+    Called by a config-requiring function, such as get_site(), when config
     has not been loaded. This will usually happen only if we're running code
     directly from Python's interpreter and not the bot itself, because
-    earwigbot.py or core/main.py will already call these functions.
+    bot.py and earwigbot.runner will already call these functions.
     """
     is_encrypted = config.load()
     if is_encrypted:  # Passwords in the config file are encrypted
@@ -59,21 +65,20 @@ def _load_config():
         config.decrypt(config.wiki, "password")
 
 def _get_cookiejar():
-    """Returns a LWPCookieJar object loaded from our .cookies file. The same
-    one is returned every time.
+    """Return a LWPCookieJar object loaded from our .cookies file.
 
-    The .cookies file is located in the project root, same directory as
-    config.yml and bot.py. If it doesn't exist, we will create the file and set
-    it to be readable and writeable only by us. If it exists but the
-    information inside is bogus, we will ignore it.
+    The same .cookies file is returned every time, located in the project root,
+    same directory as config.yml and bot.py. If it doesn't exist, we will
+    create the file and set it to be readable and writeable only by us. If it
+    exists but the information inside is bogus, we will ignore it.
 
-    This is normally called by _get_site_object_from_dict() (in turn called by
+    This is normally called by _make_site_object() (in turn called by
     get_site()), and the cookiejar is passed to our Site's constructor, used
     when it makes API queries. This way, we can easily preserve cookies between
     sites (e.g., for CentralAuth), making logins easier.
     """
     global _cookiejar
     if _cookiejar is not None:
         return _cookiejar
 
     cookie_file = path.join(config.root_dir, ".cookies")
@@ -94,17 +99,63 @@ def _get_cookiejar():
 
     return _cookiejar
 
-def _get_site_object_from_dict(name, d):
-    """Return a Site object based on the contents of a dict, probably acquired
-    through our config file, and a separate name.
+def _create_sitesdb():
+    """Initialize the sitesdb file with its three necessary tables."""
+    script = """
+    CREATE TABLE sites (site_name, site_project, site_lang, site_base_url,
+                        site_article_path, site_script_path);
+    CREATE TABLE sql_data (sql_site, sql_data_key, sql_data_value);
+    CREATE TABLE namespaces (ns_site, ns_id, ns_name, ns_is_primary_name);
+    """
+    with sqlite.connect(_sitesdb) as conn:
+        conn.executescript(script)
+
+def _load_site_from_sitesdb(name):
+    """Return all information stored in the sitesdb relating to site 'name'.
+
+    The information will be returned as a tuple, containing the site's project,
+    language, base URL, article path, script path, SQL connection data, and
+    namespaces, in that order. If the site is not found in the database,
+    SiteNotFoundError will be raised. An empty database will be created before
+    the exception is raised if none exists.
     """
-    project = d.get("project")
-    lang = d.get("lang")
-    base_url = d.get("baseURL")
-    article_path = d.get("articlePath")
-    script_path = d.get("scriptPath")
-    sql = d.get("sql", {})
-    namespaces = d.get("namespaces", {})
+    query1 = "SELECT * FROM sites WHERE site_name = ?"
+    query2 = "SELECT sql_data_key, sql_data_value FROM sql_data WHERE sql_site = ?"
+    query3 = "SELECT ns_id, ns_name, ns_is_primary_name FROM namespaces WHERE ns_site = ?"
+    error = "Site '{0}' not found in the sitesdb.".format(name)
+    with sqlite.connect(_sitesdb) as conn:
+        try:
+            site_data = conn.execute(query1, (name,)).fetchone()
+        except sqlite.OperationalError:
+            _create_sitesdb()
+            raise SiteNotFoundError(error)
+        if not site_data:
+            raise SiteNotFoundError(error)
+        sql_data = conn.execute(query2, (name,)).fetchall()
+        ns_data = conn.execute(query3, (name,)).fetchall()
+
+    project, lang, base_url, article_path, script_path = site_data[1:]  # Skip site_name
+    sql = dict(sql_data)
+    namespaces = {}
+    for ns_id, ns_name, ns_is_primary_name in ns_data:
+        try:
+            if ns_is_primary_name:  # "Primary" name goes first in list
+                namespaces[ns_id].insert(0, ns_name)
+            else:  # Ordering of the aliases doesn't matter
+                namespaces[ns_id].append(ns_name)
+        except KeyError:
+            namespaces[ns_id] = [ns_name]
+
+    return project, lang, base_url, article_path, script_path, sql, namespaces
+
+def _make_site_object(name):
+    """Return a Site object associated with the site 'name' in our sitesdb.
+
+    This calls _load_site_from_sitesdb(), so SiteNotFoundError will be raised
+    if the site is not in our sitesdb.
+    """
+    (project, lang, base_url, article_path, script_path, sql,
+     namespaces) = _load_site_from_sitesdb(name)
     login = (config.wiki.get("username"), config.wiki.get("password"))
     cookiejar = _get_cookiejar()
     user_agent = config.wiki.get("userAgent")
@@ -113,7 +164,7 @@ def _get_site_object_from_dict(name, d):
     search_config = config.wiki.get("search")
 
     if user_agent:
-        user_agent = user_agent.replace("$1", earwigbot.__version__)
+        user_agent = user_agent.replace("$1", __version__)
         user_agent = user_agent.replace("$2", platform.python_version())
 
     return Site(name=name, project=project, lang=lang, base_url=base_url,
@@ -122,90 +173,199 @@ def _get_site_object_from_dict(name, d):
                 user_agent=user_agent, assert_edit=assert_edit, maxlag=maxlag,
                 search_config=search_config)
 
+def _get_site_name_from_sitesdb(project, lang):
+    """Return the name of the first site with the specified project and lang.
+
+    If the site is not found, return None. An empty sitesdb will be created if
+    none exists.
+    """
+    query = "SELECT site_name FROM sites WHERE site_project = ? and site_lang = ?"
+    with sqlite.connect(_sitesdb) as conn:
+        try:
+            row = conn.execute(query, (project, lang)).fetchone()
+        except sqlite.OperationalError:
+            _create_sitesdb()
+            return None
+    # fetchone() gives a 1-tuple (or None); callers expect the bare name:
+    return row[0] if row else None
+
+def _add_site_to_sitesdb(site):
+    """Extract relevant info from a Site object and add it to the sitesdb.
+
+    Works like a reverse _load_site_from_sitesdb(); the site's project,
+    language, base URL, article path, script path, SQL connection data, and
+    namespaces are extracted from the site and inserted into the sites
+    database. If the sitesdb doesn't exist, we'll create it first.
+    """
+    name = site.name
+    sites_data = (name, site.project, site.lang, site._base_url,
+                  site._article_path, site._script_path)
+    sql_data = [(name, key, val) for key, val in site._sql_data.iteritems()]
+    ns_data = []
+    for ns_id, ns_names in site._namespaces.iteritems():
+        ns_data.append((name, ns_id, ns_names[0], True))
+        for ns_name in ns_names[1:]:
+            ns_data.append((name, ns_id, ns_name, False))
+
+    with sqlite.connect(_sitesdb) as conn:
+        check_exists = "SELECT 1 FROM sites WHERE site_name = ?"
+        try:
+            exists = conn.execute(check_exists, (name,)).fetchone()
+        except sqlite.OperationalError:
+            _create_sitesdb()
+        else:
+            if exists:
+                conn.execute("DELETE FROM sites WHERE site_name = ?", (name,))
+                conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
+                conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
+        conn.execute("INSERT INTO sites VALUES (?, ?, ?, ?, ?, ?)", sites_data)
+        conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)", sql_data)
+        conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)", ns_data)
+
+def _remove_site_from_sitesdb(name):
+    """Remove a site by name from the sitesdb."""
+    with sqlite.connect(_sitesdb) as conn:
+        cursor = conn.execute("DELETE FROM sites WHERE site_name = ?", (name,))
+        if cursor.rowcount == 0:
+            return False
+        else:
+            conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
+            conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
+    return True
+
 def get_site(name=None, project=None, lang=None):
-    """Returns a Site instance based on information from our config file.
+    """Return a Site instance based on information from the sitesdb.
 
-    With no arguments, returns the default site as specified by our config
-    file. This is default = config.wiki["defaultSite"];
-    config.wiki["sites"][default].
+    With no arguments, return the default site as specified by our config
+    file. This is config.wiki["defaultSite"].
 
-    With `name` specified, returns the site specified by
-    config.wiki["sites"][name].
+    With 'name' specified, return the site with that name. This is equivalent
+    to the site's 'wikiid' in the API, like 'enwiki'.
 
-    With `project` and `lang` specified, returns the site specified by the
-    member of config.wiki["sites"], `s`, for which s["project"] == project and
-    s["lang"] == lang.
+    With 'project' and 'lang' specified, return the site whose project and
+    language match these values. If there are multiple sites with the same
+    values (unlikely), this is not a reliable way of loading a site. Call the
+    function with an explicit 'name' in that case.
 
-    We will attempt to login to the site automatically
-    using config.wiki["username"] and config.wiki["password"] if both are
-    defined.
+    We will attempt to login to the site automatically using
+    config.wiki["username"] and config.wiki["password"] if both are defined.
 
     Specifying a project without a lang or a lang without a project will raise
-    TypeError. If all three args are specified, `name` will be first tried,
-    then `project` and `lang`. If, with any number of args, a site cannot be
-    found in the config, SiteNotFoundError is raised.
+    TypeError. If all three args are specified, 'name' will be first tried,
+    then 'project' and 'lang' if 'name' doesn't work. If a site cannot be found
+    in the sitesdb, SiteNotFoundError will be raised. An empty sitesdb will be
+    created if none is found.
     """
-    # Check if config has been loaded, and load it if it hasn't:
     if not config.is_loaded():
         _load_config()
 
     # Someone specified a project without a lang (or a lang without a project)!
-    if (project is None and lang is not None) or (project is not None and
-                                                  lang is None):
+    if (project and not lang) or (not project and lang):
         e = "Keyword arguments 'lang' and 'project' must be specified together."
         raise TypeError(e)
 
-    # No args given, so return our default site (project is None implies lang
-    # is None, so we don't need to add that in):
-    if name is None and project is None:
+    # No args given, so return our default site:
+    if not name and not project and not lang:
         try:
             default = config.wiki["defaultSite"]
         except KeyError:
             e = "Default site is not specified in config."
             raise SiteNotFoundError(e)
-        try:
-            site = config.wiki["sites"][default]
-        except KeyError:
-            e = "Default site specified by config is not in the config's sites list."
-            raise SiteNotFoundError(e)
-        return _get_site_object_from_dict(default, site)
+        return _make_site_object(default)
 
     # Name arg given, but don't look at others unless `name` isn't found:
-    if name is not None:
+    if name:
         try:
-            site = config.wiki["sites"][name]
-        except KeyError:
-            if project is None:  # Implies lang is None, so only name was given
-                e = "Site '{0}' not found in config.".format(name)
-                raise SiteNotFoundError(e)
-            for sitename, site in config.wiki["sites"].items():
-                if site["project"] == project and site["lang"] == lang:
-                    return _get_site_object_from_dict(sitename, site)
-            e = "Neither site '{0}' nor site '{1}:{2}' found in config."
-            e.format(name, project, lang)
-            raise SiteNotFoundError(e)
-        else:
-            return _get_site_object_from_dict(name, site)
+            return _make_site_object(name)
+        except SiteNotFoundError:
+            if project and lang:
+                name = _get_site_name_from_sitesdb(project, lang)
+                if name:
+                    return _make_site_object(name)
+            raise
 
-    # If we end up here, then project and lang are both not None:
-    for sitename, site in config.wiki["sites"].items():
-        if site["project"] == project and site["lang"] == lang:
-            return _get_site_object_from_dict(sitename, site)
-    e = "Site '{0}:{1}' not found in config.".format(project, lang)
+    # If we end up here, then project and lang are the only args given:
+    name = _get_site_name_from_sitesdb(project, lang)
+    if name:
+        return _make_site_object(name)
+    e = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang)
     raise SiteNotFoundError(e)
 
-def add_site():
-    """STUB: config editing is required first.
+def add_site(project=None, lang=None, base_url=None, script_path="/w",
+             sql=None):
+    """Add a site to the sitesdb so it can be retrieved with get_site() later.
+
+    If only a project and a lang are given, we'll guess the base_url as
+    "http://{lang}.{project}.org". If this is wrong, provide the correct
+    base_url as an argument (in which case project and lang are ignored). Most
+    wikis use "/w" as the script path (meaning the API is located at
+    "{base_url}{script_path}/api.php" -> "http://{lang}.{project}.org/w/api.php"),
+    so this is the default. If your wiki is different, provide the script_path
+    as an argument. The only other argument to Site() that we can't get from
+    config files or by querying the wiki itself is SQL connection info, so
+    provide a dict of kwargs as `sql` and Site will pass it to
+    oursql.connect(**sql), allowing you to make queries with site.sql_query().
+
+    Returns the new Site object. If the site is already in our sitesdb, its old
+    entry is replaced (this can be done purposefully to update old site info).
+    Raises SiteNotFoundError if not enough information has been provided to
+    identify the site (e.g. a project but not a lang).
+    """
+    if not config.is_loaded():
+        _load_config()
+
+    if not base_url:
+        if not project or not lang:
+            e = "Without a base_url, both a project and a lang must be given."
+            raise SiteNotFoundError(e)
+        base_url = "http://{0}.{1}.org".format(lang, project)
+
+    login = (config.wiki.get("username"), config.wiki.get("password"))
+    cookiejar = _get_cookiejar()
+    user_agent = config.wiki.get("userAgent")
+    assert_edit = config.wiki.get("assert")
+    maxlag = config.wiki.get("maxlag")
+    search_config = config.wiki.get("search")
+
+    # Create a temp Site object to log in and load the other attributes:
+    site = Site(base_url=base_url, script_path=script_path, sql=sql,
+                login=login, cookiejar=cookiejar, user_agent=user_agent,
+                assert_edit=assert_edit, maxlag=maxlag,
+                search_config=search_config)
 
-    Returns True if the site was added successfully or False if the site was
-    already in our config. Raises ConfigError if saving the updated file failed
-    for some reason."""
-    pass
+    _add_site_to_sitesdb(site)
+    return site
 
-def del_site(name):
-    """STUB: config editing is required first.
+def remove_site(name=None, project=None, lang=None):
+    """Remove a site from the sitesdb.
 
     Returns True if the site was removed successfully or False if the site was
-    not in our config originally. Raises ConfigError if saving the updated file
-    failed for some reason."""
-    pass
+    not in our sitesdb originally. If all three args (name, project, and lang)
+    are given, we'll first try 'name' and then try the latter two if 'name'
+    wasn't found in the database. Raises TypeError if a project was given but
+    not a language, or vice versa. Will create an empty sitesdb if none was
+    found.
+    """
+    if not config.is_loaded():
+        _load_config()
+
+    # Someone specified a project without a lang (or a lang without a project)!
+    if (project and not lang) or (not project and lang):
+        e = "Keyword arguments 'lang' and 'project' must be specified together."
+        raise TypeError(e)
+
+    if name:
+        was_removed = _remove_site_from_sitesdb(name)
+        if not was_removed:
+            if project and lang:
+                name = _get_site_name_from_sitesdb(project, lang)
+                if name:
+                    return _remove_site_from_sitesdb(name)
+        return was_removed
+
+    if project and lang:
+        name = _get_site_name_from_sitesdb(project, lang)
+        if name:
+            return _remove_site_from_sitesdb(name)
+
+    return False