From 30b1b99a133585775a3e056be27b38f9f3a4cccb Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 25 Mar 2012 17:52:43 -0400 Subject: [PATCH 1/3] Cleaned up boolean logic a bit. --- earwigbot/wiki/page.py | 6 +++--- earwigbot/wiki/site.py | 42 +++++++++++++++++++++--------------------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/earwigbot/wiki/page.py b/earwigbot/wiki/page.py index 8407108..dfd5268 100644 --- a/earwigbot/wiki/page.py +++ b/earwigbot/wiki/page.py @@ -174,7 +174,7 @@ class Page(CopyrightMixin): Assuming the API is sound, this should not raise any exceptions. """ - if result is None: + if not result: params = {"action": "query", "rvprop": "user", "intoken": "edit", "prop": "info|revisions", "rvlimit": 1, "rvdir": "newer", "titles": self._title, "inprop": "protection|url"} @@ -240,7 +240,7 @@ class Page(CopyrightMixin): Don't call this directly, ever - use .get(force=True) if you want to force content reloading. """ - if result is None: + if not result: params = {"action": "query", "prop": "revisions", "rvlimit": 1, "rvprop": "content|timestamp", "titles": self._title} result = self._site._api_query(params) @@ -471,7 +471,7 @@ class Page(CopyrightMixin): """ if force: self._load_wrapper() - if self._fullurl is not None: + if self._fullurl: return self._fullurl else: slug = quote(self._title.replace(" ", "_"), safe="/:") diff --git a/earwigbot/wiki/site.py b/earwigbot/wiki/site.py index 8719036..0521f79 100644 --- a/earwigbot/wiki/site.py +++ b/earwigbot/wiki/site.py @@ -78,11 +78,12 @@ class Site(object): This probably isn't necessary to call yourself unless you're building a Site that's not in your config and you don't want to add it - normally all you need is tools.get_site(name), which creates the Site for you - based on your config file. We accept a bunch of kwargs, but the only - ones you really "need" are `base_url` and `script_path` - this is - enough to figure out an API url. 
`login`, a tuple of - (username, password), is highly recommended. `cookiejar` will be used - to store cookies, and we'll use a normal CookieJar if none is given. + based on your config file and the sites database. We accept a bunch of + kwargs, but the only ones you really "need" are `base_url` and + `script_path` - this is enough to figure out an API url. `login`, a + tuple of (username, password), is highly recommended. `cookiejar` will + be used to store cookies, and we'll use a normal CookieJar if none is + given. First, we'll store the given arguments as attributes, then set up our URL opener. We'll load any of the attributes that weren't given from @@ -112,11 +113,11 @@ class Site(object): self._search_config = search_config # Set up cookiejar and URL opener for making API queries: - if cookiejar is not None: + if cookiejar: self._cookiejar = cookiejar else: self._cookiejar = CookieJar() - if user_agent is None: + if not user_agent: user_agent = USER_AGENT # Set default UA from wiki.constants self._opener = build_opener(HTTPCookieProcessor(self._cookiejar)) self._opener.addheaders = [("User-Agent", user_agent), @@ -127,9 +128,9 @@ class Site(object): # If we have a name/pass and the API says we're not logged in, log in: self._login_info = name, password = login - if name is not None and password is not None: + if name and password: logged_in_as = self._get_username_from_cookies() - if logged_in_as is None or name != logged_in_as: + if not logged_in_as or name != logged_in_as: self._login(login) def __repr__(self): @@ -180,7 +181,7 @@ class Site(object): There's helpful MediaWiki API documentation at . """ - if self._base_url is None or self._script_path is None: + if not self._base_url or self._script_path is None: e = "Tried to do an API query, but no API URL is known." 
raise SiteAPIError(e) @@ -332,15 +333,15 @@ class Site(object): name = ''.join((self._name, "Token")) cookie = self._get_cookie(name, domain) - if cookie is not None: + if cookie: name = ''.join((self._name, "UserName")) user_name = self._get_cookie(name, domain) - if user_name is not None: + if user_name: return user_name.value name = "centralauth_Token" for cookie in self._cookiejar: - if cookie.domain_initial_dot is False or cookie.is_expired(): + if not cookie.domain_initial_dot or cookie.is_expired(): continue if cookie.name != name: continue @@ -348,7 +349,7 @@ class Site(object): search = ''.join(("(.*?)", re_escape(cookie.domain))) if re_match(search, domain): # Test it against our site user_name = self._get_cookie("centralauth_User", cookie.domain) - if user_name is not None: + if user_name: return user_name.value def _get_username_from_api(self): @@ -378,7 +379,7 @@ class Site(object): single API query for our username (or IP address) and return that. """ name = self._get_username_from_cookies() - if name is not None: + if name: return name return self._get_username_from_api() @@ -417,7 +418,7 @@ class Site(object): """ name, password = login params = {"action": "login", "lgname": name, "lgpassword": password} - if token is not None: + if token: params["lgtoken"] = token result = self._api_query(params) res = result["login"]["result"] @@ -455,10 +456,9 @@ class Site(object): def _sql_connect(self, **kwargs): """Attempt to establish a connection with this site's SQL database. - oursql.connect() will be called with self._sql_data as its kwargs, - which is usually config.wiki["sites"][self.name()]["sql"]. Any kwargs - given to this function will be passed to connect() and will have - precedence over the config file. + oursql.connect() will be called with self._sql_data as its kwargs. + Any kwargs given to this function will be passed to connect() and will + have precedence over the config file. Will raise SQLError() if the module "oursql" is not available. 
oursql may raise its own exceptions (e.g. oursql.InterfaceError) if it cannot @@ -631,6 +631,6 @@ class Site(object): If `username` is left as None, then a User object representing the currently logged-in (or anonymous!) user is returned. """ - if username is None: + if not username: username = self._get_username() return User(self, username) From 4a1cb4116255851a578d002d82254f9c29157219 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 31 Mar 2012 15:49:58 -0400 Subject: [PATCH 2/3] get_site(), add_site(), remove_site() implemented --- earwigbot/wiki/functions.py | 319 +++++++++++++++++++++++++++++++++----------- 1 file changed, 238 insertions(+), 81 deletions(-) diff --git a/earwigbot/wiki/functions.py b/earwigbot/wiki/functions.py index 5504306..52aa75d 100644 --- a/earwigbot/wiki/functions.py +++ b/earwigbot/wiki/functions.py @@ -24,7 +24,9 @@ EarwigBot's Wiki Toolset: Misc Functions This module, a component of the wiki package, contains miscellaneous functions -that are not methods of any class, like get_site(). +that are not methods of any class. Currently, it contains get_site(), +add_site(), and remove_site(). These functions act as bridges between the bot's +config files and Site objects. There's no need to import this module explicitly. All functions here are automatically available from earwigbot.wiki. @@ -36,21 +38,25 @@ from getpass import getpass from os import chmod, path import platform import stat +import sqlite3 as sqlite -import earwigbot +from earwigbot import __version__ from earwigbot.config import config from earwigbot.wiki.exceptions import SiteNotFoundError from earwigbot.wiki.site import Site -__all__ = ["get_site", "add_site", "del_site"] +__all__ = ["get_site", "add_site", "remove_site"] _cookiejar = None +_sitesdb = "sites.db" def _load_config(): - """Called by a config-requiring function, such as get_site(), when config + """Load the bot's config. 
+ + Called by a config-requiring function, such as get_site(), when config has not been loaded. This will usually happen only if we're running code directly from Python's interpreter and not the bot itself, because - earwigbot.py or core/main.py will already call these functions. + bot.py and earwigbot.runner will already call these functions. """ is_encrypted = config.load() if is_encrypted: # Passwords in the config file are encrypted @@ -59,21 +65,20 @@ def _load_config(): config.decrypt(config.wiki, "password") def _get_cookiejar(): - """Returns a LWPCookieJar object loaded from our .cookies file. The same - one is returned every time. + """Return a LWPCookieJar object loaded from our .cookies file. - The .cookies file is located in the project root, same directory as - config.yml and bot.py. If it doesn't exist, we will create the file and set - it to be readable and writeable only by us. If it exists but the - information inside is bogus, we will ignore it. + The same .cookies file is returned every time, located in the project root, + same directory as config.yml and bot.py. If it doesn't exist, we will + create the file and set it to be readable and writeable only by us. If it + exists but the information inside is bogus, we will ignore it. - This is normally called by _get_site_object_from_dict() (in turn called by + This is normally called by _make_site_object() (in turn called by get_site()), and the cookiejar is passed to our Site's constructor, used when it makes API queries. This way, we can easily preserve cookies between sites (e.g., for CentralAuth), making logins easier. """ global _cookiejar - if _cookiejar is not None: + if _cookiejar: return _cookiejar cookie_file = path.join(config.root_dir, ".cookies") @@ -94,17 +99,63 @@ def _get_cookiejar(): return _cookiejar -def _get_site_object_from_dict(name, d): - """Return a Site object based on the contents of a dict, probably acquired - through our config file, and a separate name. 
+def _create_sitesdb(): + """Initialize the sitesdb file with its three necessary tables.""" + script = """ + CREATE TABLE sites (site_name, site_project, site_lang, site_base_url, + site_article_path, site_script_path); + CREATE TABLE sql_data (sql_site, sql_data_key, sql_data_value); + CREATE TABLE namespaces (ns_site, ns_id, ns_name, ns_is_primary_name); + """ + with sqlite.connect(_sitesdb) as conn: + conn.executescript(script) + +def _load_site_from_sitesdb(name): + """Return all information stored in the sitesdb relating to site 'name'. + + The information will be returned as a tuple, containing the site's project, + language, base URL, article path, script path, SQL connection data, and + namespaces, in that order. If the site is not found in the database, + SiteNotFoundError will be raised. An empty database will be created before + the exception is raised if none exists. """ - project = d.get("project") - lang = d.get("lang") - base_url = d.get("baseURL") - article_path = d.get("articlePath") - script_path = d.get("scriptPath") - sql = d.get("sql", {}) - namespaces = d.get("namespaces", {}) + query1 = "SELECT * FROM sites WHERE site_name = ?" + query2 = "SELECT sql_data_key, sql_data_value FROM sql_data WHERE sql_site = ?" + query3 = "SELECT ns_id, ns_name, ns_is_primary_name FROM namespaces WHERE ns_site = ?" 
+ error = "Site '{0}' not found in the sitesdb.".format(name) + with sqlite.connect(_sitesdb) as conn: + try: + site_data = conn.execute(query1, (name,)).fetchone() + except sqllite.OperationalError: + _create_sitesdb() + raise SiteNotFoundError(error) + if not site_data: + raise SiteNotFoundError(error) + sql_data = conn.execute(query2, (name,)).fetchall() + ns_data = conn.execute(query3, (name,)).fetchall() + + project, lang, base_url, article_path, script_path = site_data + sql = dict(sql_data) + namespaces = {} + for ns_id, ns_name, ns_is_primary_name in ns_data: + try: + if ns_is_primary_name: # "Primary" name goes first in list + namespaces[ns_id].insert(0, ns_name) + else: # Ordering of the aliases doesn't matter + namespaces[ns_id].append(ns_name) + except KeyError: + namespaces[ns_id] = [ns_name] + + return project, lang, base_url, article_path, script_path, sql, namespaces + +def _make_site_object(name): + """Return a Site object associated with the site 'name' in our sitesdb. + + This calls _load_site_from_sitesdb(), so SiteNotFoundError will be raised + if the site is not in our sitesdb. 
+ """ + (project, lang, base_url, article_path, script_path, sql, + namespaces) = _load_site_from_sitesdb(name) login = (config.wiki.get("username"), config.wiki.get("password")) cookiejar = _get_cookiejar() user_agent = config.wiki.get("userAgent") @@ -113,7 +164,7 @@ def _get_site_object_from_dict(name, d): search_config = config.wiki.get("search") if user_agent: - user_agent = user_agent.replace("$1", earwigbot.__version__) + user_agent = user_agent.replace("$1", __version__) user_agent = user_agent.replace("$2", platform.python_version()) return Site(name=name, project=project, lang=lang, base_url=base_url, @@ -122,90 +173,196 @@ def _get_site_object_from_dict(name, d): user_agent=user_agent, assert_edit=assert_edit, maxlag=maxlag, search_config=search_config) +def _get_site_name_from_sitesdb(project, lang): + """Return the name of the first site with the specified project and lang. + + If the site is not found, return None. An empty sitesdb will be created if + none exists. + """ + query = "SELECT site_name FROM site WHERE site_project = ? and site_lang = ?" + with sqlite.connect(_sitesdb) as conn: + try: + return conn.execute(query, (project, lang)).fetchone() + except sqllite.OperationalError: + _create_sitesdb() + +def _add_site_to_sitesdb(site): + """Extract relevant info from a Site object and add it to the sitesdb. + + Works like a reverse _load_site_from_sitesdb(); the site's project, + language, base URL, article path, script path, SQL connection data, and + namespaces are extracted from the site and inserted into the sites + database. If the sitesdb doesn't exist, we'll create it first. 
+ """ + name = site.name + sites_data = (name, site.project, site.lang, site._base_url, + site._article_path, site._script_path) + sql_data = [(name, key, val) for key, val in site._sql_data.iteritems()] + ns_data = [] + for ns_id, ns_names in site._namespaces.iteritems(): + ns_data.append((name, ns_id, ns_names.pop(0), True)) + for ns_name in ns_names: + ns_data.append((name, ns_id, ns_name, False)) + + with sqlite.connect(_sitesdb) as conn: + check_exists = "SELECT 1 FROM sites WHERE site_name = ?" + try: + exists = conn.execute(check_exists, (name,)).fetchone() + except sqlite.OperationalError: + _create_sitesdb() + else: + if exists: + conn.execute("DELETE FROM sites WHERE site_name = ?", (name,)) + conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,)) + conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,)) + conn.execute("INSERT INTO sites VALUES (?, ?, ?, ?, ?, ?)", sites_data) + conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)", sql_data) + conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)", ns_data) + +def _remove_site_from_sitesdb(name): + """Remove a site by name from the sitesdb.""" + with sqlite.connect(_sitesdb) as conn: + cursor = conn.execute("DELETE FROM sites WHERE site_name = ?", (name,)) + if cursor.rowcount == 0: + return False + else: + conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,)) + conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,)) + return True + def get_site(name=None, project=None, lang=None): - """Returns a Site instance based on information from our config file. + """Return a Site instance based on information from the sitesdb. - With no arguments, returns the default site as specified by our config - file. This is default = config.wiki["defaultSite"]; - config.wiki["sites"][default]. + With no arguments, return the default site as specified by our config + file. This is config.wiki["defaultSite"]. 
- With `name` specified, returns the site specified by - config.wiki["sites"][name]. + With 'name' specified, return the site with that name. This is equivalent + to the site's 'wikiid' in the API, like 'enwiki'. - With `project` and `lang` specified, returns the site specified by the - member of config.wiki["sites"], `s`, for which s["project"] == project and - s["lang"] == lang. + With 'project' and 'lang' specified, return the site whose project and + language match these values. If there are multiple sites with the same + values (unlikely), this is not a reliable way of loading a site. Call the + function with an explicit 'name' in that case. - We will attempt to login to the site automatically - using config.wiki["username"] and config.wiki["password"] if both are - defined. + We will attempt to login to the site automatically using + config.wiki["username"] and config.wiki["password"] if both are defined. Specifying a project without a lang or a lang without a project will raise - TypeError. If all three args are specified, `name` will be first tried, - then `project` and `lang`. If, with any number of args, a site cannot be - found in the config, SiteNotFoundError is raised. + TypeError. If all three args are specified, 'name' will be first tried, + then 'project' and 'lang' if 'name' doesn't work. If a site cannot be found + in the sitesdb, SiteNotFoundError will be raised. An empty sitesdb will be + created if none is found. """ - # Check if config has been loaded, and load it if it hasn't: if not config.is_loaded(): _load_config() # Someone specified a project without a lang (or a lang without a project)! - if (project is None and lang is not None) or (project is not None and - lang is None): + if (project and not lang) or (not project and lang): e = "Keyword arguments 'lang' and 'project' must be specified together." 
raise TypeError(e) - # No args given, so return our default site (project is None implies lang - # is None, so we don't need to add that in): - if name is None and project is None: + # No args given, so return our default site: + if not name and not project and not lang: try: default = config.wiki["defaultSite"] except KeyError: e = "Default site is not specified in config." raise SiteNotFoundError(e) - try: - site = config.wiki["sites"][default] - except KeyError: - e = "Default site specified by config is not in the config's sites list." - raise SiteNotFoundError(e) - return _get_site_object_from_dict(default, site) + return _make_site_object(default) # Name arg given, but don't look at others unless `name` isn't found: - if name is not None: + if name: try: - site = config.wiki["sites"][name] - except KeyError: - if project is None: # Implies lang is None, so only name was given - e = "Site '{0}' not found in config.".format(name) - raise SiteNotFoundError(e) - for sitename, site in config.wiki["sites"].items(): - if site["project"] == project and site["lang"] == lang: - return _get_site_object_from_dict(sitename, site) - e = "Neither site '{0}' nor site '{1}:{2}' found in config." 
- e.format(name, project, lang) - raise SiteNotFoundError(e) - else: - return _get_site_object_from_dict(name, site) + return _make_site_object(name) + except SiteNotFoundError: + if project and lang: + name = _get_site_name_from_sitesdb(project, lang) + if name: + return _make_site_object(name) + raise - # If we end up here, then project and lang are both not None: - for sitename, site in config.wiki["sites"].items(): - if site["project"] == project and site["lang"] == lang: - return _get_site_object_from_dict(sitename, site) - e = "Site '{0}:{1}' not found in config.".format(project, lang) + # If we end up here, then project and lang are the only args given: + name = _get_site_name_from_sitesdb(project, lang) + if name: + return _make_site_object(name) + e = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang) raise SiteNotFoundError(e) -def add_site(): - """STUB: config editing is required first. +def add_site(project=None, lang=None, base_url=None, script_path="/w", + sql=None): + """Add a site to the sitesdb so it can be retrieved with get_site() later. + + If only a project and a lang are given, we'll guess the base_url as + "http://{lang}.{project}.org". If this is wrong, provide the correct + base_url as an argument (in which case project and lang are ignored). Most + wikis use "/w" as the script path (meaning the API is located at + "{base_url}{script_path}/api.php" -> "http://{lang}.{project}.org/w/api.php"), + so this is the default. If your wiki is different, provide the script_path + as an argument. The only other argument to Site() that we can't get from + config files or by querying the wiki itself is SQL connection info, so + provide a dict of kwargs as `sql` and Site will be pass it to + oursql.connect(**sql), allowing you to make queries with site.sql_query(). + + Returns True if the site was added successfully or False if the site is + already in our sitesdb (this can be done purposefully to update old site + info). 
Raises SiteNotFoundError if not enough information has been provided + to identify the site (e.g. a project but not a lang). + """ + if not config.is_loaded(): + _load_config() + + if not base_url: + if not project or not lang: + e = "Without a base_url, both a project and a lang must be given." + raise SiteNotFoundError(e) + base_url = "http://{0}.{1}.org".format(lang, project) + + login = (config.wiki.get("username"), config.wiki.get("password")) + cookiejar = _get_cookiejar() + user_agent = config.wiki.get("userAgent") + assert_edit = config.wiki.get("assert") + maxlag = config.wiki.get("maxlag") + search_config = config.wiki.get("search") + + # Create a temp Site object to log in and load the other attributes: + site = Site(base_url=base_url, script_path=script_path, sql=sql, + login=login, cookiejar=cookiejar, user_agent=user_agent, + assert_edit=assert_edit, maxlag=maxlag, + search_config=search_config) - Returns True if the site was added successfully or False if the site was - already in our config. Raises ConfigError if saving the updated file failed - for some reason.""" - pass + _add_site_to_sitesdb(site) + return site -def del_site(name): - """STUB: config editing is required first. +def remove_site(name=None, project=None, lang=None): + """Remove a site from the sitesdb. Returns True if the site was removed successfully or False if the site was - not in our config originally. Raises ConfigError if saving the updated file - failed for some reason.""" - pass + not in our sitesdb originally. If all three args (name, project, and lang) + are given, we'll first try 'name' and then try the latter two if 'name' + wasn't found in the database. Raises TypeError if a project was given but + not a language, or vice versa. Will create an empty sitesdb if none was + found. + """ + if not config.is_loaded(): + _load_config() + + # Someone specified a project without a lang (or a lang without a project)! 
+ if (project and not lang) or (not project and lang): + e = "Keyword arguments 'lang' and 'project' must be specified together." + raise TypeError(e) + + if name: + was_removed = _remove_site_from_sitesdb(name) + if not was_removed: + if project and lang: + name = _get_site_name_from_sitesdb(project, lang) + if name: + return _remove_site_from_sitesdb(name) + return was_removed + + if project and lang: + name = _get_site_name_from_sitesdb(project, lang) + if name: + return _remove_site_from_sitesdb(name) + + return False From 7edfb0b1afe4dba425cbf4b2c7a5e0e3cc45ee11 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 31 Mar 2012 17:46:06 -0400 Subject: [PATCH 3/3] Re-organize SitesDB code; protocol-relative URLs --- .gitignore | 22 +-- earwigbot/config.py | 2 +- earwigbot/irc/watcher.py | 2 +- earwigbot/wiki/__init__.py | 2 +- earwigbot/wiki/functions.py | 368 ----------------------------------------- earwigbot/wiki/site.py | 28 ++-- earwigbot/wiki/sitesdb.py | 392 ++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 420 insertions(+), 396 deletions(-) delete mode 100644 earwigbot/wiki/functions.py create mode 100644 earwigbot/wiki/sitesdb.py diff --git a/.gitignore b/.gitignore index 5c965b9..d2b75fb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,19 +1,11 @@ -# Ignore python bytecode: -*.pyc - -# Ignore bot-specific config file: -config.yml - -# Ignore logs directory: +# Ignore bot-specific files: logs/ - -# Ignore cookies file: +config.yml +sites.db .cookies -# Ignore OS X's crud: -.DS_Store +# Ignore python bytecode: +*.pyc -# Ignore pydev's nonsense: -.project -.pydevproject -.settings/ +# Ignore OS X's stuff: +.DS_Store diff --git a/earwigbot/config.py b/earwigbot/config.py index e0ef26a..f1a977c 100644 --- a/earwigbot/config.py +++ b/earwigbot/config.py @@ -176,7 +176,7 @@ class _BotConfig(object): return self._root_dir @property - def config_path(self): + def path(self): return self._config_path @property diff --git a/earwigbot/irc/watcher.py 
b/earwigbot/irc/watcher.py index f3731a7..ad206d6 100644 --- a/earwigbot/irc/watcher.py +++ b/earwigbot/irc/watcher.py @@ -89,7 +89,7 @@ class Watcher(IRCConnection): return module = imp.new_module("_rc_event_processing_rules") try: - exec compile(rules, config.config_path, "exec") in module.__dict__ + exec compile(rules, config.path, "exec") in module.__dict__ except Exception: e = "Could not compile config file's RC event rules" self.logger.exception(e) diff --git a/earwigbot/wiki/__init__.py b/earwigbot/wiki/__init__.py index 03a8e9e..e48be82 100644 --- a/earwigbot/wiki/__init__.py +++ b/earwigbot/wiki/__init__.py @@ -36,9 +36,9 @@ logger.addHandler(_log.NullHandler()) from earwigbot.wiki.constants import * from earwigbot.wiki.exceptions import * -from earwigbot.wiki.functions import * from earwigbot.wiki.category import Category from earwigbot.wiki.page import Page from earwigbot.wiki.site import Site +from earwigbot.wiki.sitesdb import get_site, add_site, remove_site from earwigbot.wiki.user import User diff --git a/earwigbot/wiki/functions.py b/earwigbot/wiki/functions.py deleted file mode 100644 index 52aa75d..0000000 --- a/earwigbot/wiki/functions.py +++ /dev/null @@ -1,368 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2009-2012 by Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -""" -EarwigBot's Wiki Toolset: Misc Functions - -This module, a component of the wiki package, contains miscellaneous functions -that are not methods of any class. Currently, it contains get_site(), -add_site(), and remove_site(). These functions act as bridges between the bot's -config files and Site objects. - -There's no need to import this module explicitly. All functions here are -automatically available from earwigbot.wiki. -""" - -from cookielib import LWPCookieJar, LoadError -import errno -from getpass import getpass -from os import chmod, path -import platform -import stat -import sqlite3 as sqlite - -from earwigbot import __version__ -from earwigbot.config import config -from earwigbot.wiki.exceptions import SiteNotFoundError -from earwigbot.wiki.site import Site - -__all__ = ["get_site", "add_site", "remove_site"] - -_cookiejar = None -_sitesdb = "sites.db" - -def _load_config(): - """Load the bot's config. - - Called by a config-requiring function, such as get_site(), when config - has not been loaded. This will usually happen only if we're running code - directly from Python's interpreter and not the bot itself, because - bot.py and earwigbot.runner will already call these functions. 
- """ - is_encrypted = config.load() - if is_encrypted: # Passwords in the config file are encrypted - key = getpass("Enter key to unencrypt bot passwords: ") - config._decryption_key = key - config.decrypt(config.wiki, "password") - -def _get_cookiejar(): - """Return a LWPCookieJar object loaded from our .cookies file. - - The same .cookies file is returned every time, located in the project root, - same directory as config.yml and bot.py. If it doesn't exist, we will - create the file and set it to be readable and writeable only by us. If it - exists but the information inside is bogus, we will ignore it. - - This is normally called by _make_site_object() (in turn called by - get_site()), and the cookiejar is passed to our Site's constructor, used - when it makes API queries. This way, we can easily preserve cookies between - sites (e.g., for CentralAuth), making logins easier. - """ - global _cookiejar - if _cookiejar: - return _cookiejar - - cookie_file = path.join(config.root_dir, ".cookies") - _cookiejar = LWPCookieJar(cookie_file) - - try: - _cookiejar.load() - except LoadError: - pass # File contains bad data, so ignore it completely - except IOError as e: - if e.errno == errno.ENOENT: # "No such file or directory" - # Create the file and restrict reading/writing only to the owner, - # so others can't peak at our cookies: - open(cookie_file, "w").close() - chmod(cookie_file, stat.S_IRUSR|stat.S_IWUSR) - else: - raise - - return _cookiejar - -def _create_sitesdb(): - """Initialize the sitesdb file with its three necessary tables.""" - script = """ - CREATE TABLE sites (site_name, site_project, site_lang, site_base_url, - site_article_path, site_script_path); - CREATE TABLE sql_data (sql_site, sql_data_key, sql_data_value); - CREATE TABLE namespaces (ns_site, ns_id, ns_name, ns_is_primary_name); - """ - with sqlite.connect(_sitesdb) as conn: - conn.executescript(script) - -def _load_site_from_sitesdb(name): - """Return all information stored in the sitesdb 
relating to site 'name'. - - The information will be returned as a tuple, containing the site's project, - language, base URL, article path, script path, SQL connection data, and - namespaces, in that order. If the site is not found in the database, - SiteNotFoundError will be raised. An empty database will be created before - the exception is raised if none exists. - """ - query1 = "SELECT * FROM sites WHERE site_name = ?" - query2 = "SELECT sql_data_key, sql_data_value FROM sql_data WHERE sql_site = ?" - query3 = "SELECT ns_id, ns_name, ns_is_primary_name FROM namespaces WHERE ns_site = ?" - error = "Site '{0}' not found in the sitesdb.".format(name) - with sqlite.connect(_sitesdb) as conn: - try: - site_data = conn.execute(query1, (name,)).fetchone() - except sqllite.OperationalError: - _create_sitesdb() - raise SiteNotFoundError(error) - if not site_data: - raise SiteNotFoundError(error) - sql_data = conn.execute(query2, (name,)).fetchall() - ns_data = conn.execute(query3, (name,)).fetchall() - - project, lang, base_url, article_path, script_path = site_data - sql = dict(sql_data) - namespaces = {} - for ns_id, ns_name, ns_is_primary_name in ns_data: - try: - if ns_is_primary_name: # "Primary" name goes first in list - namespaces[ns_id].insert(0, ns_name) - else: # Ordering of the aliases doesn't matter - namespaces[ns_id].append(ns_name) - except KeyError: - namespaces[ns_id] = [ns_name] - - return project, lang, base_url, article_path, script_path, sql, namespaces - -def _make_site_object(name): - """Return a Site object associated with the site 'name' in our sitesdb. - - This calls _load_site_from_sitesdb(), so SiteNotFoundError will be raised - if the site is not in our sitesdb. 
- """ - (project, lang, base_url, article_path, script_path, sql, - namespaces) = _load_site_from_sitesdb(name) - login = (config.wiki.get("username"), config.wiki.get("password")) - cookiejar = _get_cookiejar() - user_agent = config.wiki.get("userAgent") - assert_edit = config.wiki.get("assert") - maxlag = config.wiki.get("maxlag") - search_config = config.wiki.get("search") - - if user_agent: - user_agent = user_agent.replace("$1", __version__) - user_agent = user_agent.replace("$2", platform.python_version()) - - return Site(name=name, project=project, lang=lang, base_url=base_url, - article_path=article_path, script_path=script_path, sql=sql, - namespaces=namespaces, login=login, cookiejar=cookiejar, - user_agent=user_agent, assert_edit=assert_edit, maxlag=maxlag, - search_config=search_config) - -def _get_site_name_from_sitesdb(project, lang): - """Return the name of the first site with the specified project and lang. - - If the site is not found, return None. An empty sitesdb will be created if - none exists. - """ - query = "SELECT site_name FROM site WHERE site_project = ? and site_lang = ?" - with sqlite.connect(_sitesdb) as conn: - try: - return conn.execute(query, (project, lang)).fetchone() - except sqllite.OperationalError: - _create_sitesdb() - -def _add_site_to_sitesdb(site): - """Extract relevant info from a Site object and add it to the sitesdb. - - Works like a reverse _load_site_from_sitesdb(); the site's project, - language, base URL, article path, script path, SQL connection data, and - namespaces are extracted from the site and inserted into the sites - database. If the sitesdb doesn't exist, we'll create it first. 
- """ - name = site.name - sites_data = (name, site.project, site.lang, site._base_url, - site._article_path, site._script_path) - sql_data = [(name, key, val) for key, val in site._sql_data.iteritems()] - ns_data = [] - for ns_id, ns_names in site._namespaces.iteritems(): - ns_data.append((name, ns_id, ns_names.pop(0), True)) - for ns_name in ns_names: - ns_data.append((name, ns_id, ns_name, False)) - - with sqlite.connect(_sitesdb) as conn: - check_exists = "SELECT 1 FROM sites WHERE site_name = ?" - try: - exists = conn.execute(check_exists, (name,)).fetchone() - except sqlite.OperationalError: - _create_sitesdb() - else: - if exists: - conn.execute("DELETE FROM sites WHERE site_name = ?", (name,)) - conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,)) - conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,)) - conn.execute("INSERT INTO sites VALUES (?, ?, ?, ?, ?, ?)", sites_data) - conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)", sql_data) - conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)", ns_data) - -def _remove_site_from_sitesdb(name): - """Remove a site by name from the sitesdb.""" - with sqlite.connect(_sitesdb) as conn: - cursor = conn.execute("DELETE FROM sites WHERE site_name = ?", (name,)) - if cursor.rowcount == 0: - return False - else: - conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,)) - conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,)) - return True - -def get_site(name=None, project=None, lang=None): - """Return a Site instance based on information from the sitesdb. - - With no arguments, return the default site as specified by our config - file. This is config.wiki["defaultSite"]. - - With 'name' specified, return the site with that name. This is equivalent - to the site's 'wikiid' in the API, like 'enwiki'. - - With 'project' and 'lang' specified, return the site whose project and - language match these values. 
If there are multiple sites with the same - values (unlikely), this is not a reliable way of loading a site. Call the - function with an explicit 'name' in that case. - - We will attempt to login to the site automatically using - config.wiki["username"] and config.wiki["password"] if both are defined. - - Specifying a project without a lang or a lang without a project will raise - TypeError. If all three args are specified, 'name' will be first tried, - then 'project' and 'lang' if 'name' doesn't work. If a site cannot be found - in the sitesdb, SiteNotFoundError will be raised. An empty sitesdb will be - created if none is found. - """ - if not config.is_loaded(): - _load_config() - - # Someone specified a project without a lang (or a lang without a project)! - if (project and not lang) or (not project and lang): - e = "Keyword arguments 'lang' and 'project' must be specified together." - raise TypeError(e) - - # No args given, so return our default site: - if not name and not project and not lang: - try: - default = config.wiki["defaultSite"] - except KeyError: - e = "Default site is not specified in config." - raise SiteNotFoundError(e) - return _make_site_object(default) - - # Name arg given, but don't look at others unless `name` isn't found: - if name: - try: - return _make_site_object(name) - except SiteNotFoundError: - if project and lang: - name = _get_site_name_from_sitesdb(project, lang) - if name: - return _make_site_object(name) - raise - - # If we end up here, then project and lang are the only args given: - name = _get_site_name_from_sitesdb(project, lang) - if name: - return _make_site_object(name) - e = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang) - raise SiteNotFoundError(e) - -def add_site(project=None, lang=None, base_url=None, script_path="/w", - sql=None): - """Add a site to the sitesdb so it can be retrieved with get_site() later. 
- - If only a project and a lang are given, we'll guess the base_url as - "http://{lang}.{project}.org". If this is wrong, provide the correct - base_url as an argument (in which case project and lang are ignored). Most - wikis use "/w" as the script path (meaning the API is located at - "{base_url}{script_path}/api.php" -> "http://{lang}.{project}.org/w/api.php"), - so this is the default. If your wiki is different, provide the script_path - as an argument. The only other argument to Site() that we can't get from - config files or by querying the wiki itself is SQL connection info, so - provide a dict of kwargs as `sql` and Site will be pass it to - oursql.connect(**sql), allowing you to make queries with site.sql_query(). - - Returns True if the site was added successfully or False if the site is - already in our sitesdb (this can be done purposefully to update old site - info). Raises SiteNotFoundError if not enough information has been provided - to identify the site (e.g. a project but not a lang). - """ - if not config.is_loaded(): - _load_config() - - if not base_url: - if not project or not lang: - e = "Without a base_url, both a project and a lang must be given." - raise SiteNotFoundError(e) - base_url = "http://{0}.{1}.org".format(lang, project) - - login = (config.wiki.get("username"), config.wiki.get("password")) - cookiejar = _get_cookiejar() - user_agent = config.wiki.get("userAgent") - assert_edit = config.wiki.get("assert") - maxlag = config.wiki.get("maxlag") - search_config = config.wiki.get("search") - - # Create a temp Site object to log in and load the other attributes: - site = Site(base_url=base_url, script_path=script_path, sql=sql, - login=login, cookiejar=cookiejar, user_agent=user_agent, - assert_edit=assert_edit, maxlag=maxlag, - search_config=search_config) - - _add_site_to_sitesdb(site) - return site - -def remove_site(name=None, project=None, lang=None): - """Remove a site from the sitesdb. 
- - Returns True if the site was removed successfully or False if the site was - not in our sitesdb originally. If all three args (name, project, and lang) - are given, we'll first try 'name' and then try the latter two if 'name' - wasn't found in the database. Raises TypeError if a project was given but - not a language, or vice versa. Will create an empty sitesdb if none was - found. - """ - if not config.is_loaded(): - _load_config() - - # Someone specified a project without a lang (or a lang without a project)! - if (project and not lang) or (not project and lang): - e = "Keyword arguments 'lang' and 'project' must be specified together." - raise TypeError(e) - - if name: - was_removed = _remove_site_from_sitesdb(name) - if not was_removed: - if project and lang: - name = _get_site_name_from_sitesdb(project, lang) - if name: - return _remove_site_from_sitesdb(name) - return was_removed - - if project and lang: - name = _get_site_name_from_sitesdb(project, lang) - if name: - return _remove_site_from_sitesdb(name) - - return False diff --git a/earwigbot/wiki/site.py b/earwigbot/wiki/site.py index 0521f79..5c0b1c7 100644 --- a/earwigbot/wiki/site.py +++ b/earwigbot/wiki/site.py @@ -71,8 +71,8 @@ class Site(object): def __init__(self, name=None, project=None, lang=None, base_url=None, article_path=None, script_path=None, sql=None, namespaces=None, login=(None, None), cookiejar=None, - user_agent=None, assert_edit=None, maxlag=None, - search_config=(None, None)): + user_agent=None, use_https=False, assert_edit=None, + maxlag=None, search_config=(None, None)): """Constructor for new Site instances. 
This probably isn't necessary to call yourself unless you're building a @@ -100,7 +100,8 @@ class Site(object): self._script_path = script_path self._namespaces = namespaces - # Attributes used for API queries: + # Attributes used for API queries: + self._use_https = use_https self._assert_edit = assert_edit self._maxlag = maxlag self._max_retries = 5 @@ -138,10 +139,10 @@ class Site(object): res = ", ".join(( "Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}", "base_url={_base_url!r}", "article_path={_article_path!r}", - "script_path={_script_path!r}", "assert_edit={_assert_edit!r}", - "maxlag={_maxlag!r}", "sql={_sql!r}", "login={0}", - "user_agent={2!r}", "cookiejar={1})" - )) + "script_path={_script_path!r}", "use_https={_use_https!r}", + "assert_edit={_assert_edit!r}", "maxlag={_maxlag!r}", + "sql={_sql_data!r}", "login={0}", "user_agent={2!r}", + "cookiejar={1})")) name, password = self._login_info login = "({0}, {1})".format(repr(name), "hidden" if password else None) cookies = self._cookiejar.__class__.__name__ @@ -163,7 +164,9 @@ class Site(object): This will first attempt to construct an API url from self._base_url and self._script_path. We need both of these, or else we'll raise - SiteAPIError. + SiteAPIError. If self._base_url is protocol-relative (introduced in + MediaWiki 1.18), we'll choose HTTPS if self._use_https is True, + otherwise HTTP. We'll encode the given params, adding format=json along the way, as well as &assert= and &maxlag= based on self._assert_edit and _maxlag. @@ -185,7 +188,13 @@ class Site(object): e = "Tried to do an API query, but no API URL is known."
raise SiteAPIError(e) - url = ''.join((self._base_url, self._script_path, "/api.php")) + base_url = self._base_url + if base_url.startswith("//"): # Protocol-relative URLs from 1.18 + if self._use_https: + base_url = "https:" + base_url + else: + base_url = "http:" + base_url + url = ''.join((base_url, self._script_path, "/api.php")) params["format"] = "json" # This is the only format we understand if self._assert_edit: # If requested, ensure that we're logged in @@ -194,7 +203,6 @@ class Site(object): params["maxlag"] = self._maxlag data = urlencode(params) - logger.debug("{0} -> {1}".format(url, data)) try: diff --git a/earwigbot/wiki/sitesdb.py b/earwigbot/wiki/sitesdb.py new file mode 100644 index 0000000..0bd5c76 --- /dev/null +++ b/earwigbot/wiki/sitesdb.py @@ -0,0 +1,392 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2009-2012 by Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+
+from cookielib import LWPCookieJar, LoadError
+import errno
+from getpass import getpass
+from os import chmod, path
+from platform import python_version
+import stat
+import sqlite3 as sqlite
+
+from earwigbot import __version__
+from earwigbot.config import config
+from earwigbot.wiki.exceptions import SiteNotFoundError
+from earwigbot.wiki.site import Site
+
+__all__ = ["SitesDBManager", "get_site", "add_site", "remove_site"]
+
+class SitesDBManager(object):
+    """
+    EarwigBot's Wiki Toolset: Sites Database Manager
+
+    This class controls the sites.db file, which stores information about all
+    wiki sites known to the bot. Three public methods act as bridges between
+    the bot's config files and Site objects:
+    get_site    -- returns a Site object corresponding to a given site name
+    add_site    -- stores a site in the database, given connection info
+    remove_site -- removes a site from the database, given its name
+
+    There's usually no need to use this class directly. All public methods
+    here are available as earwigbot.wiki.get_site(), earwigbot.wiki.add_site(),
+    and earwigbot.wiki.remove_site(), which use a sites.db file located in the
+    same directory as our config.yml file. Lower-level access can be achieved
+    by importing the manager class
+    (`from earwigbot.wiki.sitesdb import SitesDBManager`).
+    """
+
+    def __init__(self, db_file):
+        """Set up the manager with an attribute for the sitesdb filename."""
+        self._cookiejar = None
+        self._sitesdb = db_file
+
+    def _load_config(self):
+        """Load the bot's config.
+
+        Called by a config-requiring function, such as get_site(), when config
+        has not been loaded. This will usually happen only if we're running
+        code directly from Python's interpreter and not the bot itself, because
+        bot.py and earwigbot.runner will already call these functions.
+        """
+        is_encrypted = config.load()
+        if is_encrypted:  # Passwords in the config file are encrypted
+            key = getpass("Enter key to unencrypt bot passwords: ")
+            config._decryption_key = key
+            config.decrypt(config.wiki, "password")
+
+    def _get_cookiejar(self):
+        """Return a LWPCookieJar object loaded from our .cookies file.
+
+        The same .cookies file is returned every time, located in the project
+        root, same directory as config.yml and bot.py. If it doesn't exist, we
+        will create the file and set it to be readable and writeable only by
+        us. If it exists but the information inside is bogus, we'll ignore it.
+
+        This is normally called by _make_site_object() (in turn called by
+        get_site()), and the cookiejar is passed to our Site's constructor,
+        used when it makes API queries. This way, we can easily preserve
+        cookies between sites (e.g., for CentralAuth), making logins easier.
+        """
+        # Compare against None explicitly: a cookiejar with no cookies in it
+        # has a length of zero and is therefore falsy, which would make us
+        # build a brand new jar on every call instead of sharing one.
+        if self._cookiejar is not None:
+            return self._cookiejar
+
+        cookie_file = path.join(config.root_dir, ".cookies")
+        self._cookiejar = LWPCookieJar(cookie_file)
+
+        try:
+            self._cookiejar.load()
+        except LoadError:
+            pass  # File contains bad data, so ignore it completely
+        except IOError as e:
+            if e.errno == errno.ENOENT:  # "No such file or directory"
+                # Create the file and restrict reading/writing only to the
+                # owner, so others can't peek at our cookies:
+                open(cookie_file, "w").close()
+                chmod(cookie_file, stat.S_IRUSR|stat.S_IWUSR)
+            else:
+                raise
+
+        return self._cookiejar
+
+    def _create_sitesdb(self):
+        """Initialize the sitesdb file with its three necessary tables."""
+        script = """
+        CREATE TABLE sites (site_name, site_project, site_lang, site_base_url,
+                            site_article_path, site_script_path);
+        CREATE TABLE sql_data (sql_site, sql_data_key, sql_data_value);
+        CREATE TABLE namespaces (ns_site, ns_id, ns_name, ns_is_primary_name);
+        """
+        with sqlite.connect(self._sitesdb) as conn:
+            conn.executescript(script)
+
+    def _load_site_from_sitesdb(self, name):
+        """Return all information stored in the sitesdb relating to given site.
+
+        The information will be returned as a tuple, containing the site's
+        name, project, language, base URL, article path, script path, SQL
+        connection data, and namespaces, in that order. If the site is not
+        found in the database, SiteNotFoundError will be raised. An empty
+        database will be created before the exception is raised if none exists.
+        """
+        query1 = "SELECT * FROM sites WHERE site_name = ?"
+        query2 = "SELECT sql_data_key, sql_data_value FROM sql_data WHERE sql_site = ?"
+        query3 = "SELECT ns_id, ns_name, ns_is_primary_name FROM namespaces WHERE ns_site = ?"
+        error = "Site '{0}' not found in the sitesdb.".format(name)
+        with sqlite.connect(self._sitesdb) as conn:
+            try:
+                site_data = conn.execute(query1, (name,)).fetchone()
+            except sqlite.OperationalError:
+                # The tables don't exist yet, so the site can't either:
+                self._create_sitesdb()
+                raise SiteNotFoundError(error)
+            if not site_data:
+                raise SiteNotFoundError(error)
+            sql_data = conn.execute(query2, (name,)).fetchall()
+            ns_data = conn.execute(query3, (name,)).fetchall()
+
+        name, project, lang, base_url, article_path, script_path = site_data
+        sql = dict(sql_data)
+        namespaces = {}
+        for ns_id, ns_name, ns_is_primary_name in ns_data:
+            try:
+                if ns_is_primary_name:  # "Primary" name goes first in list
+                    namespaces[ns_id].insert(0, ns_name)
+                else:  # Ordering of the aliases doesn't matter
+                    namespaces[ns_id].append(ns_name)
+            except KeyError:
+                namespaces[ns_id] = [ns_name]
+
+        return (name, project, lang, base_url, article_path, script_path, sql,
+                namespaces)
+
+    def _make_site_object(self, name):
+        """Return a Site object associated with the site 'name' in our sitesdb.
+
+        This calls _load_site_from_sitesdb(), so SiteNotFoundError will be
+        raised if the site is not in our sitesdb.
+        """
+        (name, project, lang, base_url, article_path, script_path, sql,
+         namespaces) = self._load_site_from_sitesdb(name)
+        login = (config.wiki.get("username"), config.wiki.get("password"))
+        cookiejar = self._get_cookiejar()
+        user_agent = config.wiki.get("userAgent")
+        use_https = config.wiki.get("useHTTPS", False)
+        assert_edit = config.wiki.get("assert")
+        maxlag = config.wiki.get("maxlag")
+        search_config = config.wiki.get("search")
+
+        if user_agent:
+            # "$1" and "$2" are placeholders for the bot and Python versions:
+            user_agent = user_agent.replace("$1", __version__)
+            user_agent = user_agent.replace("$2", python_version())
+
+        return Site(name=name, project=project, lang=lang, base_url=base_url,
+                    article_path=article_path, script_path=script_path,
+                    sql=sql, namespaces=namespaces, login=login,
+                    cookiejar=cookiejar, user_agent=user_agent,
+                    use_https=use_https, assert_edit=assert_edit,
+                    maxlag=maxlag, search_config=search_config)
+
+    def _get_site_name_from_sitesdb(self, project, lang):
+        """Return the name of the first site with the given project and lang.
+
+        The name is returned as a plain string, ready to be passed to
+        _make_site_object() or _remove_site_from_sitesdb(). If the site is not
+        found, return None. An empty sitesdb will be created if none exists.
+        """
+        query = "SELECT site_name FROM sites WHERE site_project = ? and site_lang = ?"
+        with sqlite.connect(self._sitesdb) as conn:
+            try:
+                row = conn.execute(query, (project, lang)).fetchone()
+            except sqlite.OperationalError:
+                # The tables don't exist yet, so there is nothing to find:
+                self._create_sitesdb()
+                return None
+        # fetchone() gives us a one-item row (or None), not the bare name:
+        return row[0] if row else None
+
+    def _add_site_to_sitesdb(self, site):
+        """Extract relevant info from a Site object and add it to the sitesdb.
+
+        Works like a reverse _load_site_from_sitesdb(); the site's project,
+        language, base URL, article path, script path, SQL connection data, and
+        namespaces are extracted from the site and inserted into the sites
+        database. If the sitesdb doesn't exist, we'll create it first. Any
+        existing rows for a site with the same name are replaced. The Site
+        object itself is left unmodified.
+        """
+        name = site.name()
+        sites_data = (name, site.project(), site.lang(), site._base_url,
+                      site._article_path, site._script_path)
+        # NOTE(review): _sql_data is presumably None when add_site() was
+        # called without any SQL info, so fall back to an empty dict:
+        sql_items = (site._sql_data or {}).iteritems()
+        sql_data = [(name, key, val) for key, val in sql_items]
+        ns_data = []
+        for ns_id, ns_names in site._namespaces.iteritems():
+            # The first name in each list is the primary one. We must not
+            # pop() it off, because that would strip the primary name from the
+            # namespaces of the Site object we were given:
+            for index, ns_name in enumerate(ns_names):
+                ns_data.append((name, ns_id, ns_name, index == 0))
+
+        with sqlite.connect(self._sitesdb) as conn:
+            check_exists = "SELECT 1 FROM sites WHERE site_name = ?"
+            try:
+                exists = conn.execute(check_exists, (name,)).fetchone()
+            except sqlite.OperationalError:
+                self._create_sitesdb()
+            else:
+                if exists:
+                    conn.execute("DELETE FROM sites WHERE site_name = ?", (name,))
+                    conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
+                    conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
+            conn.execute("INSERT INTO sites VALUES (?, ?, ?, ?, ?, ?)", sites_data)
+            conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)", sql_data)
+            conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)", ns_data)
+
+    def _remove_site_from_sitesdb(self, name):
+        """Remove a site by name from the sitesdb.
+
+        Returns True if the site was removed, or False if it was not in the
+        sitesdb. An empty sitesdb will be created if none exists.
+        """
+        with sqlite.connect(self._sitesdb) as conn:
+            try:
+                cursor = conn.execute("DELETE FROM sites WHERE site_name = ?", (name,))
+            except sqlite.OperationalError:
+                # The tables don't exist yet, so there is nothing to remove:
+                self._create_sitesdb()
+                return False
+            if cursor.rowcount == 0:
+                return False
+            conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
+            conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
+            return True
+
+    def get_site(self, name=None, project=None, lang=None):
+        """Return a Site instance based on information from the sitesdb.
+
+        With no arguments, return the default site as specified by our config
+        file. This is config.wiki["defaultSite"].
+
+        With 'name' specified, return the site with that name. This is
+        equivalent to the site's 'wikiid' in the API, like 'enwiki'.
+
+        With 'project' and 'lang' specified, return the site whose project and
+        language match these values. If there are multiple sites with the same
+        values (unlikely), this is not a reliable way of loading a site. Call
+        the function with an explicit 'name' in that case.
+
+        We will attempt to login to the site automatically using
+        config.wiki["username"] and config.wiki["password"] if both are
+        defined.
+
+        Specifying a project without a lang or a lang without a project will
+        raise TypeError. If all three args are specified, 'name' will be first
+        tried, then 'project' and 'lang' if 'name' doesn't work. If a site
+        cannot be found in the sitesdb, SiteNotFoundError will be raised. An
+        empty sitesdb will be created if none is found.
+        """
+        if not config.is_loaded():
+            self._load_config()
+
+        # Someone specified a project without a lang, or vice versa:
+        if (project and not lang) or (not project and lang):
+            e = "Keyword arguments 'lang' and 'project' must be specified together."
+            raise TypeError(e)
+
+        # No args given, so return our default site:
+        if not name and not project and not lang:
+            try:
+                default = config.wiki["defaultSite"]
+            except KeyError:
+                e = "Default site is not specified in config."
+                raise SiteNotFoundError(e)
+            return self._make_site_object(default)
+
+        # Name arg given, but don't look at others unless `name` isn't found:
+        if name:
+            try:
+                return self._make_site_object(name)
+            except SiteNotFoundError:
+                if project and lang:
+                    name = self._get_site_name_from_sitesdb(project, lang)
+                    if name:
+                        return self._make_site_object(name)
+                raise
+
+        # If we end up here, then project and lang are the only args given:
+        name = self._get_site_name_from_sitesdb(project, lang)
+        if name:
+            return self._make_site_object(name)
+        e = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang)
+        raise SiteNotFoundError(e)
+
+    def add_site(self, project=None, lang=None, base_url=None,
+                 script_path="/w", sql=None):
+        """Add a site to the sitesdb so it can be retrieved with get_site().
+
+        If only a project and a lang are given, we'll guess the base_url as
+        "//{lang}.{project}.org" (which is protocol-relative, becoming 'https'
+        if 'useHTTPS' is True in config otherwise 'http'). If this is wrong,
+        provide the correct base_url as an argument (in which case project and
+        lang are ignored). Most wikis use "/w" as the script path (meaning the
+        API is located at "{base_url}{script_path}/api.php" ->
+        "//{lang}.{project}.org/w/api.php"), so this is the default. If your
+        wiki is different, provide the script_path as an argument. The only
+        other argument to Site() that we can't get from config files or by
+        querying the wiki itself is SQL connection info, so provide a dict of
+        kwargs as `sql` and Site will pass it to oursql.connect(**sql),
+        allowing you to make queries with site.sql_query().
+
+        Returns the new Site object once its information has been stored in
+        the sitesdb. If a site with the same name is already in our sitesdb,
+        its entry is replaced (this can be done purposefully to update old
+        site info). Raises SiteNotFoundError if not enough information has
+        been provided to identify the site (e.g. a project but not a lang).
+        """
+        if not config.is_loaded():
+            self._load_config()
+
+        if not base_url:
+            if not project or not lang:
+                e = "Without a base_url, both a project and a lang must be given."
+                raise SiteNotFoundError(e)
+            base_url = "//{0}.{1}.org".format(lang, project)
+
+        login = (config.wiki.get("username"), config.wiki.get("password"))
+        cookiejar = self._get_cookiejar()
+        user_agent = config.wiki.get("userAgent")
+        use_https = config.wiki.get("useHTTPS", False)
+        assert_edit = config.wiki.get("assert")
+        maxlag = config.wiki.get("maxlag")
+        search_config = config.wiki.get("search")
+
+        # Create a temp Site object to log in and load the other attributes:
+        site = Site(base_url=base_url, script_path=script_path, sql=sql,
+                    login=login, cookiejar=cookiejar, user_agent=user_agent,
+                    use_https=use_https, assert_edit=assert_edit,
+                    maxlag=maxlag, search_config=search_config)
+
+        self._add_site_to_sitesdb(site)
+        return site
+
+    def remove_site(self, name=None, project=None, lang=None):
+        """Remove a site from the sitesdb.
+
+        Returns True if the site was removed successfully or False if the site
+        was not in our sitesdb originally. If all three args (name, project,
+        and lang) are given, we'll first try 'name' and then try the latter two
+        if 'name' wasn't found in the database. Raises TypeError if a project
+        was given but not a language, or vice versa. Will create an empty
+        sitesdb if none was found.
+        """
+        if not config.is_loaded():
+            self._load_config()
+
+        # Someone specified a project without a lang, or vice versa:
+        if (project and not lang) or (not project and lang):
+            e = "Keyword arguments 'lang' and 'project' must be specified together."
+            raise TypeError(e)
+
+        if name:
+            was_removed = self._remove_site_from_sitesdb(name)
+            if not was_removed:
+                # Fall back on project and lang, if we were given them:
+                if project and lang:
+                    name = self._get_site_name_from_sitesdb(project, lang)
+                    if name:
+                        return self._remove_site_from_sitesdb(name)
+            return was_removed
+
+        if project and lang:
+            name = self._get_site_name_from_sitesdb(project, lang)
+            if name:
+                return self._remove_site_from_sitesdb(name)
+
+        return False
+
+# The default manager uses a sites.db file two directories above this package:
+_root = path.split(path.split(path.dirname(path.abspath(__file__)))[0])[0]
+_dbfile = path.join(_root, "sites.db")
+_manager = SitesDBManager(_dbfile)
+del _root, _dbfile
+
+get_site = _manager.get_site
+add_site = _manager.add_site
+remove_site = _manager.remove_site