diff --git a/.gitignore b/.gitignore index 5c965b9..d2b75fb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,19 +1,11 @@ -# Ignore python bytecode: -*.pyc - -# Ignore bot-specific config file: -config.yml - -# Ignore logs directory: +# Ignore bot-specific files: logs/ - -# Ignore cookies file: +config.yml +sites.db .cookies -# Ignore OS X's crud: -.DS_Store +# Ignore python bytecode: +*.pyc -# Ignore pydev's nonsense: -.project -.pydevproject -.settings/ +# Ignore OS X's stuff: +.DS_Store diff --git a/earwigbot/config.py b/earwigbot/config.py index e0ef26a..f1a977c 100644 --- a/earwigbot/config.py +++ b/earwigbot/config.py @@ -176,7 +176,7 @@ class _BotConfig(object): return self._root_dir @property - def config_path(self): + def path(self): return self._config_path @property diff --git a/earwigbot/irc/watcher.py b/earwigbot/irc/watcher.py index f3731a7..ad206d6 100644 --- a/earwigbot/irc/watcher.py +++ b/earwigbot/irc/watcher.py @@ -89,7 +89,7 @@ class Watcher(IRCConnection): return module = imp.new_module("_rc_event_processing_rules") try: - exec compile(rules, config.config_path, "exec") in module.__dict__ + exec compile(rules, config.path, "exec") in module.__dict__ except Exception: e = "Could not compile config file's RC event rules" self.logger.exception(e) diff --git a/earwigbot/wiki/__init__.py b/earwigbot/wiki/__init__.py index 03a8e9e..e48be82 100644 --- a/earwigbot/wiki/__init__.py +++ b/earwigbot/wiki/__init__.py @@ -36,9 +36,9 @@ logger.addHandler(_log.NullHandler()) from earwigbot.wiki.constants import * from earwigbot.wiki.exceptions import * -from earwigbot.wiki.functions import * from earwigbot.wiki.category import Category from earwigbot.wiki.page import Page from earwigbot.wiki.site import Site +from earwigbot.wiki.sitesdb import get_site, add_site, remove_site from earwigbot.wiki.user import User diff --git a/earwigbot/wiki/functions.py b/earwigbot/wiki/functions.py deleted file mode 100644 index 5504306..0000000 --- a/earwigbot/wiki/functions.py 
+++ /dev/null @@ -1,211 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2009-2012 by Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -""" -EarwigBot's Wiki Toolset: Misc Functions - -This module, a component of the wiki package, contains miscellaneous functions -that are not methods of any class, like get_site(). - -There's no need to import this module explicitly. All functions here are -automatically available from earwigbot.wiki. -""" - -from cookielib import LWPCookieJar, LoadError -import errno -from getpass import getpass -from os import chmod, path -import platform -import stat - -import earwigbot -from earwigbot.config import config -from earwigbot.wiki.exceptions import SiteNotFoundError -from earwigbot.wiki.site import Site - -__all__ = ["get_site", "add_site", "del_site"] - -_cookiejar = None - -def _load_config(): - """Called by a config-requiring function, such as get_site(), when config - has not been loaded. 
This will usually happen only if we're running code - directly from Python's interpreter and not the bot itself, because - earwigbot.py or core/main.py will already call these functions. - """ - is_encrypted = config.load() - if is_encrypted: # Passwords in the config file are encrypted - key = getpass("Enter key to unencrypt bot passwords: ") - config._decryption_key = key - config.decrypt(config.wiki, "password") - -def _get_cookiejar(): - """Returns a LWPCookieJar object loaded from our .cookies file. The same - one is returned every time. - - The .cookies file is located in the project root, same directory as - config.yml and bot.py. If it doesn't exist, we will create the file and set - it to be readable and writeable only by us. If it exists but the - information inside is bogus, we will ignore it. - - This is normally called by _get_site_object_from_dict() (in turn called by - get_site()), and the cookiejar is passed to our Site's constructor, used - when it makes API queries. This way, we can easily preserve cookies between - sites (e.g., for CentralAuth), making logins easier. - """ - global _cookiejar - if _cookiejar is not None: - return _cookiejar - - cookie_file = path.join(config.root_dir, ".cookies") - _cookiejar = LWPCookieJar(cookie_file) - - try: - _cookiejar.load() - except LoadError: - pass # File contains bad data, so ignore it completely - except IOError as e: - if e.errno == errno.ENOENT: # "No such file or directory" - # Create the file and restrict reading/writing only to the owner, - # so others can't peak at our cookies: - open(cookie_file, "w").close() - chmod(cookie_file, stat.S_IRUSR|stat.S_IWUSR) - else: - raise - - return _cookiejar - -def _get_site_object_from_dict(name, d): - """Return a Site object based on the contents of a dict, probably acquired - through our config file, and a separate name. 
- """ - project = d.get("project") - lang = d.get("lang") - base_url = d.get("baseURL") - article_path = d.get("articlePath") - script_path = d.get("scriptPath") - sql = d.get("sql", {}) - namespaces = d.get("namespaces", {}) - login = (config.wiki.get("username"), config.wiki.get("password")) - cookiejar = _get_cookiejar() - user_agent = config.wiki.get("userAgent") - assert_edit = config.wiki.get("assert") - maxlag = config.wiki.get("maxlag") - search_config = config.wiki.get("search") - - if user_agent: - user_agent = user_agent.replace("$1", earwigbot.__version__) - user_agent = user_agent.replace("$2", platform.python_version()) - - return Site(name=name, project=project, lang=lang, base_url=base_url, - article_path=article_path, script_path=script_path, sql=sql, - namespaces=namespaces, login=login, cookiejar=cookiejar, - user_agent=user_agent, assert_edit=assert_edit, maxlag=maxlag, - search_config=search_config) - -def get_site(name=None, project=None, lang=None): - """Returns a Site instance based on information from our config file. - - With no arguments, returns the default site as specified by our config - file. This is default = config.wiki["defaultSite"]; - config.wiki["sites"][default]. - - With `name` specified, returns the site specified by - config.wiki["sites"][name]. - - With `project` and `lang` specified, returns the site specified by the - member of config.wiki["sites"], `s`, for which s["project"] == project and - s["lang"] == lang. - - We will attempt to login to the site automatically - using config.wiki["username"] and config.wiki["password"] if both are - defined. - - Specifying a project without a lang or a lang without a project will raise - TypeError. If all three args are specified, `name` will be first tried, - then `project` and `lang`. If, with any number of args, a site cannot be - found in the config, SiteNotFoundError is raised. 
- """ - # Check if config has been loaded, and load it if it hasn't: - if not config.is_loaded(): - _load_config() - - # Someone specified a project without a lang (or a lang without a project)! - if (project is None and lang is not None) or (project is not None and - lang is None): - e = "Keyword arguments 'lang' and 'project' must be specified together." - raise TypeError(e) - - # No args given, so return our default site (project is None implies lang - # is None, so we don't need to add that in): - if name is None and project is None: - try: - default = config.wiki["defaultSite"] - except KeyError: - e = "Default site is not specified in config." - raise SiteNotFoundError(e) - try: - site = config.wiki["sites"][default] - except KeyError: - e = "Default site specified by config is not in the config's sites list." - raise SiteNotFoundError(e) - return _get_site_object_from_dict(default, site) - - # Name arg given, but don't look at others unless `name` isn't found: - if name is not None: - try: - site = config.wiki["sites"][name] - except KeyError: - if project is None: # Implies lang is None, so only name was given - e = "Site '{0}' not found in config.".format(name) - raise SiteNotFoundError(e) - for sitename, site in config.wiki["sites"].items(): - if site["project"] == project and site["lang"] == lang: - return _get_site_object_from_dict(sitename, site) - e = "Neither site '{0}' nor site '{1}:{2}' found in config." - e.format(name, project, lang) - raise SiteNotFoundError(e) - else: - return _get_site_object_from_dict(name, site) - - # If we end up here, then project and lang are both not None: - for sitename, site in config.wiki["sites"].items(): - if site["project"] == project and site["lang"] == lang: - return _get_site_object_from_dict(sitename, site) - e = "Site '{0}:{1}' not found in config.".format(project, lang) - raise SiteNotFoundError(e) - -def add_site(): - """STUB: config editing is required first. 
- - Returns True if the site was added successfully or False if the site was - already in our config. Raises ConfigError if saving the updated file failed - for some reason.""" - pass - -def del_site(name): - """STUB: config editing is required first. - - Returns True if the site was removed successfully or False if the site was - not in our config originally. Raises ConfigError if saving the updated file - failed for some reason.""" - pass diff --git a/earwigbot/wiki/page.py b/earwigbot/wiki/page.py index 8407108..dfd5268 100644 --- a/earwigbot/wiki/page.py +++ b/earwigbot/wiki/page.py @@ -174,7 +174,7 @@ class Page(CopyrightMixin): Assuming the API is sound, this should not raise any exceptions. """ - if result is None: + if not result: params = {"action": "query", "rvprop": "user", "intoken": "edit", "prop": "info|revisions", "rvlimit": 1, "rvdir": "newer", "titles": self._title, "inprop": "protection|url"} @@ -240,7 +240,7 @@ class Page(CopyrightMixin): Don't call this directly, ever - use .get(force=True) if you want to force content reloading. 
""" - if result is None: + if not result: params = {"action": "query", "prop": "revisions", "rvlimit": 1, "rvprop": "content|timestamp", "titles": self._title} result = self._site._api_query(params) @@ -471,7 +471,7 @@ class Page(CopyrightMixin): """ if force: self._load_wrapper() - if self._fullurl is not None: + if self._fullurl: return self._fullurl else: slug = quote(self._title.replace(" ", "_"), safe="/:") diff --git a/earwigbot/wiki/site.py b/earwigbot/wiki/site.py index 8719036..5c0b1c7 100644 --- a/earwigbot/wiki/site.py +++ b/earwigbot/wiki/site.py @@ -71,18 +71,19 @@ class Site(object): def __init__(self, name=None, project=None, lang=None, base_url=None, article_path=None, script_path=None, sql=None, namespaces=None, login=(None, None), cookiejar=None, - user_agent=None, assert_edit=None, maxlag=None, - search_config=(None, None)): + user_agent=None, use_https=False, assert_edit=None, + maxlag=None, search_config=(None, None)): """Constructor for new Site instances. This probably isn't necessary to call yourself unless you're building a Site that's not in your config and you don't want to add it - normally all you need is tools.get_site(name), which creates the Site for you - based on your config file. We accept a bunch of kwargs, but the only - ones you really "need" are `base_url` and `script_path` - this is - enough to figure out an API url. `login`, a tuple of - (username, password), is highly recommended. `cookiejar` will be used - to store cookies, and we'll use a normal CookieJar if none is given. + based on your config file and the sites database. We accept a bunch of + kwargs, but the only ones you really "need" are `base_url` and + `script_path` - this is enough to figure out an API url. `login`, a + tuple of (username, password), is highly recommended. `cookiejar` will + be used to store cookies, and we'll use a normal CookieJar if none is + given. First, we'll store the given arguments as attributes, then set up our URL opener. 
We'll load any of the attributes that weren't given from @@ -99,7 +100,8 @@ class Site(object): self._script_path = script_path self._namespaces = namespaces - # Attributes used for API queries: + # Attributes used for API queries: + self._use_https = use_https self._assert_edit = assert_edit self._maxlag = maxlag self._max_retries = 5 @@ -112,11 +114,11 @@ class Site(object): self._search_config = search_config # Set up cookiejar and URL opener for making API queries: - if cookiejar is not None: + if cookiejar: self._cookiejar = cookiejar else: self._cookiejar = CookieJar() - if user_agent is None: + if not user_agent: user_agent = USER_AGENT # Set default UA from wiki.constants self._opener = build_opener(HTTPCookieProcessor(self._cookiejar)) self._opener.addheaders = [("User-Agent", user_agent), @@ -127,9 +129,9 @@ class Site(object): # If we have a name/pass and the API says we're not logged in, log in: self._login_info = name, password = login - if name is not None and password is not None: + if name and password: logged_in_as = self._get_username_from_cookies() - if logged_in_as is None or name != logged_in_as: + if not logged_in_as or name != logged_in_as: self._login(login) def __repr__(self): @@ -137,10 +139,10 @@ class Site(object): res = ", ".join(( "Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}", "base_url={_base_url!r}", "article_path={_article_path!r}", - "script_path={_script_path!r}", "assert_edit={_assert_edit!r}", - "maxlag={_maxlag!r}", "sql={_sql!r}", "login={0}", - "user_agent={2!r}", "cookiejar={1})" - )) + "script_path={_script_path!r}", "use_https={_use_https!r}", + "assert_edit={_assert_edit!r}", "maxlag={_maxlag!r}", + "sql={_sql_data!r}", "login={0}", "user_agent={2!r}", + "cookiejar={1})")) name, password = self._login_info login = "({0}, {1})".format(repr(name), "hidden" if password else None) cookies = self._cookiejar.__class__.__name__ @@ -162,7 +164,9 @@ class Site(object): This will first attempt to construct an API 
url from self._base_url and self._script_path. We need both of these, or else we'll raise - SiteAPIError. + SiteAPIError. If self._base_url is protocol-relative (introduced in + MediaWiki 1.18), we'll choose HTTPS if self._use_https is True, + otherwise HTTP. We'll encode the given params, adding format=json along the way, as well as &assert= and &maxlag= based on self._assert_edit and _maxlag. @@ -180,11 +184,17 @@ class Site(object): There's helpful MediaWiki API documentation at . """ - if self._base_url is None or self._script_path is None: + if not self._base_url or self._script_path is None: e = "Tried to do an API query, but no API URL is known." raise SiteAPIError(e) - url = ''.join((self._base_url, self._script_path, "/api.php")) + base_url = self._base_url + if base_url.startswith("//"): # Protocol-relative URLs from 1.18 + if self._use_https: + base_url = "https:" + base_url + else: + base_url = "http:" + base_url + url = ''.join((base_url, self._script_path, "/api.php")) params["format"] = "json" # This is the only format we understand if self._assert_edit: # If requested, ensure that we're logged in @@ -193,7 +203,6 @@ class Site(object): params["maxlag"] = self._maxlag data = urlencode(params) - logger.debug("{0} -> {1}".format(url, data)) try: @@ -332,15 +341,15 @@ class Site(object): name = ''.join((self._name, "Token")) cookie = self._get_cookie(name, domain) - if cookie is not None: + if cookie: name = ''.join((self._name, "UserName")) user_name = self._get_cookie(name, domain) - if user_name is not None: + if user_name: return user_name.value name = "centralauth_Token" for cookie in self._cookiejar: - if cookie.domain_initial_dot is False or cookie.is_expired(): + if not cookie.domain_initial_dot or cookie.is_expired(): continue if cookie.name != name: continue @@ -348,7 +357,7 @@ class Site(object): search = ''.join(("(.*?)", re_escape(cookie.domain))) if re_match(search, domain): # Test it against our site user_name = 
self._get_cookie("centralauth_User", cookie.domain) - if user_name is not None: + if user_name: return user_name.value def _get_username_from_api(self): @@ -378,7 +387,7 @@ class Site(object): single API query for our username (or IP address) and return that. """ name = self._get_username_from_cookies() - if name is not None: + if name: return name return self._get_username_from_api() @@ -417,7 +426,7 @@ class Site(object): """ name, password = login params = {"action": "login", "lgname": name, "lgpassword": password} - if token is not None: + if token: params["lgtoken"] = token result = self._api_query(params) res = result["login"]["result"] @@ -455,10 +464,9 @@ class Site(object): def _sql_connect(self, **kwargs): """Attempt to establish a connection with this site's SQL database. - oursql.connect() will be called with self._sql_data as its kwargs, - which is usually config.wiki["sites"][self.name()]["sql"]. Any kwargs - given to this function will be passed to connect() and will have - precedence over the config file. + oursql.connect() will be called with self._sql_data as its kwargs. + Any kwargs given to this function will be passed to connect() and will + have precedence over the config file. Will raise SQLError() if the module "oursql" is not available. oursql may raise its own exceptions (e.g. oursql.InterfaceError) if it cannot @@ -631,6 +639,6 @@ class Site(object): If `username` is left as None, then a User object representing the currently logged-in (or anonymous!) user is returned. 
""" - if username is None: + if not username: username = self._get_username() return User(self, username) diff --git a/earwigbot/wiki/sitesdb.py b/earwigbot/wiki/sitesdb.py new file mode 100644 index 0000000..0bd5c76 --- /dev/null +++ b/earwigbot/wiki/sitesdb.py @@ -0,0 +1,392 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2009-2012 by Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
from cookielib import LWPCookieJar, LoadError
import errno
from getpass import getpass
from os import chmod, path
from platform import python_version
import stat
import sqlite3 as sqlite

from earwigbot import __version__
from earwigbot.config import config
from earwigbot.wiki.exceptions import SiteNotFoundError
from earwigbot.wiki.site import Site

__all__ = ["SitesDBManager", "get_site", "add_site", "remove_site"]

class SitesDBManager(object):
    """
    EarwigBot's Wiki Toolset: Sites Database Manager

    This class controls the sites.db file, which stores information about all
    wiki sites known to the bot. Three public methods act as bridges between
    the bot's config files and Site objects:
    get_site    -- returns a Site object corresponding to a given site name
    add_site    -- stores a site in the database, given connection info
    remove_site -- removes a site from the database, given its name

    There's usually no need to use this class directly. All public methods
    here are available as earwigbot.wiki.get_site(), earwigbot.wiki.add_site(),
    and earwigbot.wiki.remove_site(), which use a sites.db file located in the
    project root (two directories above this module). Lower-level access can
    be achieved by importing the manager class
    (`from earwigbot.wiki.sitesdb import SitesDBManager`).
    """

    def __init__(self, db_file):
        """Set up the manager with an attribute for the sitesdb filename."""
        self._cookiejar = None
        self._sitesdb = db_file

    def _load_config(self):
        """Load the bot's config.

        Called by a config-requiring function, such as get_site(), when config
        has not been loaded. If config.load() reports that the passwords are
        encrypted, the user is prompted for the decryption key and the wiki
        password is decrypted.
        """
        is_encrypted = config.load()
        if is_encrypted:  # Passwords in the config file are encrypted
            key = getpass("Enter key to unencrypt bot passwords: ")
            config._decryption_key = key
            config.decrypt(config.wiki, "password")

    def _get_cookiejar(self):
        """Return a LWPCookieJar object loaded from our .cookies file.

        The same jar object is returned every time. The .cookies file lives in
        config.root_dir. If it doesn't exist, we will create the file and set
        it to be readable and writeable only by us. If it exists but the
        information inside is bogus, we'll ignore it.

        The cookiejar is passed to each Site's constructor so that cookies can
        be shared between sites (e.g., for CentralAuth).
        """
        # Compare against None explicitly: CookieJar defines __len__, so a jar
        # that simply holds no cookies yet is falsy and would otherwise be
        # rebuilt (and stop being shared between sites) on every call.
        if self._cookiejar is not None:
            return self._cookiejar

        cookie_file = path.join(config.root_dir, ".cookies")
        self._cookiejar = LWPCookieJar(cookie_file)

        try:
            self._cookiejar.load()
        except LoadError:
            pass  # File contains bad data, so ignore it completely
        except IOError as e:
            if e.errno == errno.ENOENT:  # "No such file or directory"
                # Create the file and restrict reading/writing only to the
                # owner, so others can't peek at our cookies:
                open(cookie_file, "w").close()
                chmod(cookie_file, stat.S_IRUSR|stat.S_IWUSR)
            else:
                raise

        return self._cookiejar

    def _get_site_kwargs_from_config(self):
        """Return the Site() keyword arguments that come from the config file.

        Shared by _make_site_object() and add_site() so that both build Sites
        the same way, including the "$1"/"$2" substitution (bot version and
        Python version) in the configured user agent.
        """
        user_agent = config.wiki.get("userAgent")
        if user_agent:
            user_agent = user_agent.replace("$1", __version__)
            user_agent = user_agent.replace("$2", python_version())

        return {
            "login": (config.wiki.get("username"),
                      config.wiki.get("password")),
            "cookiejar": self._get_cookiejar(),
            "user_agent": user_agent,
            "use_https": config.wiki.get("useHTTPS", False),
            "assert_edit": config.wiki.get("assert"),
            "maxlag": config.wiki.get("maxlag"),
            "search_config": config.wiki.get("search"),
        }

    def _create_sitesdb(self):
        """Initialize the sitesdb file with its three necessary tables."""
        script = """
        CREATE TABLE sites (site_name, site_project, site_lang, site_base_url,
                            site_article_path, site_script_path);
        CREATE TABLE sql_data (sql_site, sql_data_key, sql_data_value);
        CREATE TABLE namespaces (ns_site, ns_id, ns_name, ns_is_primary_name);
        """
        with sqlite.connect(self._sitesdb) as conn:
            conn.executescript(script)

    def _load_site_from_sitesdb(self, name):
        """Return all information stored in the sitesdb relating to given site.

        The information will be returned as a tuple, containing the site's
        name, project, language, base URL, article path, script path, SQL
        connection data, and namespaces, in that order. If the site is not
        found in the database, SiteNotFoundError will be raised. An empty
        database will be created before the exception is raised if none exists.
        """
        query1 = "SELECT * FROM sites WHERE site_name = ?"
        query2 = ("SELECT sql_data_key, sql_data_value FROM sql_data "
                  "WHERE sql_site = ?")
        query3 = ("SELECT ns_id, ns_name, ns_is_primary_name FROM namespaces "
                  "WHERE ns_site = ?")
        error = "Site '{0}' not found in the sitesdb.".format(name)
        with sqlite.connect(self._sitesdb) as conn:
            try:
                site_data = conn.execute(query1, (name,)).fetchone()
            except sqlite.OperationalError:
                # Treated as "tables missing": set up a fresh database.
                self._create_sitesdb()
                raise SiteNotFoundError(error)
            if not site_data:
                raise SiteNotFoundError(error)
            sql_data = conn.execute(query2, (name,)).fetchall()
            ns_data = conn.execute(query3, (name,)).fetchall()

        name, project, lang, base_url, article_path, script_path = site_data
        sql = dict(sql_data)
        namespaces = {}
        for ns_id, ns_name, ns_is_primary_name in ns_data:
            names = namespaces.setdefault(ns_id, [])
            if ns_is_primary_name:  # "Primary" name goes first in list
                names.insert(0, ns_name)
            else:  # Ordering of the aliases doesn't matter
                names.append(ns_name)

        return (name, project, lang, base_url, article_path, script_path, sql,
                namespaces)

    def _make_site_object(self, name):
        """Return a Site object associated with the site 'name' in our sitesdb.

        This calls _load_site_from_sitesdb(), so SiteNotFoundError will be
        raised if the site is not in our sitesdb.
        """
        (name, project, lang, base_url, article_path, script_path, sql,
         namespaces) = self._load_site_from_sitesdb(name)
        return Site(name=name, project=project, lang=lang, base_url=base_url,
                    article_path=article_path, script_path=script_path,
                    sql=sql, namespaces=namespaces,
                    **self._get_site_kwargs_from_config())

    def _get_site_name_from_sitesdb(self, project, lang):
        """Return the name of the first site with the given project and lang.

        If the site is not found, return None. An empty sitesdb will be created
        if none exists.
        """
        query = ("SELECT site_name FROM sites "
                 "WHERE site_project = ? and site_lang = ?")
        with sqlite.connect(self._sitesdb) as conn:
            try:
                row = conn.execute(query, (project, lang)).fetchone()
            except sqlite.OperationalError:
                self._create_sitesdb()
                return None
        # fetchone() gives a one-element row (or None); callers want the bare
        # name so it can be passed straight to _make_site_object().
        return row[0] if row else None

    def _add_site_to_sitesdb(self, site):
        """Extract relevant info from a Site object and add it to the sitesdb.

        Works like a reverse _load_site_from_sitesdb(); the site's project,
        language, base URL, article path, script path, SQL connection data, and
        namespaces are extracted from the site and inserted into the sites
        database. Any existing rows for a site of the same name are replaced.
        If the sitesdb doesn't exist, we'll create it first.
        """
        name = site.name()
        sites_data = (name, site.project(), site.lang(), site._base_url,
                      site._article_path, site._script_path)
        # add_site() passes sql=None by default, so tolerate missing SQL data.
        sql_data = [(name, key, val)
                    for key, val in (site._sql_data or {}).iteritems()]
        # The first name in each list is the primary one. Index rather than
        # pop() so the Site's own namespace lists are left untouched.
        ns_data = []
        for ns_id, ns_names in (site._namespaces or {}).iteritems():
            for index, ns_name in enumerate(ns_names):
                ns_data.append((name, ns_id, ns_name, index == 0))

        with sqlite.connect(self._sitesdb) as conn:
            check_exists = "SELECT 1 FROM sites WHERE site_name = ?"
            try:
                exists = conn.execute(check_exists, (name,)).fetchone()
            except sqlite.OperationalError:
                self._create_sitesdb()
            else:
                if exists:
                    conn.execute("DELETE FROM sites WHERE site_name = ?",
                                 (name,))
                    conn.execute("DELETE FROM sql_data WHERE sql_site = ?",
                                 (name,))
                    conn.execute("DELETE FROM namespaces WHERE ns_site = ?",
                                 (name,))
            conn.execute("INSERT INTO sites VALUES (?, ?, ?, ?, ?, ?)",
                         sites_data)
            conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)", sql_data)
            conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)",
                             ns_data)

    def _remove_site_from_sitesdb(self, name):
        """Remove a site by name from the sitesdb.

        Return True if a row was deleted, or False if no site had that name.
        An empty sitesdb will be created (and False returned) if none exists.
        """
        with sqlite.connect(self._sitesdb) as conn:
            try:
                cursor = conn.execute("DELETE FROM sites WHERE site_name = ?",
                                      (name,))
            except sqlite.OperationalError:
                self._create_sitesdb()
                return False
            if cursor.rowcount == 0:
                return False
            conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
            conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
        return True

    def get_site(self, name=None, project=None, lang=None):
        """Return a Site instance based on information from the sitesdb.

        With no arguments, return the default site as specified by our config
        file. This is config.wiki["defaultSite"].

        With 'name' specified, return the site with that name. This is
        equivalent to the site's 'wikiid' in the API, like 'enwiki'.

        With 'project' and 'lang' specified, return the site whose project and
        language match these values. If there are multiple sites with the same
        values (unlikely), this is not a reliable way of loading a site. Call
        the function with an explicit 'name' in that case.

        We will attempt to login to the site automatically using
        config.wiki["username"] and config.wiki["password"] if both are
        defined.

        Specifying a project without a lang or a lang without a project will
        raise TypeError. If all three args are specified, 'name' will be first
        tried, then 'project' and 'lang' if 'name' doesn't work. If a site
        cannot be found in the sitesdb, SiteNotFoundError will be raised. An
        empty sitesdb will be created if none is found.
        """
        if not config.is_loaded():
            self._load_config()

        # Someone specified a project without a lang, or vice versa:
        if (project and not lang) or (not project and lang):
            e = "Keyword arguments 'lang' and 'project' must be specified together."
            raise TypeError(e)

        # No args given, so return our default site:
        if not name and not project and not lang:
            try:
                default = config.wiki["defaultSite"]
            except KeyError:
                e = "Default site is not specified in config."
                raise SiteNotFoundError(e)
            return self._make_site_object(default)

        # Name arg given, but don't look at others unless `name` isn't found:
        if name:
            try:
                return self._make_site_object(name)
            except SiteNotFoundError:
                if project and lang:
                    name = self._get_site_name_from_sitesdb(project, lang)
                    if name:
                        return self._make_site_object(name)
                raise

        # If we end up here, then project and lang are the only args given:
        name = self._get_site_name_from_sitesdb(project, lang)
        if name:
            return self._make_site_object(name)
        e = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang)
        raise SiteNotFoundError(e)

    def add_site(self, project=None, lang=None, base_url=None,
                 script_path="/w", sql=None):
        """Add a site to the sitesdb so it can be retrieved with get_site().

        If only a project and a lang are given, we'll guess the base_url as
        "//{lang}.{project}.org" (which is protocol-relative, becoming 'https'
        if 'useHTTPS' is True in config otherwise 'http'). If this is wrong,
        provide the correct base_url as an argument (in which case project and
        lang are ignored). Most wikis use "/w" as the script path (meaning the
        API is located at "{base_url}{script_path}/api.php" ->
        "//{lang}.{project}.org/w/api.php"), so this is the default. If your
        wiki is different, provide the script_path as an argument. SQL
        connection info can be given as a dict, `sql`, which is handed to the
        Site and stored in the sitesdb alongside it.

        Returns the newly created Site object. If a site with the same name is
        already in our sitesdb, its stored info is replaced (this can be done
        purposefully to update old site info). Raises SiteNotFoundError if not
        enough information has been provided to identify the site (e.g. a
        project but not a lang).
        """
        if not config.is_loaded():
            self._load_config()

        if not base_url:
            if not project or not lang:
                e = "Without a base_url, both a project and a lang must be given."
                raise SiteNotFoundError(e)
            base_url = "//{0}.{1}.org".format(lang, project)

        # Create a temp Site object to log in and load the other attributes:
        site = Site(base_url=base_url, script_path=script_path, sql=sql,
                    **self._get_site_kwargs_from_config())

        self._add_site_to_sitesdb(site)
        return site

    def remove_site(self, name=None, project=None, lang=None):
        """Remove a site from the sitesdb.

        Returns True if the site was removed successfully or False if the site
        was not in our sitesdb originally. If all three args (name, project,
        and lang) are given, we'll first try 'name' and then try the latter two
        if 'name' wasn't found in the database. Raises TypeError if a project
        was given but not a language, or vice versa. Will create an empty
        sitesdb if none was found.
        """
        if not config.is_loaded():
            self._load_config()

        # Someone specified a project without a lang, or vice versa:
        if (project and not lang) or (not project and lang):
            e = "Keyword arguments 'lang' and 'project' must be specified together."
            raise TypeError(e)

        if name:
            was_removed = self._remove_site_from_sitesdb(name)
            if not was_removed:
                if project and lang:
                    name = self._get_site_name_from_sitesdb(project, lang)
                    if name:
                        return self._remove_site_from_sitesdb(name)
            return was_removed

        if project and lang:
            name = self._get_site_name_from_sitesdb(project, lang)
            if name:
                return self._remove_site_from_sitesdb(name)

        return False

# The default manager keeps sites.db in the project root, which is two
# directories above this module (<root>/earwigbot/wiki/sitesdb.py):
_root = path.split(path.split(path.dirname(path.abspath(__file__)))[0])[0]
_dbfile = path.join(_root, "sites.db")
_manager = SitesDBManager(_dbfile)
del _root, _dbfile

get_site = _manager.get_site
add_site = _manager.add_site
remove_site = _manager.remove_site