Browse Source

Re-organize SitesDB code; protocol-relative URLs

tags/v0.1^2
Ben Kurtovic 13 years ago
parent
commit
7edfb0b1af
7 changed files with 420 additions and 396 deletions
  1. +7
    -15
      .gitignore
  2. +1
    -1
      earwigbot/config.py
  3. +1
    -1
      earwigbot/irc/watcher.py
  4. +1
    -1
      earwigbot/wiki/__init__.py
  5. +0
    -368
      earwigbot/wiki/functions.py
  6. +18
    -10
      earwigbot/wiki/site.py
  7. +392
    -0
      earwigbot/wiki/sitesdb.py

+ 7
- 15
.gitignore View File

@@ -1,19 +1,11 @@
# Ignore python bytecode:
*.pyc

# Ignore bot-specific config file:
config.yml

# Ignore logs directory:
# Ignore bot-specific files:
logs/
# Ignore cookies file:
config.yml
sites.db
.cookies

# Ignore OS X's crud:
.DS_Store
# Ignore python bytecode:
*.pyc

# Ignore pydev's nonsense:
.project
.pydevproject
.settings/
# Ignore OS X's stuff:
.DS_Store

+ 1
- 1
earwigbot/config.py View File

@@ -176,7 +176,7 @@ class _BotConfig(object):
return self._root_dir

@property
def config_path(self):
def path(self):
return self._config_path

@property


+ 1
- 1
earwigbot/irc/watcher.py View File

@@ -89,7 +89,7 @@ class Watcher(IRCConnection):
return
module = imp.new_module("_rc_event_processing_rules")
try:
exec compile(rules, config.config_path, "exec") in module.__dict__
exec compile(rules, config.path, "exec") in module.__dict__
except Exception:
e = "Could not compile config file's RC event rules"
self.logger.exception(e)


+ 1
- 1
earwigbot/wiki/__init__.py View File

@@ -36,9 +36,9 @@ logger.addHandler(_log.NullHandler())

from earwigbot.wiki.constants import *
from earwigbot.wiki.exceptions import *
from earwigbot.wiki.functions import *

from earwigbot.wiki.category import Category
from earwigbot.wiki.page import Page
from earwigbot.wiki.site import Site
from earwigbot.wiki.sitesdb import get_site, add_site, remove_site
from earwigbot.wiki.user import User

+ 0
- 368
earwigbot/wiki/functions.py View File

@@ -1,368 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 by Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""
EarwigBot's Wiki Toolset: Misc Functions

This module, a component of the wiki package, contains miscellaneous functions
that are not methods of any class. Currently, it contains get_site(),
add_site(), and remove_site(). These functions act as bridges between the bot's
config files and Site objects.

There's no need to import this module explicitly. All functions here are
automatically available from earwigbot.wiki.
"""

from cookielib import LWPCookieJar, LoadError
import errno
from getpass import getpass
from os import chmod, path
import platform
import stat
import sqlite3 as sqlite

from earwigbot import __version__
from earwigbot.config import config
from earwigbot.wiki.exceptions import SiteNotFoundError
from earwigbot.wiki.site import Site

__all__ = ["get_site", "add_site", "remove_site"]

_cookiejar = None
_sitesdb = "sites.db"

def _load_config():
    """Load the bot's config, prompting for a decryption key if needed.

    Invoked by the config-requiring functions (e.g. get_site()) whenever the
    config has not been loaded yet. If config.load() reports that passwords
    are encrypted, the user is asked for the key on the terminal and the wiki
    password is decrypted in place.
    """
    if not config.load():
        return  # Nothing in the config file is encrypted

    config._decryption_key = getpass("Enter key to unencrypt bot passwords: ")
    config.decrypt(config.wiki, "password")

def _get_cookiejar():
    """Return a LWPCookieJar object loaded from our .cookies file.

    The same jar is returned every time; its backing file is ".cookies" inside
    config.root_dir. If the file doesn't exist, we create it and make it
    readable and writeable only by its owner. If it exists but its contents
    cannot be parsed, the contents are ignored.

    This is normally called by _make_site_object() (in turn called by
    get_site()), and the cookiejar is passed to our Site's constructor, used
    when it makes API queries. This way, we can easily preserve cookies between
    sites (e.g., for CentralAuth), making logins easier.
    """
    global _cookiejar
    # Compare against None explicitly: CookieJar defines __len__, so a jar
    # that holds no cookies yet is falsy and would be rebuilt on every call.
    if _cookiejar is not None:
        return _cookiejar

    cookie_file = path.join(config.root_dir, ".cookies")
    jar = LWPCookieJar(cookie_file)

    try:
        jar.load()
    except LoadError:
        pass  # File contains bad data, so ignore it completely
    except IOError as e:
        if e.errno != errno.ENOENT:  # Anything but "No such file or directory"
            raise
        # Create the file and restrict reading/writing only to the owner,
        # so others can't peek at our cookies:
        open(cookie_file, "w").close()
        chmod(cookie_file, stat.S_IRUSR | stat.S_IWUSR)

    # Only cache the jar once it has been set up without an unexpected error:
    _cookiejar = jar
    return _cookiejar

def _create_sitesdb():
    """Create the sites, sql_data, and namespaces tables in the sitesdb file."""
    schema = (
        "\n"
        "CREATE TABLE sites (site_name, site_project, site_lang, site_base_url,\n"
        "site_article_path, site_script_path);\n"
        "CREATE TABLE sql_data (sql_site, sql_data_key, sql_data_value);\n"
        "CREATE TABLE namespaces (ns_site, ns_id, ns_name, ns_is_primary_name);\n"
    )
    connection = sqlite.connect(_sitesdb)
    with connection:
        connection.executescript(schema)

def _load_site_from_sitesdb(name):
    """Return all information stored in the sitesdb relating to site 'name'.

    The information is returned as a tuple containing the site's project,
    language, base URL, article path, script path, SQL connection data (a
    dict), and namespaces (a dict mapping each namespace ID to a list of its
    names, primary name first), in that order. If the site is not found in the
    database, SiteNotFoundError is raised. If the lookup fails with an
    OperationalError (e.g. the tables don't exist yet), an empty database is
    created before the exception is raised.
    """
    query1 = "SELECT * FROM sites WHERE site_name = ?"
    query2 = "SELECT sql_data_key, sql_data_value FROM sql_data WHERE sql_site = ?"
    query3 = "SELECT ns_id, ns_name, ns_is_primary_name FROM namespaces WHERE ns_site = ?"
    error = "Site '{0}' not found in the sitesdb.".format(name)
    with sqlite.connect(_sitesdb) as conn:
        try:
            site_data = conn.execute(query1, (name,)).fetchone()
        except sqlite.OperationalError:
            _create_sitesdb()
            raise SiteNotFoundError(error)
        if not site_data:
            raise SiteNotFoundError(error)
        sql_data = conn.execute(query2, (name,)).fetchall()
        ns_data = conn.execute(query3, (name,)).fetchall()

    # The sites table has six columns; the first one (site_name) is the value
    # we searched for, so it is dropped to keep the documented return shape.
    _, project, lang, base_url, article_path, script_path = site_data
    sql = dict(sql_data)
    namespaces = {}
    for ns_id, ns_name, ns_is_primary_name in ns_data:
        names = namespaces.setdefault(ns_id, [])
        if ns_is_primary_name:  # "Primary" name goes first in list
            names.insert(0, ns_name)
        else:  # Ordering of the aliases doesn't matter
            names.append(ns_name)

    return project, lang, base_url, article_path, script_path, sql, namespaces

def _make_site_object(name):
    """Build a Site object for the site called 'name' in our sitesdb.

    Site-specific data comes from _load_site_from_sitesdb() (which raises
    SiteNotFoundError for unknown sites); everything else is read from
    config.wiki. In the configured user agent, "$1" is replaced by the bot's
    version and "$2" by the running Python version.
    """
    site_info = _load_site_from_sitesdb(name)
    project, lang, base_url, article_path, script_path, sql, namespaces = site_info

    wiki = config.wiki
    credentials = (wiki.get("username"), wiki.get("password"))
    jar = _get_cookiejar()
    agent = wiki.get("userAgent")
    if agent:
        agent = agent.replace("$1", __version__).replace(
            "$2", platform.python_version())

    site_kwargs = {
        "name": name,
        "project": project,
        "lang": lang,
        "base_url": base_url,
        "article_path": article_path,
        "script_path": script_path,
        "sql": sql,
        "namespaces": namespaces,
        "login": credentials,
        "cookiejar": jar,
        "user_agent": agent,
        "assert_edit": wiki.get("assert"),
        "maxlag": wiki.get("maxlag"),
        "search_config": wiki.get("search"),
    }
    return Site(**site_kwargs)

def _get_site_name_from_sitesdb(project, lang):
    """Return the name of the first site with the specified project and lang.

    If the site is not found, return None. If the lookup fails with an
    OperationalError (e.g. the tables don't exist yet), an empty sitesdb is
    created and None is returned.
    """
    query = "SELECT site_name FROM sites WHERE site_project = ? and site_lang = ?"
    with sqlite.connect(_sitesdb) as conn:
        try:
            row = conn.execute(query, (project, lang)).fetchone()
        except sqlite.OperationalError:
            _create_sitesdb()
            return None
    # fetchone() yields a one-element row (or None); callers want the bare name
    return row[0] if row else None

def _add_site_to_sitesdb(site):
    """Extract relevant info from a Site object and add it to the sitesdb.

    Works like a reverse _load_site_from_sitesdb(); the site's project,
    language, base URL, article path, script path, SQL connection data, and
    namespaces are extracted from the site and inserted into the sites
    database. Any rows already stored under the same site name are replaced.
    If the sitesdb tables don't exist, we'll create them first. The Site
    object itself is left unmodified.
    """
    # NOTE(review): name/project/lang are read as plain attributes here;
    # confirm against the Site class that they are not methods.
    name = site.name
    sites_data = (name, site.project, site.lang, site._base_url,
                  site._article_path, site._script_path)
    # _sql_data may be None when no SQL connection info was supplied:
    sql_data = [(name, key, val)
                for key, val in (site._sql_data or {}).items()]
    ns_data = []
    for ns_id, ns_names in site._namespaces.items():
        # The first name in each list is the primary one. Iterate instead of
        # pop()ing so that the site's own namespace lists are not altered.
        for index, ns_name in enumerate(ns_names):
            ns_data.append((name, ns_id, ns_name, index == 0))

    with sqlite.connect(_sitesdb) as conn:
        check_exists = "SELECT 1 FROM sites WHERE site_name = ?"
        try:
            exists = conn.execute(check_exists, (name,)).fetchone()
        except sqlite.OperationalError:
            _create_sitesdb()
        else:
            if exists:
                conn.execute("DELETE FROM sites WHERE site_name = ?", (name,))
                conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
                conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
        conn.execute("INSERT INTO sites VALUES (?, ?, ?, ?, ?, ?)", sites_data)
        conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)", sql_data)
        conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)", ns_data)

def _remove_site_from_sitesdb(name):
    """Remove a site by name from the sitesdb.

    Returns True if a row for 'name' was deleted from the sites table (its
    sql_data and namespaces rows are then deleted as well), or False if no
    such site was stored. If the delete fails with an OperationalError (e.g.
    the tables don't exist yet), an empty sitesdb is created and False is
    returned.
    """
    with sqlite.connect(_sitesdb) as conn:
        try:
            cursor = conn.execute("DELETE FROM sites WHERE site_name = ?", (name,))
        except sqlite.OperationalError:
            _create_sitesdb()
            return False
        if cursor.rowcount == 0:
            return False
        conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
        conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
    return True

def get_site(name=None, project=None, lang=None):
    """Look up a site in the sitesdb and return it as a Site instance.

    Called without arguments, the site named by config.wiki["defaultSite"] is
    returned.

    Given 'name', the site stored under that name is returned (the name
    corresponds to the site's 'wikiid' in the API, e.g. 'enwiki').

    Given 'project' and 'lang', the first site whose project and language
    match is returned. Should several sites share those values, pass an
    explicit 'name' instead.

    The Site is constructed with config.wiki["username"] and
    config.wiki["password"] as its login info.

    'project' and 'lang' must be given together, otherwise TypeError is
    raised. When all three arguments are given, 'name' is tried first and
    'project'/'lang' are the fallback. SiteNotFoundError is raised when no
    matching site exists in the sitesdb (or no default site is configured).
    """
    if not config.is_loaded():
        _load_config()

    # A project needs a lang and a lang needs a project:
    if bool(project) != bool(lang):
        msg = "Keyword arguments 'lang' and 'project' must be specified together."
        raise TypeError(msg)

    if name:
        # Prefer the explicit name; fall back to project/lang only on a miss.
        try:
            return _make_site_object(name)
        except SiteNotFoundError:
            if project and lang:
                fallback = _get_site_name_from_sitesdb(project, lang)
                if fallback:
                    return _make_site_object(fallback)
            raise

    if not project and not lang:
        # Nothing was specified at all, so use the configured default site.
        try:
            default_name = config.wiki["defaultSite"]
        except KeyError:
            msg = "Default site is not specified in config."
            raise SiteNotFoundError(msg)
        return _make_site_object(default_name)

    # Only project and lang were supplied:
    found = _get_site_name_from_sitesdb(project, lang)
    if not found:
        msg = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang)
        raise SiteNotFoundError(msg)
    return _make_site_object(found)

def add_site(project=None, lang=None, base_url=None, script_path="/w",
             sql=None):
    """Add a site to the sitesdb so it can be retrieved with get_site() later.

    If only a project and a lang are given, we'll guess the base_url as
    "http://{lang}.{project}.org". If this is wrong, provide the correct
    base_url as an argument (in which case project and lang are ignored). Most
    wikis use "/w" as the script path (meaning the API is located at
    "{base_url}{script_path}/api.php" -> "http://{lang}.{project}.org/w/api.php"),
    so this is the default. If your wiki is different, provide the script_path
    as an argument. The only other argument to Site() that we can't get from
    config files or by querying the wiki itself is SQL connection info, so
    provide a dict of kwargs as `sql` and Site will pass it to
    oursql.connect(**sql), allowing you to make queries with site.sql_query().

    Returns the newly created Site object. If a site with the same name is
    already in our sitesdb, its stored data is replaced (this can be done
    purposefully to update old site info). Raises SiteNotFoundError if not
    enough information has been provided to identify the site (e.g. a project
    but not a lang).
    """
    if not config.is_loaded():
        _load_config()

    if not base_url:
        if not project or not lang:
            e = "Without a base_url, both a project and a lang must be given."
            raise SiteNotFoundError(e)
        base_url = "http://{0}.{1}.org".format(lang, project)

    login = (config.wiki.get("username"), config.wiki.get("password"))
    cookiejar = _get_cookiejar()
    user_agent = config.wiki.get("userAgent")
    assert_edit = config.wiki.get("assert")
    maxlag = config.wiki.get("maxlag")
    search_config = config.wiki.get("search")

    # Expand the same placeholders that _make_site_object() expands, so the
    # temporary Site doesn't send a literal "$1"/"$2" in its user agent:
    if user_agent:
        user_agent = user_agent.replace("$1", __version__)
        user_agent = user_agent.replace("$2", platform.python_version())

    # Create a temp Site object to log in and load the other attributes:
    site = Site(base_url=base_url, script_path=script_path, sql=sql,
                login=login, cookiejar=cookiejar, user_agent=user_agent,
                assert_edit=assert_edit, maxlag=maxlag,
                search_config=search_config)

    _add_site_to_sitesdb(site)
    return site

def remove_site(name=None, project=None, lang=None):
    """Delete a site from the sitesdb.

    The return value is True when a site was removed and False when no
    matching site was stored. With 'name', 'project', and 'lang' all given,
    'name' is tried first and the project/lang pair is used only if no site
    with that name was removed. TypeError is raised if a project is given
    without a language, or vice versa.
    """
    if not config.is_loaded():
        _load_config()

    # A project needs a lang and a lang needs a project:
    if bool(project) != bool(lang):
        msg = "Keyword arguments 'lang' and 'project' must be specified together."
        raise TypeError(msg)

    # First choice: remove by explicit name.
    if name and _remove_site_from_sitesdb(name):
        return True

    # Otherwise (no name, or nothing stored under it) try project and lang.
    if project and lang:
        found = _get_site_name_from_sitesdb(project, lang)
        if found:
            return _remove_site_from_sitesdb(found)

    return False

+ 18
- 10
earwigbot/wiki/site.py View File

@@ -71,8 +71,8 @@ class Site(object):
def __init__(self, name=None, project=None, lang=None, base_url=None,
article_path=None, script_path=None, sql=None,
namespaces=None, login=(None, None), cookiejar=None,
user_agent=None, assert_edit=None, maxlag=None,
search_config=(None, None)):
user_agent=None, use_https=False, assert_edit=None,
maxlag=None, search_config=(None, None)):
"""Constructor for new Site instances.

This probably isn't necessary to call yourself unless you're building a
@@ -100,7 +100,8 @@ class Site(object):
self._script_path = script_path
self._namespaces = namespaces

# Attributes used for API queries:
# Attributes used for API queries:
self._use_https = use_https
self._assert_edit = assert_edit
self._maxlag = maxlag
self._max_retries = 5
@@ -138,10 +139,10 @@ class Site(object):
res = ", ".join((
"Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}",
"base_url={_base_url!r}", "article_path={_article_path!r}",
"script_path={_script_path!r}", "assert_edit={_assert_edit!r}",
"maxlag={_maxlag!r}", "sql={_sql!r}", "login={0}",
"user_agent={2!r}", "cookiejar={1})"
))
"script_path={_script_path!r}", "use_https={_use_https!r}",
"assert_edit={_assert_edit!r}", "maxlag={_maxlag!r}",
"sql={_sql_data!r}", "login={0}", "user_agent={2!r}",
"cookiejar={1})"))
name, password = self._login_info
login = "({0}, {1})".format(repr(name), "hidden" if password else None)
cookies = self._cookiejar.__class__.__name__
@@ -163,7 +164,9 @@ class Site(object):

This will first attempt to construct an API url from self._base_url and
self._script_path. We need both of these, or else we'll raise
SiteAPIError.
SiteAPIError. If self._base_url is protocol-relative (introduced in
MediaWiki 1.18), we'll choose HTTPS if self._user_https is True,
otherwise HTTP.

We'll encode the given params, adding format=json along the way, as
well as &assert= and &maxlag= based on self._assert_edit and _maxlag.
@@ -185,7 +188,13 @@ class Site(object):
e = "Tried to do an API query, but no API URL is known."
raise SiteAPIError(e)

url = ''.join((self._base_url, self._script_path, "/api.php"))
base_url = self._base_url
if base_url.startswith("//"): # Protocol-relative URLs from 1.18
if self._use_https:
base_url = "https:" + base_url
else:
base_url = "http:" + base_url
url = ''.join((base_url, self._script_path, "/api.php"))

params["format"] = "json" # This is the only format we understand
if self._assert_edit: # If requested, ensure that we're logged in
@@ -194,7 +203,6 @@ class Site(object):
params["maxlag"] = self._maxlag

data = urlencode(params)

logger.debug("{0} -> {1}".format(url, data))

try:


+ 392
- 0
earwigbot/wiki/sitesdb.py View File

@@ -0,0 +1,392 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 by Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from cookielib import LWPCookieJar, LoadError
import errno
from getpass import getpass
from os import chmod, path
from platform import python_version
import stat
import sqlite3 as sqlite

from earwigbot import __version__
from earwigbot.config import config
from earwigbot.wiki.exceptions import SiteNotFoundError
from earwigbot.wiki.site import Site

__all__ = ["SitesDBManager", "get_site", "add_site", "remove_site"]

class SitesDBManager(object):
    """
    EarwigBot's Wiki Toolset: Sites Database Manager

    This class controls the sites.db file, which stores information about all
    wiki sites known to the bot. Three public methods act as bridges between
    the bot's config files and Site objects:
    get_site    -- returns a Site object corresponding to a given site name
    add_site    -- stores a site in the database, given connection info
    remove_site -- removes a site from the database, given its name

    There's usually no need to use this class directly. All public methods
    here are available as earwigbot.wiki.get_site(), earwigbot.wiki.add_site(),
    and earwigbot.wiki.remove_site(), which use a sites.db file located in the
    same directory as our config.yml file. Lower-level access can be achieved
    by importing the manager class
    (`from earwigbot.wiki.sitesdb import SitesDBManager`).
    """

    def __init__(self, db_file):
        """Set up the manager with an attribute for the sitesdb filename."""
        self._cookiejar = None
        self._sitesdb = db_file

    def _load_config(self):
        """Load the bot's config.

        Called by a config-requiring function, such as get_site(), when config
        has not been loaded. If config.load() reports that passwords are
        encrypted, the user is prompted for the key and the wiki password is
        decrypted.
        """
        is_encrypted = config.load()
        if is_encrypted:  # Passwords in the config file are encrypted
            key = getpass("Enter key to unencrypt bot passwords: ")
            config._decryption_key = key
            config.decrypt(config.wiki, "password")

    def _get_cookiejar(self):
        """Return a LWPCookieJar object loaded from our .cookies file.

        The same jar is returned every time; its backing file is ".cookies"
        inside config.root_dir. If the file doesn't exist, we create it and
        make it readable and writeable only by its owner. If it exists but
        its contents cannot be parsed, the contents are ignored.

        The cookiejar is passed to our Site's constructor, used when it makes
        API queries. This way, we can easily preserve cookies between sites
        (e.g., for CentralAuth), making logins easier.
        """
        # Compare against None explicitly: CookieJar defines __len__, so a jar
        # that holds no cookies yet is falsy and would be rebuilt every call.
        if self._cookiejar is not None:
            return self._cookiejar

        cookie_file = path.join(config.root_dir, ".cookies")
        jar = LWPCookieJar(cookie_file)

        try:
            jar.load()
        except LoadError:
            pass  # File contains bad data, so ignore it completely
        except IOError as e:
            if e.errno != errno.ENOENT:  # Not "No such file or directory"
                raise
            # Create the file and restrict reading/writing only to the
            # owner, so others can't peek at our cookies:
            open(cookie_file, "w").close()
            chmod(cookie_file, stat.S_IRUSR | stat.S_IWUSR)

        # Only cache the jar once it was set up without an unexpected error:
        self._cookiejar = jar
        return self._cookiejar

    def _get_site_kwargs_from_config(self):
        """Return the Site() keyword arguments that come from config.wiki.

        In the configured user agent, "$1" is replaced by the bot's version
        and "$2" by the running Python version.
        """
        user_agent = config.wiki.get("userAgent")
        if user_agent:
            user_agent = user_agent.replace("$1", __version__)
            user_agent = user_agent.replace("$2", python_version())

        return {
            "login": (config.wiki.get("username"),
                      config.wiki.get("password")),
            "cookiejar": self._get_cookiejar(),
            "user_agent": user_agent,
            "use_https": config.wiki.get("useHTTPS", False),
            "assert_edit": config.wiki.get("assert"),
            "maxlag": config.wiki.get("maxlag"),
            "search_config": config.wiki.get("search"),
        }

    def _create_sitesdb(self):
        """Initialize the sitesdb file with its three necessary tables."""
        script = """
        CREATE TABLE sites (site_name, site_project, site_lang, site_base_url,
                            site_article_path, site_script_path);
        CREATE TABLE sql_data (sql_site, sql_data_key, sql_data_value);
        CREATE TABLE namespaces (ns_site, ns_id, ns_name, ns_is_primary_name);
        """
        with sqlite.connect(self._sitesdb) as conn:
            conn.executescript(script)

    def _load_site_from_sitesdb(self, name):
        """Return all information stored in the sitesdb relating to given site.

        The information will be returned as a tuple, containing the site's
        name, project, language, base URL, article path, script path, SQL
        connection data (a dict), and namespaces (a dict mapping each
        namespace ID to a list of names, primary name first), in that order.
        If the site is not found in the database, SiteNotFoundError will be
        raised. If the lookup fails with an OperationalError (e.g. the tables
        don't exist yet), an empty database is created before the exception
        is raised.
        """
        query1 = "SELECT * FROM sites WHERE site_name = ?"
        query2 = "SELECT sql_data_key, sql_data_value FROM sql_data WHERE sql_site = ?"
        query3 = "SELECT ns_id, ns_name, ns_is_primary_name FROM namespaces WHERE ns_site = ?"
        error = "Site '{0}' not found in the sitesdb.".format(name)
        with sqlite.connect(self._sitesdb) as conn:
            try:
                site_data = conn.execute(query1, (name,)).fetchone()
            except sqlite.OperationalError:
                self._create_sitesdb()
                raise SiteNotFoundError(error)
            if not site_data:
                raise SiteNotFoundError(error)
            sql_data = conn.execute(query2, (name,)).fetchall()
            ns_data = conn.execute(query3, (name,)).fetchall()

        name, project, lang, base_url, article_path, script_path = site_data
        sql = dict(sql_data)
        namespaces = {}
        for ns_id, ns_name, ns_is_primary_name in ns_data:
            names = namespaces.setdefault(ns_id, [])
            if ns_is_primary_name:  # "Primary" name goes first in list
                names.insert(0, ns_name)
            else:  # Ordering of the aliases doesn't matter
                names.append(ns_name)

        return (name, project, lang, base_url, article_path, script_path, sql,
                namespaces)

    def _make_site_object(self, name):
        """Return a Site object associated with the site 'name' in our sitesdb.

        This calls _load_site_from_sitesdb(), so SiteNotFoundError will be
        raised if the site is not in our sitesdb.
        """
        (name, project, lang, base_url, article_path, script_path, sql,
         namespaces) = self._load_site_from_sitesdb(name)
        kwargs = self._get_site_kwargs_from_config()

        return Site(name=name, project=project, lang=lang, base_url=base_url,
                    article_path=article_path, script_path=script_path,
                    sql=sql, namespaces=namespaces, **kwargs)

    def _get_site_name_from_sitesdb(self, project, lang):
        """Return the name of the first site with the given project and lang.

        If the site is not found, return None. If the lookup fails with an
        OperationalError (e.g. the tables don't exist yet), an empty sitesdb
        is created and None is returned.
        """
        query = "SELECT site_name FROM sites WHERE site_project = ? and site_lang = ?"
        with sqlite.connect(self._sitesdb) as conn:
            try:
                row = conn.execute(query, (project, lang)).fetchone()
            except sqlite.OperationalError:
                self._create_sitesdb()
                return None
        # fetchone() yields a one-element row (or None); callers want the name
        return row[0] if row else None

    def _add_site_to_sitesdb(self, site):
        """Extract relevant info from a Site object and add it to the sitesdb.

        Works like a reverse _load_site_from_sitesdb(); the site's project,
        language, base URL, article path, script path, SQL connection data, and
        namespaces are extracted from the site and inserted into the sites
        database. Any rows already stored under the same site name are
        replaced. If the sitesdb tables don't exist, we'll create them first.
        The Site object itself is left unmodified.
        """
        name = site.name()
        sites_data = (name, site.project(), site.lang(), site._base_url,
                      site._article_path, site._script_path)
        # _sql_data may be None when no SQL connection info was supplied:
        sql_data = [(name, key, val)
                    for key, val in (site._sql_data or {}).items()]
        ns_data = []
        for ns_id, ns_names in site._namespaces.items():
            # The first name in each list is the primary one. Iterate instead
            # of pop()ing so the site's own namespace lists are not altered.
            for index, ns_name in enumerate(ns_names):
                ns_data.append((name, ns_id, ns_name, index == 0))

        with sqlite.connect(self._sitesdb) as conn:
            check_exists = "SELECT 1 FROM sites WHERE site_name = ?"
            try:
                exists = conn.execute(check_exists, (name,)).fetchone()
            except sqlite.OperationalError:
                self._create_sitesdb()
            else:
                if exists:
                    conn.execute("DELETE FROM sites WHERE site_name = ?", (name,))
                    conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
                    conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
            conn.execute("INSERT INTO sites VALUES (?, ?, ?, ?, ?, ?)", sites_data)
            conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)", sql_data)
            conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)", ns_data)

    def _remove_site_from_sitesdb(self, name):
        """Remove a site by name from the sitesdb.

        Returns True if a row for 'name' was deleted from the sites table (its
        sql_data and namespaces rows are then deleted too), or False if no
        such site was stored. If the delete fails with an OperationalError
        (e.g. the tables don't exist yet), an empty sitesdb is created and
        False is returned.
        """
        with sqlite.connect(self._sitesdb) as conn:
            try:
                cursor = conn.execute("DELETE FROM sites WHERE site_name = ?", (name,))
            except sqlite.OperationalError:
                self._create_sitesdb()
                return False
            if cursor.rowcount == 0:
                return False
            conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
            conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
        return True

    def get_site(self, name=None, project=None, lang=None):
        """Return a Site instance based on information from the sitesdb.

        With no arguments, return the default site as specified by our config
        file. This is config.wiki["defaultSite"].

        With 'name' specified, return the site with that name. This is
        equivalent to the site's 'wikiid' in the API, like 'enwiki'.

        With 'project' and 'lang' specified, return the site whose project and
        language match these values. If there are multiple sites with the same
        values (unlikely), this is not a reliable way of loading a site. Call
        the function with an explicit 'name' in that case.

        The Site is constructed with config.wiki["username"] and
        config.wiki["password"] as its login info.

        Specifying a project without a lang or a lang without a project will
        raise TypeError. If all three args are specified, 'name' will be first
        tried, then 'project' and 'lang' if 'name' doesn't work. If a site
        cannot be found in the sitesdb, SiteNotFoundError will be raised. An
        empty sitesdb will be created if none is found.
        """
        if not config.is_loaded():
            self._load_config()

        # Someone specified a project without a lang, or vice versa:
        if (project and not lang) or (not project and lang):
            e = "Keyword arguments 'lang' and 'project' must be specified together."
            raise TypeError(e)

        # No args given, so return our default site:
        if not name and not project and not lang:
            try:
                default = config.wiki["defaultSite"]
            except KeyError:
                e = "Default site is not specified in config."
                raise SiteNotFoundError(e)
            return self._make_site_object(default)

        # Name arg given, but don't look at others unless `name` isn't found:
        if name:
            try:
                return self._make_site_object(name)
            except SiteNotFoundError:
                if project and lang:
                    name = self._get_site_name_from_sitesdb(project, lang)
                    if name:
                        return self._make_site_object(name)
                raise

        # If we end up here, then project and lang are the only args given:
        name = self._get_site_name_from_sitesdb(project, lang)
        if name:
            return self._make_site_object(name)
        e = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang)
        raise SiteNotFoundError(e)

    def add_site(self, project=None, lang=None, base_url=None,
                 script_path="/w", sql=None):
        """Add a site to the sitesdb so it can be retrieved with get_site().

        If only a project and a lang are given, we'll guess the base_url as
        "//{lang}.{project}.org" (which is protocol-relative, becoming 'https'
        if 'useHTTPS' is True in config otherwise 'http'). If this is wrong,
        provide the correct base_url as an argument (in which case project and
        lang are ignored). Most wikis use "/w" as the script path (meaning the
        API is located at "{base_url}{script_path}/api.php" ->
        "//{lang}.{project}.org/w/api.php"), so this is the default. If your
        wiki is different, provide the script_path as an argument. The only
        other argument to Site() that we can't get from config files or by
        querying the wiki itself is SQL connection info, so provide a dict of
        kwargs as `sql` and Site will pass it to oursql.connect(**sql),
        allowing you to make queries with site.sql_query().

        Returns the newly created Site object. If a site with the same name
        is already in our sitesdb, its stored data is replaced (this can be
        done purposefully to update old site info). Raises SiteNotFoundError
        if not enough information has been provided to identify the site
        (e.g. a project but not a lang).
        """
        if not config.is_loaded():
            self._load_config()

        if not base_url:
            if not project or not lang:
                e = "Without a base_url, both a project and a lang must be given."
                raise SiteNotFoundError(e)
            base_url = "//{0}.{1}.org".format(lang, project)

        kwargs = self._get_site_kwargs_from_config()

        # Create a temp Site object to log in and load the other attributes:
        site = Site(base_url=base_url, script_path=script_path, sql=sql,
                    **kwargs)

        self._add_site_to_sitesdb(site)
        return site

    def remove_site(self, name=None, project=None, lang=None):
        """Remove a site from the sitesdb.

        Returns True if the site was removed successfully or False if the site
        was not in our sitesdb originally. If all three args (name, project,
        and lang) are given, we'll first try 'name' and then try the latter two
        if 'name' wasn't found in the database. Raises TypeError if a project
        was given but not a language, or vice versa. Will create an empty
        sitesdb if none was found.
        """
        if not config.is_loaded():
            self._load_config()

        # Someone specified a project without a lang, or vice versa:
        if (project and not lang) or (not project and lang):
            e = "Keyword arguments 'lang' and 'project' must be specified together."
            raise TypeError(e)

        # First choice: remove by explicit name.
        if name and self._remove_site_from_sitesdb(name):
            return True

        # Otherwise (no name, or nothing stored under it) try project/lang:
        if project and lang:
            found = self._get_site_name_from_sitesdb(project, lang)
            if found:
                return self._remove_site_from_sitesdb(found)

        return False

# Module-level default manager. This file lives at earwigbot/wiki/sitesdb.py,
# so the directory three levels above it is the project root, where sites.db
# is kept.
_manager = SitesDBManager(path.join(
    path.dirname(path.dirname(path.dirname(path.abspath(__file__)))),
    "sites.db"))

# Expose the default manager's public methods as plain functions:
get_site, add_site, remove_site = (
    _manager.get_site, _manager.add_site, _manager.remove_site)

Loading…
Cancel
Save