Browse Source

get_site(), add_site(), remove_site() implemented

tags/v0.1^2
Ben Kurtovic 12 years ago
parent
commit
4a1cb41162
1 changed files with 238 additions and 81 deletions
  1. +238
    -81
      earwigbot/wiki/functions.py

+ 238
- 81
earwigbot/wiki/functions.py View File

@@ -24,7 +24,9 @@
EarwigBot's Wiki Toolset: Misc Functions

This module, a component of the wiki package, contains miscellaneous functions
that are not methods of any class, like get_site().
that are not methods of any class. Currently, it contains get_site(),
add_site(), and remove_site(). These functions act as bridges between the bot's
config files and Site objects.

There's no need to import this module explicitly. All functions here are
automatically available from earwigbot.wiki.
@@ -36,21 +38,25 @@ from getpass import getpass
from os import chmod, path
import platform
import stat
import sqlite3 as sqlite

import earwigbot
from earwigbot import __version__
from earwigbot.config import config
from earwigbot.wiki.exceptions import SiteNotFoundError
from earwigbot.wiki.site import Site

__all__ = ["get_site", "add_site", "del_site"]
__all__ = ["get_site", "add_site", "remove_site"]

# Module-wide cookie jar shared by every Site; _get_cookiejar() stores the jar
# here and returns this same object on later calls.
_cookiejar = None
# Filename passed to sqlite.connect() by the sitesdb helpers below. It is a
# relative path, so it is resolved against the current working directory.
_sitesdb = "sites.db"

def _load_config():
"""Called by a config-requiring function, such as get_site(), when config
"""Load the bot's config.

Called by a config-requiring function, such as get_site(), when config
has not been loaded. This will usually happen only if we're running code
directly from Python's interpreter and not the bot itself, because
earwigbot.py or core/main.py will already call these functions.
bot.py and earwigbot.runner will already call these functions.
"""
is_encrypted = config.load()
if is_encrypted: # Passwords in the config file are encrypted
@@ -59,21 +65,20 @@ def _load_config():
config.decrypt(config.wiki, "password")

def _get_cookiejar():
"""Returns a LWPCookieJar object loaded from our .cookies file. The same
one is returned every time.
"""Return a LWPCookieJar object loaded from our .cookies file.

The .cookies file is located in the project root, same directory as
config.yml and bot.py. If it doesn't exist, we will create the file and set
it to be readable and writeable only by us. If it exists but the
information inside is bogus, we will ignore it.
The same .cookies file is returned every time, located in the project root,
same directory as config.yml and bot.py. If it doesn't exist, we will
create the file and set it to be readable and writeable only by us. If it
exists but the information inside is bogus, we will ignore it.

This is normally called by _get_site_object_from_dict() (in turn called by
This is normally called by _make_site_object() (in turn called by
get_site()), and the cookiejar is passed to our Site's constructor, used
when it makes API queries. This way, we can easily preserve cookies between
sites (e.g., for CentralAuth), making logins easier.
"""
global _cookiejar
if _cookiejar is not None:
if _cookiejar:
return _cookiejar

cookie_file = path.join(config.root_dir, ".cookies")
@@ -94,17 +99,63 @@ def _get_cookiejar():

return _cookiejar

def _get_site_object_from_dict(name, d):
"""Return a Site object based on the contents of a dict, probably acquired
through our config file, and a separate name.
def _create_sitesdb():
    """Initialize the sitesdb file with its three necessary tables.

    The tables ('sites', 'sql_data', and 'namespaces') are only created if
    they are missing, so calling this on a partially or fully initialized
    sitesdb is harmless instead of raising sqlite's "table already exists"
    error.
    """
    from contextlib import closing

    script = """
        CREATE TABLE IF NOT EXISTS sites (site_name, site_project, site_lang,
                                          site_base_url, site_article_path,
                                          site_script_path);
        CREATE TABLE IF NOT EXISTS sql_data (sql_site, sql_data_key,
                                             sql_data_value);
        CREATE TABLE IF NOT EXISTS namespaces (ns_site, ns_id, ns_name,
                                               ns_is_primary_name);
    """
    # A sqlite connection used as a context manager only commits or rolls
    # back; it never closes itself, so closing() releases the file handle.
    with closing(sqlite.connect(_sitesdb)) as conn:
        conn.executescript(script)

def _load_site_from_sitesdb(name):
    """Return all information stored in the sitesdb relating to site 'name'.

    The information will be returned as a tuple, containing the site's project,
    language, base URL, article path, script path, SQL connection data (a dict
    of key -> value), and namespaces (a dict of namespace ID -> list of names,
    primary name first), in that order.

    If the site is not found in the database, SiteNotFoundError will be raised.
    If the query fails because the tables do not exist yet, an empty database
    will be created before the exception is raised.
    """
    from contextlib import closing

    # Columns are listed explicitly (rather than SELECT *) so that the row
    # unpacks into exactly the five values below; the 'sites' table also
    # stores site_name, which we already know.
    query1 = ("SELECT site_project, site_lang, site_base_url, "
              "site_article_path, site_script_path FROM sites "
              "WHERE site_name = ?")
    query2 = "SELECT sql_data_key, sql_data_value FROM sql_data WHERE sql_site = ?"
    query3 = "SELECT ns_id, ns_name, ns_is_primary_name FROM namespaces WHERE ns_site = ?"
    error = "Site '{0}' not found in the sitesdb.".format(name)

    # closing() is needed because a sqlite connection's own context manager
    # never closes the connection.
    with closing(sqlite.connect(_sitesdb)) as conn:
        try:
            site_data = conn.execute(query1, (name,)).fetchone()
        except sqlite.OperationalError:  # Tables are missing: set them up
            _create_sitesdb()
            raise SiteNotFoundError(error)
        if not site_data:
            raise SiteNotFoundError(error)
        sql_data = conn.execute(query2, (name,)).fetchall()
        ns_data = conn.execute(query3, (name,)).fetchall()

    project, lang, base_url, article_path, script_path = site_data
    sql = dict(sql_data)
    namespaces = {}
    for ns_id, ns_name, ns_is_primary_name in ns_data:
        names = namespaces.setdefault(ns_id, [])
        if ns_is_primary_name:  # "Primary" name goes first in list
            names.insert(0, ns_name)
        else:  # Ordering of the aliases doesn't matter
            names.append(ns_name)

    return project, lang, base_url, article_path, script_path, sql, namespaces

def _make_site_object(name):
"""Return a Site object associated with the site 'name' in our sitesdb.

This calls _load_site_from_sitesdb(), so SiteNotFoundError will be raised
if the site is not in our sitesdb.
"""
(project, lang, base_url, article_path, script_path, sql,
namespaces) = _load_site_from_sitesdb(name)
login = (config.wiki.get("username"), config.wiki.get("password"))
cookiejar = _get_cookiejar()
user_agent = config.wiki.get("userAgent")
@@ -113,7 +164,7 @@ def _get_site_object_from_dict(name, d):
search_config = config.wiki.get("search")

if user_agent:
user_agent = user_agent.replace("$1", earwigbot.__version__)
user_agent = user_agent.replace("$1", __version__)
user_agent = user_agent.replace("$2", platform.python_version())

return Site(name=name, project=project, lang=lang, base_url=base_url,
@@ -122,90 +173,196 @@ def _get_site_object_from_dict(name, d):
user_agent=user_agent, assert_edit=assert_edit, maxlag=maxlag,
search_config=search_config)

def _get_site_name_from_sitesdb(project, lang):
    """Return the name of the first site with the specified project and lang.

    The name is returned as a plain string, ready to be used as a site name
    by the other sitesdb helpers. If the site is not found, return None. If
    the query fails because the tables do not exist yet, an empty sitesdb will
    be created and None is returned.
    """
    from contextlib import closing

    query = "SELECT site_name FROM sites WHERE site_project = ? AND site_lang = ?"
    # closing() is needed because a sqlite connection's own context manager
    # never closes the connection.
    with closing(sqlite.connect(_sitesdb)) as conn:
        try:
            row = conn.execute(query, (project, lang)).fetchone()
        except sqlite.OperationalError:  # Tables are missing: set them up
            _create_sitesdb()
            return None
    # fetchone() gives a one-element row (or None); unwrap it to the name.
    return row[0] if row else None

def _add_site_to_sitesdb(site):
    """Extract relevant info from a Site object and add it to the sitesdb.

    Works like a reverse _load_site_from_sitesdb(); the site's project,
    language, base URL, article path, script path, SQL connection data, and
    namespaces are extracted from the site and inserted into the sites
    database. Any rows already stored under the same site name are replaced.
    If the sitesdb tables don't exist, we'll create them first.

    The Site object itself is only read, never modified.
    """
    from contextlib import closing

    name = site.name
    sites_data = (name, site.project, site.lang, site._base_url,
                  site._article_path, site._script_path)
    sql_data = [(name, key, val) for key, val in site._sql_data.items()]
    # The first name in each list is the namespace's primary name; the rest
    # are aliases. Iterate by index instead of pop()-ing so that the site's
    # own namespace lists are left untouched (and empty lists are tolerated).
    ns_data = []
    for ns_id, ns_names in site._namespaces.items():
        for index, ns_name in enumerate(ns_names):
            ns_data.append((name, ns_id, ns_name, index == 0))

    check_exists = "SELECT 1 FROM sites WHERE site_name = ?"
    # closing() is needed because a sqlite connection's own context manager
    # never closes the connection.
    with closing(sqlite.connect(_sitesdb)) as conn:
        try:
            exists = conn.execute(check_exists, (name,)).fetchone()
        except sqlite.OperationalError:  # Tables are missing: set them up
            _create_sitesdb()
            exists = None
        with conn:  # Replace the old rows and add the new ones atomically
            if exists:
                conn.execute("DELETE FROM sites WHERE site_name = ?", (name,))
                conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
                conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
            conn.execute("INSERT INTO sites VALUES (?, ?, ?, ?, ?, ?)", sites_data)
            conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)", sql_data)
            conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)", ns_data)

def _remove_site_from_sitesdb(name):
    """Remove a site by name from the sitesdb.

    Return True if a site called 'name' was stored and has been deleted along
    with its SQL connection data and namespaces, or False if there was no such
    site. If the sitesdb tables do not exist yet, an empty sitesdb will be
    created and False is returned.
    """
    from contextlib import closing

    # closing() releases the connection (sqlite's own context manager never
    # does); the inner 'conn' context commits the three deletes together.
    with closing(sqlite.connect(_sitesdb)) as conn, conn:
        try:
            cursor = conn.execute("DELETE FROM sites WHERE site_name = ?", (name,))
        except sqlite.OperationalError:  # Tables are missing: set them up
            _create_sitesdb()
            return False
        if cursor.rowcount == 0:  # Nothing was stored under this name
            return False
        conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
        conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
        return True

def get_site(name=None, project=None, lang=None):
    """Return a Site instance based on information from the sitesdb.

    Called without arguments, this returns the default site, whose name is
    read from config.wiki["defaultSite"].

    Called with 'name', this returns the site stored under that name. This is
    equivalent to the site's 'wikiid' in the API, like 'enwiki'.

    Called with 'project' and 'lang', this returns the site whose project and
    language match these values. If several sites share the same values
    (unlikely), this is not a reliable way of loading a site; pass an explicit
    'name' in that case.

    We will attempt to login to the site automatically using
    config.wiki["username"] and config.wiki["password"] if both are defined.

    Giving a project without a lang, or a lang without a project, raises
    TypeError. If all three args are given, 'name' is tried first and
    'project' and 'lang' are only used if 'name' doesn't work. If no matching
    site can be found in the sitesdb, SiteNotFoundError is raised. An empty
    sitesdb will be created if none is found.
    """
    # The bot normally loads the config itself; do it here for direct use:
    if not config.is_loaded():
        _load_config()

    # 'project' and 'lang' only make sense as a pair:
    if bool(project) != bool(lang):
        e = "Keyword arguments 'lang' and 'project' must be specified together."
        raise TypeError(e)

    # Nothing was asked for specifically, so fall back on the default site:
    if not (name or project or lang):
        try:
            default = config.wiki["defaultSite"]
        except KeyError:
            e = "Default site is not specified in config."
            raise SiteNotFoundError(e)
        return _make_site_object(default)

    # An explicit name wins; project/lang are only consulted if it is unknown:
    if name:
        try:
            return _make_site_object(name)
        except SiteNotFoundError:
            fallback = None
            if project and lang:
                fallback = _get_site_name_from_sitesdb(project, lang)
            if fallback:
                return _make_site_object(fallback)
            raise

    # Only project and lang were given:
    found = _get_site_name_from_sitesdb(project, lang)
    if not found:
        e = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang)
        raise SiteNotFoundError(e)
    return _make_site_object(found)

def add_site(project=None, lang=None, base_url=None, script_path="/w",
             sql=None):
    """Add a site to the sitesdb so it can be retrieved with get_site() later.

    If only a project and a lang are given, we'll guess the base_url as
    "http://{lang}.{project}.org". If this is wrong, provide the correct
    base_url as an argument (in which case project and lang are ignored). Most
    wikis use "/w" as the script path (meaning the API is located at
    "{base_url}{script_path}/api.php" -> "http://{lang}.{project}.org/w/api.php"),
    so this is the default. If your wiki is different, provide the script_path
    as an argument. The only other argument to Site() that we can't get from
    config files or by querying the wiki itself is SQL connection info, so
    provide a dict of kwargs as 'sql' and Site will pass it to
    oursql.connect(**sql), allowing you to make queries with site.sql_query().

    Returns the Site object that was created and stored. If a site with the
    same name is already in our sitesdb, its stored info is replaced (this can
    be done purposefully to update old site info). Raises SiteNotFoundError if
    not enough information has been provided to identify the site (e.g. a
    project but not a lang, with no base_url).
    """
    if not config.is_loaded():
        _load_config()

    if not base_url:
        if not project or not lang:
            e = "Without a base_url, both a project and a lang must be given."
            raise SiteNotFoundError(e)
        base_url = "http://{0}.{1}.org".format(lang, project)

    login = (config.wiki.get("username"), config.wiki.get("password"))
    cookiejar = _get_cookiejar()
    user_agent = config.wiki.get("userAgent")
    assert_edit = config.wiki.get("assert")
    maxlag = config.wiki.get("maxlag")
    search_config = config.wiki.get("search")

    # Expand the version placeholders the same way _make_site_object() does,
    # so this Site doesn't talk to the wiki with a literal "$1"/"$2":
    if user_agent:
        user_agent = user_agent.replace("$1", __version__)
        user_agent = user_agent.replace("$2", platform.python_version())

    # Create a temp Site object to log in and load the other attributes:
    site = Site(base_url=base_url, script_path=script_path, sql=sql,
                login=login, cookiejar=cookiejar, user_agent=user_agent,
                assert_edit=assert_edit, maxlag=maxlag,
                search_config=search_config)

    _add_site_to_sitesdb(site)
    return site

def remove_site(name=None, project=None, lang=None):
    """Remove a site from the sitesdb.

    Returns True if the site was removed successfully or False if the site was
    not in our sitesdb originally. If all three args (name, project, and lang)
    are given, 'name' is tried first and the other two are only used if 'name'
    wasn't found in the database. Raises TypeError if a project was given but
    not a language, or vice versa. Will create an empty sitesdb if none was
    found.
    """
    if not config.is_loaded():
        _load_config()

    # 'project' and 'lang' only make sense as a pair:
    if bool(project) != bool(lang):
        e = "Keyword arguments 'lang' and 'project' must be specified together."
        raise TypeError(e)

    have_pair = bool(project and lang)
    if name:
        removed = _remove_site_from_sitesdb(name)
        # Done if it worked, or if there is no project/lang to fall back on:
        if removed or not have_pair:
            return removed
    elif not have_pair:
        return False

    # Look the site up by project and lang instead:
    match = _get_site_name_from_sitesdb(project, lang)
    if match:
        return _remove_site_from_sitesdb(match)
    return False

Loading…
Cancel
Save