Explorar el Código

Merge branch 'feature/sitesdb' into develop

tags/v0.1^2
Ben Kurtovic hace 12 años
padre
commit
f886729df5
Se han modificado 8 ficheros con 444 adiciones y 263 borrados
  1. +7
    -15
      .gitignore
  2. +1
    -1
      earwigbot/config.py
  3. +1
    -1
      earwigbot/irc/watcher.py
  4. +1
    -1
      earwigbot/wiki/__init__.py
  5. +0
    -211
      earwigbot/wiki/functions.py
  6. +3
    -3
      earwigbot/wiki/page.py
  7. +39
    -31
      earwigbot/wiki/site.py
  8. +392
    -0
      earwigbot/wiki/sitesdb.py

+ 7
- 15
.gitignore Ver fichero

@@ -1,19 +1,11 @@
# Ignore python bytecode:
*.pyc

# Ignore bot-specific config file:
config.yml

# Ignore logs directory:
# Ignore bot-specific files:
logs/
# Ignore cookies file:
config.yml
sites.db
.cookies

# Ignore OS X's crud:
.DS_Store
# Ignore python bytecode:
*.pyc

# Ignore pydev's nonsense:
.project
.pydevproject
.settings/
# Ignore OS X's stuff:
.DS_Store

+ 1
- 1
earwigbot/config.py Ver fichero

@@ -176,7 +176,7 @@ class _BotConfig(object):
return self._root_dir

@property
def config_path(self):
def path(self):
return self._config_path

@property


+ 1
- 1
earwigbot/irc/watcher.py Ver fichero

@@ -89,7 +89,7 @@ class Watcher(IRCConnection):
return
module = imp.new_module("_rc_event_processing_rules")
try:
exec compile(rules, config.config_path, "exec") in module.__dict__
exec compile(rules, config.path, "exec") in module.__dict__
except Exception:
e = "Could not compile config file's RC event rules"
self.logger.exception(e)


+ 1
- 1
earwigbot/wiki/__init__.py Ver fichero

@@ -36,9 +36,9 @@ logger.addHandler(_log.NullHandler())

from earwigbot.wiki.constants import *
from earwigbot.wiki.exceptions import *
from earwigbot.wiki.functions import *

from earwigbot.wiki.category import Category
from earwigbot.wiki.page import Page
from earwigbot.wiki.site import Site
from earwigbot.wiki.sitesdb import get_site, add_site, remove_site
from earwigbot.wiki.user import User

+ 0
- 211
earwigbot/wiki/functions.py Ver fichero

@@ -1,211 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 by Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""
EarwigBot's Wiki Toolset: Misc Functions

This module, a component of the wiki package, contains miscellaneous functions
that are not methods of any class, like get_site().

There's no need to import this module explicitly. All functions here are
automatically available from earwigbot.wiki.
"""

from cookielib import LWPCookieJar, LoadError
import errno
from getpass import getpass
from os import chmod, path
import platform
import stat

import earwigbot
from earwigbot.config import config
from earwigbot.wiki.exceptions import SiteNotFoundError
from earwigbot.wiki.site import Site

# Public names of this module; these are what a star-import of it exposes:
__all__ = ["get_site", "add_site", "del_site"]

# Module-wide cookie jar cache; filled in lazily by _get_cookiejar():
_cookiejar = None

def _load_config():
    """Load the bot's config, prompting for a decryption key if necessary.

    Used by config-requiring functions such as get_site() when the config has
    not been loaded yet. config.load() reports whether the passwords stored in
    the config file are encrypted; if they are, the key is read from the
    terminal with getpass() and the wiki password is decrypted in place.
    """
    if not config.load():
        return  # Nothing is encrypted, so there is nothing left to do

    # Passwords in the config file are encrypted, so ask for the key:
    config._decryption_key = getpass("Enter key to unencrypt bot passwords: ")
    config.decrypt(config.wiki, "password")

def _get_cookiejar():
    """Return the shared LWPCookieJar backed by our .cookies file.

    The jar is built on the first call and cached in the module-level
    `_cookiejar`, so every later call returns the very same object.

    The .cookies file lives in config.root_dir. If it does not exist yet, an
    empty file is created and its permissions are restricted to reading and
    writing by the owner. If it exists but cannot be parsed, its contents are
    ignored. Any other I/O error is propagated to the caller.

    This is normally called by _get_site_object_from_dict() (in turn called by
    get_site()), which hands the jar to the Site constructor so that cookies
    can be shared between sites (e.g., for CentralAuth).
    """
    global _cookiejar
    if _cookiejar is None:
        cookie_file = path.join(config.root_dir, ".cookies")
        _cookiejar = LWPCookieJar(cookie_file)

        try:
            _cookiejar.load()
        except LoadError:
            pass  # File contains bad data, so ignore it completely
        except IOError as e:
            if e.errno != errno.ENOENT:
                raise
            # "No such file or directory": create the file and restrict
            # reading/writing to the owner, so others can't peek at our
            # cookies:
            open(cookie_file, "w").close()
            chmod(cookie_file, stat.S_IRUSR | stat.S_IWUSR)

    return _cookiejar

def _get_site_object_from_dict(name, d):
    """Build a Site named `name` from the dict `d` plus bot-wide settings.

    Per-site values (project, lang, baseURL, articlePath, scriptPath, sql and
    namespaces) are read from `d`; credentials, user agent, assert, maxlag and
    search settings are read from config.wiki. In the user agent, "$1" is
    replaced by the EarwigBot version and "$2" by the Python version.
    """
    site_kwargs = {
        "name": name,
        "project": d.get("project"),
        "lang": d.get("lang"),
        "base_url": d.get("baseURL"),
        "article_path": d.get("articlePath"),
        "script_path": d.get("scriptPath"),
        "sql": d.get("sql", {}),
        "namespaces": d.get("namespaces", {}),
    }
    site_kwargs["login"] = (config.wiki.get("username"),
                            config.wiki.get("password"))
    site_kwargs["cookiejar"] = _get_cookiejar()
    agent = config.wiki.get("userAgent")
    site_kwargs["assert_edit"] = config.wiki.get("assert")
    site_kwargs["maxlag"] = config.wiki.get("maxlag")
    site_kwargs["search_config"] = config.wiki.get("search")

    if agent:
        # Fill in the version placeholders supported by the config file:
        agent = agent.replace("$1", earwigbot.__version__)
        agent = agent.replace("$2", platform.python_version())
    site_kwargs["user_agent"] = agent

    return Site(**site_kwargs)

def get_site(name=None, project=None, lang=None):
    """Return a Site instance based on information from our config file.

    With no arguments, return the default site as specified by our config
    file. This is default = config.wiki["defaultSite"];
    config.wiki["sites"][default].

    With `name` specified, return the site specified by
    config.wiki["sites"][name].

    With `project` and `lang` specified, return the site specified by the
    member of config.wiki["sites"], `s`, for which s["project"] == project and
    s["lang"] == lang.

    We will attempt to login to the site automatically using
    config.wiki["username"] and config.wiki["password"] if both are defined.

    Specifying a project without a lang or a lang without a project will raise
    TypeError. If all three args are specified, `name` will be first tried,
    then `project` and `lang`. If, with any number of args, a site cannot be
    found in the config, SiteNotFoundError is raised.
    """
    # Check if config has been loaded, and load it if it hasn't:
    if not config.is_loaded():
        _load_config()

    # Someone specified a project without a lang (or a lang without a project)!
    if (project is None) != (lang is None):
        e = "Keyword arguments 'lang' and 'project' must be specified together."
        raise TypeError(e)

    # No args given, so return our default site (project is None implies lang
    # is None, so we don't need to add that in):
    if name is None and project is None:
        try:
            default = config.wiki["defaultSite"]
        except KeyError:
            e = "Default site is not specified in config."
            raise SiteNotFoundError(e)
        try:
            site = config.wiki["sites"][default]
        except KeyError:
            e = "Default site specified by config is not in the config's sites list."
            raise SiteNotFoundError(e)
        return _get_site_object_from_dict(default, site)

    # Name arg given, but don't look at others unless `name` isn't found:
    if name is not None:
        try:
            site = config.wiki["sites"][name]
        except KeyError:
            if project is None:  # Implies lang is None; only name was given
                e = "Site '{0}' not found in config.".format(name)
                raise SiteNotFoundError(e)
            # The formatted message must be kept: str.format() returns a new
            # string rather than modifying the template in place.
            not_found = "Neither site '{0}' nor site '{1}:{2}' found in config."
            not_found = not_found.format(name, project, lang)
        else:
            return _get_site_object_from_dict(name, site)
    else:
        not_found = "Site '{0}:{1}' not found in config.".format(project, lang)

    # If we end up here, then project and lang are both not None. Entries
    # missing a "project" or "lang" key simply never match:
    for sitename, site in config.wiki["sites"].items():
        if site.get("project") == project and site.get("lang") == lang:
            return _get_site_object_from_dict(sitename, site)
    raise SiteNotFoundError(not_found)

def add_site():
    """STUB: not implemented yet, because config editing is required first.

    Intended contract: return True if the site was added successfully or False
    if the site was already in our config, raising ConfigError if saving the
    updated file failed for some reason. For now this does nothing and returns
    None.
    """
    return None

def del_site(name):
    """STUB: not implemented yet, because config editing is required first.

    Intended contract: return True if the site was removed successfully or
    False if the site was not in our config originally, raising ConfigError if
    saving the updated file failed for some reason. For now `name` is ignored
    and None is returned.
    """
    return None

+ 3
- 3
earwigbot/wiki/page.py Ver fichero

@@ -174,7 +174,7 @@ class Page(CopyrightMixin):

Assuming the API is sound, this should not raise any exceptions.
"""
if result is None:
if not result:
params = {"action": "query", "rvprop": "user", "intoken": "edit",
"prop": "info|revisions", "rvlimit": 1, "rvdir": "newer",
"titles": self._title, "inprop": "protection|url"}
@@ -240,7 +240,7 @@ class Page(CopyrightMixin):
Don't call this directly, ever - use .get(force=True) if you want to
force content reloading.
"""
if result is None:
if not result:
params = {"action": "query", "prop": "revisions", "rvlimit": 1,
"rvprop": "content|timestamp", "titles": self._title}
result = self._site._api_query(params)
@@ -471,7 +471,7 @@ class Page(CopyrightMixin):
"""
if force:
self._load_wrapper()
if self._fullurl is not None:
if self._fullurl:
return self._fullurl
else:
slug = quote(self._title.replace(" ", "_"), safe="/:")


+ 39
- 31
earwigbot/wiki/site.py Ver fichero

@@ -71,18 +71,19 @@ class Site(object):
def __init__(self, name=None, project=None, lang=None, base_url=None,
article_path=None, script_path=None, sql=None,
namespaces=None, login=(None, None), cookiejar=None,
user_agent=None, assert_edit=None, maxlag=None,
search_config=(None, None)):
user_agent=None, use_https=False, assert_edit=None,
maxlag=None, search_config=(None, None)):
"""Constructor for new Site instances.

This probably isn't necessary to call yourself unless you're building a
Site that's not in your config and you don't want to add it - normally
all you need is tools.get_site(name), which creates the Site for you
based on your config file. We accept a bunch of kwargs, but the only
ones you really "need" are `base_url` and `script_path` - this is
enough to figure out an API url. `login`, a tuple of
(username, password), is highly recommended. `cookiejar` will be used
to store cookies, and we'll use a normal CookieJar if none is given.
based on your config file and the sites database. We accept a bunch of
kwargs, but the only ones you really "need" are `base_url` and
`script_path` - this is enough to figure out an API url. `login`, a
tuple of (username, password), is highly recommended. `cookiejar` will
be used to store cookies, and we'll use a normal CookieJar if none is
given.

First, we'll store the given arguments as attributes, then set up our
URL opener. We'll load any of the attributes that weren't given from
@@ -99,7 +100,8 @@ class Site(object):
self._script_path = script_path
self._namespaces = namespaces

# Attributes used for API queries:
# Attributes used for API queries:
self._use_https = use_https
self._assert_edit = assert_edit
self._maxlag = maxlag
self._max_retries = 5
@@ -112,11 +114,11 @@ class Site(object):
self._search_config = search_config

# Set up cookiejar and URL opener for making API queries:
if cookiejar is not None:
if cookiejar:
self._cookiejar = cookiejar
else:
self._cookiejar = CookieJar()
if user_agent is None:
if not user_agent:
user_agent = USER_AGENT # Set default UA from wiki.constants
self._opener = build_opener(HTTPCookieProcessor(self._cookiejar))
self._opener.addheaders = [("User-Agent", user_agent),
@@ -127,9 +129,9 @@ class Site(object):

# If we have a name/pass and the API says we're not logged in, log in:
self._login_info = name, password = login
if name is not None and password is not None:
if name and password:
logged_in_as = self._get_username_from_cookies()
if logged_in_as is None or name != logged_in_as:
if not logged_in_as or name != logged_in_as:
self._login(login)

def __repr__(self):
@@ -137,10 +139,10 @@ class Site(object):
res = ", ".join((
"Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}",
"base_url={_base_url!r}", "article_path={_article_path!r}",
"script_path={_script_path!r}", "assert_edit={_assert_edit!r}",
"maxlag={_maxlag!r}", "sql={_sql!r}", "login={0}",
"user_agent={2!r}", "cookiejar={1})"
))
"script_path={_script_path!r}", "use_https={_use_https!r}",
"assert_edit={_assert_edit!r}", "maxlag={_maxlag!r}",
"sql={_sql_data!r}", "login={0}", "user_agent={2!r}",
"cookiejar={1})"))
name, password = self._login_info
login = "({0}, {1})".format(repr(name), "hidden" if password else None)
cookies = self._cookiejar.__class__.__name__
@@ -162,7 +164,9 @@ class Site(object):

This will first attempt to construct an API url from self._base_url and
self._script_path. We need both of these, or else we'll raise
SiteAPIError.
SiteAPIError. If self._base_url is protocol-relative (introduced in
MediaWiki 1.18), we'll choose HTTPS if self._use_https is True,
otherwise HTTP.

We'll encode the given params, adding format=json along the way, as
well as &assert= and &maxlag= based on self._assert_edit and _maxlag.
@@ -180,11 +184,17 @@ class Site(object):
There's helpful MediaWiki API documentation at
<http://www.mediawiki.org/wiki/API>.
"""
if self._base_url is None or self._script_path is None:
if not self._base_url or self._script_path is None:
e = "Tried to do an API query, but no API URL is known."
raise SiteAPIError(e)

url = ''.join((self._base_url, self._script_path, "/api.php"))
base_url = self._base_url
if base_url.startswith("//"): # Protocol-relative URLs from 1.18
if self._use_https:
base_url = "https:" + base_url
else:
base_url = "http:" + base_url
url = ''.join((base_url, self._script_path, "/api.php"))

params["format"] = "json" # This is the only format we understand
if self._assert_edit: # If requested, ensure that we're logged in
@@ -193,7 +203,6 @@ class Site(object):
params["maxlag"] = self._maxlag

data = urlencode(params)

logger.debug("{0} -> {1}".format(url, data))

try:
@@ -332,15 +341,15 @@ class Site(object):
name = ''.join((self._name, "Token"))
cookie = self._get_cookie(name, domain)

if cookie is not None:
if cookie:
name = ''.join((self._name, "UserName"))
user_name = self._get_cookie(name, domain)
if user_name is not None:
if user_name:
return user_name.value

name = "centralauth_Token"
for cookie in self._cookiejar:
if cookie.domain_initial_dot is False or cookie.is_expired():
if not cookie.domain_initial_dot or cookie.is_expired():
continue
if cookie.name != name:
continue
@@ -348,7 +357,7 @@ class Site(object):
search = ''.join(("(.*?)", re_escape(cookie.domain)))
if re_match(search, domain): # Test it against our site
user_name = self._get_cookie("centralauth_User", cookie.domain)
if user_name is not None:
if user_name:
return user_name.value

def _get_username_from_api(self):
@@ -378,7 +387,7 @@ class Site(object):
single API query for our username (or IP address) and return that.
"""
name = self._get_username_from_cookies()
if name is not None:
if name:
return name
return self._get_username_from_api()

@@ -417,7 +426,7 @@ class Site(object):
"""
name, password = login
params = {"action": "login", "lgname": name, "lgpassword": password}
if token is not None:
if token:
params["lgtoken"] = token
result = self._api_query(params)
res = result["login"]["result"]
@@ -455,10 +464,9 @@ class Site(object):
def _sql_connect(self, **kwargs):
"""Attempt to establish a connection with this site's SQL database.

oursql.connect() will be called with self._sql_data as its kwargs,
which is usually config.wiki["sites"][self.name()]["sql"]. Any kwargs
given to this function will be passed to connect() and will have
precedence over the config file.
oursql.connect() will be called with self._sql_data as its kwargs.
Any kwargs given to this function will be passed to connect() and will
have precedence over the config file.

Will raise SQLError() if the module "oursql" is not available. oursql
may raise its own exceptions (e.g. oursql.InterfaceError) if it cannot
@@ -631,6 +639,6 @@ class Site(object):
If `username` is left as None, then a User object representing the
currently logged-in (or anonymous!) user is returned.
"""
if username is None:
if not username:
username = self._get_username()
return User(self, username)

+ 392
- 0
earwigbot/wiki/sitesdb.py Ver fichero

@@ -0,0 +1,392 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 by Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from cookielib import LWPCookieJar, LoadError
import errno
from getpass import getpass
from os import chmod, path
from platform import python_version
import stat
import sqlite3 as sqlite

from earwigbot import __version__
from earwigbot.config import config
from earwigbot.wiki.exceptions import SiteNotFoundError
from earwigbot.wiki.site import Site

# Public names: the manager class plus the module-level convenience functions
# that are bound to a default manager instance at the bottom of this file.
__all__ = ["SitesDBManager", "get_site", "add_site", "remove_site"]

class SitesDBManager(object):
    """
    EarwigBot's Wiki Toolset: Sites Database Manager

    This class controls the sites.db file, which stores information about all
    wiki sites known to the bot. Three public methods act as bridges between
    the bot's config files and Site objects:
    get_site    -- returns a Site object corresponding to a given site name
    add_site    -- stores a site in the database, given connection info
    remove_site -- removes a site from the database, given its name

    There's usually no need to use this class directly. All public methods
    here are available as earwigbot.wiki.get_site(), earwigbot.wiki.add_site(),
    and earwigbot.wiki.remove_site(), which use a sites.db file located in the
    same directory as our config.yml file. Lower-level access can be achieved
    by importing the manager class
    (`from earwigbot.wiki.sitesdb import SitesDBManager`).
    """

    def __init__(self, db_file):
        """Set up the manager with an attribute for the sitesdb filename."""
        self._cookiejar = None
        self._sitesdb = db_file

    def _load_config(self):
        """Load the bot's config.

        Called by a config-requiring function, such as get_site(), when config
        has not been loaded. This will usually happen only if we're running
        code directly from Python's interpreter and not the bot itself, because
        bot.py and earwigbot.runner will already call these functions.
        """
        is_encrypted = config.load()
        if is_encrypted:  # Passwords in the config file are encrypted
            key = getpass("Enter key to unencrypt bot passwords: ")
            config._decryption_key = key
            config.decrypt(config.wiki, "password")

    def _get_cookiejar(self):
        """Return a LWPCookieJar object loaded from our .cookies file.

        The same jar is returned every time. Its .cookies file is located in
        config.root_dir. If the file doesn't exist, we will create it and set
        it to be readable and writeable only by us. If it exists but the
        information inside is bogus, we'll ignore it. Any other I/O error is
        re-raised.

        This is normally called when building a Site (via get_site() or
        add_site()), and the cookiejar is passed to our Site's constructor,
        used when it makes API queries. This way, we can easily preserve
        cookies between sites (e.g., for CentralAuth), making logins easier.
        """
        # Compare against None explicitly: a CookieJar defines __len__, so a
        # jar that holds no cookies yet is falsy and a plain truth test would
        # rebuild (and reload) it on every call.
        if self._cookiejar is not None:
            return self._cookiejar

        cookie_file = path.join(config.root_dir, ".cookies")
        self._cookiejar = LWPCookieJar(cookie_file)

        try:
            self._cookiejar.load()
        except LoadError:
            pass  # File contains bad data, so ignore it completely
        except IOError as e:
            if e.errno == errno.ENOENT:  # "No such file or directory"
                # Create the file and restrict reading/writing only to the
                # owner, so others can't peek at our cookies:
                open(cookie_file, "w").close()
                chmod(cookie_file, stat.S_IRUSR | stat.S_IWUSR)
            else:
                raise

        return self._cookiejar

    def _get_site_kwargs_from_config(self):
        """Return the Site() keyword arguments that come from the bot config.

        Shared by _make_site_object() and add_site() so that both build Sites
        the same way; in particular, the "$1" (EarwigBot version) and "$2"
        (Python version) placeholders in the user agent are always expanded.
        """
        user_agent = config.wiki.get("userAgent")
        if user_agent:
            user_agent = user_agent.replace("$1", __version__)
            user_agent = user_agent.replace("$2", python_version())

        return {
            "login": (config.wiki.get("username"),
                      config.wiki.get("password")),
            "cookiejar": self._get_cookiejar(),
            "user_agent": user_agent,
            "use_https": config.wiki.get("useHTTPS", False),
            "assert_edit": config.wiki.get("assert"),
            "maxlag": config.wiki.get("maxlag"),
            "search_config": config.wiki.get("search"),
        }

    def _create_sitesdb(self):
        """Initialize the sitesdb file with its three necessary tables."""
        script = """
        CREATE TABLE sites (site_name, site_project, site_lang, site_base_url,
                            site_article_path, site_script_path);
        CREATE TABLE sql_data (sql_site, sql_data_key, sql_data_value);
        CREATE TABLE namespaces (ns_site, ns_id, ns_name, ns_is_primary_name);
        """
        with sqlite.connect(self._sitesdb) as conn:
            conn.executescript(script)

    def _load_site_from_sitesdb(self, name):
        """Return all information stored in the sitesdb relating to given site.

        The information will be returned as a tuple, containing the site's
        name, project, language, base URL, article path, script path, SQL
        connection data, and namespaces, in that order. If the site is not
        found in the database, SiteNotFoundError will be raised. An empty
        database will be created before the exception is raised if none exists.
        """
        query1 = "SELECT * FROM sites WHERE site_name = ?"
        query2 = ("SELECT sql_data_key, sql_data_value FROM sql_data "
                  "WHERE sql_site = ?")
        query3 = ("SELECT ns_id, ns_name, ns_is_primary_name FROM namespaces "
                  "WHERE ns_site = ?")
        error = "Site '{0}' not found in the sitesdb.".format(name)
        with sqlite.connect(self._sitesdb) as conn:
            try:
                site_data = conn.execute(query1, (name,)).fetchone()
            except sqlite.OperationalError:  # The tables don't exist yet
                self._create_sitesdb()
                raise SiteNotFoundError(error)
            if not site_data:
                raise SiteNotFoundError(error)
            sql_data = conn.execute(query2, (name,)).fetchall()
            ns_data = conn.execute(query3, (name,)).fetchall()

        name, project, lang, base_url, article_path, script_path = site_data
        sql = dict(sql_data)
        namespaces = {}
        for ns_id, ns_name, ns_is_primary_name in ns_data:
            names = namespaces.setdefault(ns_id, [])
            if ns_is_primary_name:  # "Primary" name goes first in list
                names.insert(0, ns_name)
            else:  # Ordering of the aliases doesn't matter
                names.append(ns_name)

        return (name, project, lang, base_url, article_path, script_path, sql,
                namespaces)

    def _make_site_object(self, name):
        """Return a Site object associated with the site 'name' in our sitesdb.

        This calls _load_site_from_sitesdb(), so SiteNotFoundError will be
        raised if the site is not in our sitesdb.
        """
        (name, project, lang, base_url, article_path, script_path, sql,
         namespaces) = self._load_site_from_sitesdb(name)

        return Site(name=name, project=project, lang=lang, base_url=base_url,
                    article_path=article_path, script_path=script_path,
                    sql=sql, namespaces=namespaces,
                    **self._get_site_kwargs_from_config())

    def _get_site_name_from_sitesdb(self, project, lang):
        """Return the name of the first site with the given project and lang.

        If the site is not found, return None. An empty sitesdb will be created
        if none exists.
        """
        query = ("SELECT site_name FROM sites "
                 "WHERE site_project = ? and site_lang = ?")
        with sqlite.connect(self._sitesdb) as conn:
            try:
                row = conn.execute(query, (project, lang)).fetchone()
            except sqlite.OperationalError:  # The tables don't exist yet
                self._create_sitesdb()
                return None
        # fetchone() yields a one-element row (or None); callers want the bare
        # name so that it can be passed on as a query parameter.
        return row[0] if row else None

    def _add_site_to_sitesdb(self, site):
        """Extract relevant info from a Site object and add it to the sitesdb.

        Works like a reverse _load_site_from_sitesdb(); the site's project,
        language, base URL, article path, script path, SQL connection data, and
        namespaces are extracted from the site and inserted into the sites
        database. If the sitesdb doesn't exist, we'll create it first. Any
        existing entry with the same name is replaced. The Site object itself
        is not modified.
        """
        name = site.name()
        sites_data = (name, site.project(), site.lang(), site._base_url,
                      site._article_path, site._script_path)
        sql_data = [(name, key, val)
                    for key, val in (site._sql_data or {}).items()]
        ns_data = []
        for ns_id, ns_names in (site._namespaces or {}).items():
            # Index/slice instead of pop(): the list belongs to the Site, and
            # removing the primary name would corrupt the caller's object.
            for position, ns_name in enumerate(ns_names):
                ns_data.append((name, ns_id, ns_name, position == 0))

        with sqlite.connect(self._sitesdb) as conn:
            check_exists = "SELECT 1 FROM sites WHERE site_name = ?"
            try:
                exists = conn.execute(check_exists, (name,)).fetchone()
            except sqlite.OperationalError:  # The tables don't exist yet
                self._create_sitesdb()
            else:
                if exists:
                    conn.execute("DELETE FROM sites WHERE site_name = ?",
                                 (name,))
                    conn.execute("DELETE FROM sql_data WHERE sql_site = ?",
                                 (name,))
                    conn.execute("DELETE FROM namespaces WHERE ns_site = ?",
                                 (name,))
            conn.execute("INSERT INTO sites VALUES (?, ?, ?, ?, ?, ?)",
                         sites_data)
            conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)",
                             sql_data)
            conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)",
                             ns_data)

    def _remove_site_from_sitesdb(self, name):
        """Remove a site by name from the sitesdb.

        Return True if a site was removed, or False if no site had that name.
        If the sitesdb has no tables yet, they are created and False is
        returned.
        """
        with sqlite.connect(self._sitesdb) as conn:
            try:
                cursor = conn.execute("DELETE FROM sites WHERE site_name = ?",
                                      (name,))
            except sqlite.OperationalError:  # The tables don't exist yet
                self._create_sitesdb()
                return False
            if cursor.rowcount == 0:
                return False
            conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
            conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
            return True

    def get_site(self, name=None, project=None, lang=None):
        """Return a Site instance based on information from the sitesdb.

        With no arguments, return the default site as specified by our config
        file. This is config.wiki["defaultSite"].

        With 'name' specified, return the site with that name. This is
        equivalent to the site's 'wikiid' in the API, like 'enwiki'.

        With 'project' and 'lang' specified, return the site whose project and
        language match these values. If there are multiple sites with the same
        values (unlikely), this is not a reliable way of loading a site. Call
        the function with an explicit 'name' in that case.

        We will attempt to login to the site automatically using
        config.wiki["username"] and config.wiki["password"] if both are
        defined.

        Specifying a project without a lang or a lang without a project will
        raise TypeError. If all three args are specified, 'name' will be first
        tried, then 'project' and 'lang' if 'name' doesn't work. If a site
        cannot be found in the sitesdb, SiteNotFoundError will be raised. An
        empty sitesdb will be created if none is found.
        """
        if not config.is_loaded():
            self._load_config()

        # Someone specified a project without a lang, or vice versa:
        if (project and not lang) or (not project and lang):
            e = "Keyword arguments 'lang' and 'project' must be specified together."
            raise TypeError(e)

        # No args given, so return our default site:
        if not name and not project and not lang:
            try:
                default = config.wiki["defaultSite"]
            except KeyError:
                e = "Default site is not specified in config."
                raise SiteNotFoundError(e)
            return self._make_site_object(default)

        # Name arg given, but don't look at others unless `name` isn't found:
        if name:
            try:
                return self._make_site_object(name)
            except SiteNotFoundError:
                if project and lang:
                    name = self._get_site_name_from_sitesdb(project, lang)
                    if name:
                        return self._make_site_object(name)
                raise

        # If we end up here, then project and lang are the only args given:
        name = self._get_site_name_from_sitesdb(project, lang)
        if name:
            return self._make_site_object(name)
        e = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang)
        raise SiteNotFoundError(e)

    def add_site(self, project=None, lang=None, base_url=None,
                 script_path="/w", sql=None):
        """Add a site to the sitesdb so it can be retrieved with get_site().

        If only a project and a lang are given, we'll guess the base_url as
        "//{lang}.{project}.org" (which is protocol-relative, becoming 'https'
        if 'useHTTPS' is True in config otherwise 'http'). If this is wrong,
        provide the correct base_url as an argument (in which case project and
        lang are ignored). Most wikis use "/w" as the script path (meaning the
        API is located at "{base_url}{script_path}/api.php" ->
        "//{lang}.{project}.org/w/api.php"), so this is the default. If your
        wiki is different, provide the script_path as an argument. The only
        other argument to Site() that we can't get from config files or by
        querying the wiki itself is SQL connection info, so provide a dict of
        kwargs as `sql` and Site will pass it to oursql.connect(**sql),
        allowing you to make queries with site.sql_query().

        Returns the newly created Site object. If a site with the same name is
        already in our sitesdb, its stored info is replaced (this can be done
        purposefully to update old site info). Raises SiteNotFoundError if not
        enough information has been provided to identify the site (e.g. a
        project but not a lang).
        """
        if not config.is_loaded():
            self._load_config()

        if not base_url:
            if not project or not lang:
                e = "Without a base_url, both a project and a lang must be given."
                raise SiteNotFoundError(e)
            base_url = "//{0}.{1}.org".format(lang, project)

        # Create a temp Site object to log in and load the other attributes:
        site = Site(base_url=base_url, script_path=script_path, sql=sql,
                    **self._get_site_kwargs_from_config())

        self._add_site_to_sitesdb(site)
        return site

    def remove_site(self, name=None, project=None, lang=None):
        """Remove a site from the sitesdb.

        Returns True if the site was removed successfully or False if the site
        was not in our sitesdb originally. If all three args (name, project,
        and lang) are given, we'll first try 'name' and then try the latter two
        if 'name' wasn't found in the database. Raises TypeError if a project
        was given but not a language, or vice versa. Will create an empty
        sitesdb if none was found.
        """
        if not config.is_loaded():
            self._load_config()

        # Someone specified a project without a lang, or vice versa:
        if (project and not lang) or (not project and lang):
            e = "Keyword arguments 'lang' and 'project' must be specified together."
            raise TypeError(e)

        if name:
            was_removed = self._remove_site_from_sitesdb(name)
            if not was_removed:
                if project and lang:
                    name = self._get_site_name_from_sitesdb(project, lang)
                    if name:
                        return self._remove_site_from_sitesdb(name)
            return was_removed

        if project and lang:
            name = self._get_site_name_from_sitesdb(project, lang)
            if name:
                return self._remove_site_from_sitesdb(name)

        return False

# The default sites.db sits in the project root, i.e. two directories above
# the directory that contains this module:
_root = path.dirname(path.dirname(path.dirname(path.abspath(__file__))))
_manager = SitesDBManager(path.join(_root, "sites.db"))
del _root

# Module-level shortcuts bound to the default manager:
get_site = _manager.get_site
add_site = _manager.add_site
remove_site = _manager.remove_site

Cargando…
Cancelar
Guardar