@@ -1,19 +1,11 @@ | |||
# Ignore python bytecode: | |||
*.pyc | |||
# Ignore bot-specific config file: | |||
config.yml | |||
# Ignore logs directory: | |||
# Ignore bot-specific files: | |||
logs/ | |||
# Ignore cookies file: | |||
config.yml | |||
sites.db | |||
.cookies | |||
# Ignore OS X's crud: | |||
.DS_Store | |||
# Ignore python bytecode: | |||
*.pyc | |||
# Ignore pydev's nonsense: | |||
.project | |||
.pydevproject | |||
.settings/ | |||
# Ignore OS X's stuff: | |||
.DS_Store |
@@ -176,7 +176,7 @@ class _BotConfig(object): | |||
return self._root_dir | |||
@property | |||
def config_path(self): | |||
def path(self): | |||
return self._config_path | |||
@property | |||
@@ -89,7 +89,7 @@ class Watcher(IRCConnection): | |||
return | |||
module = imp.new_module("_rc_event_processing_rules") | |||
try: | |||
exec compile(rules, config.config_path, "exec") in module.__dict__ | |||
exec compile(rules, config.path, "exec") in module.__dict__ | |||
except Exception: | |||
e = "Could not compile config file's RC event rules" | |||
self.logger.exception(e) | |||
@@ -36,9 +36,9 @@ logger.addHandler(_log.NullHandler()) | |||
from earwigbot.wiki.constants import * | |||
from earwigbot.wiki.exceptions import * | |||
from earwigbot.wiki.functions import * | |||
from earwigbot.wiki.category import Category | |||
from earwigbot.wiki.page import Page | |||
from earwigbot.wiki.site import Site | |||
from earwigbot.wiki.sitesdb import get_site, add_site, remove_site | |||
from earwigbot.wiki.user import User |
@@ -1,211 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 by Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
""" | |||
EarwigBot's Wiki Toolset: Misc Functions | |||
This module, a component of the wiki package, contains miscellaneous functions | |||
that are not methods of any class, like get_site(). | |||
There's no need to import this module explicitly. All functions here are | |||
automatically available from earwigbot.wiki. | |||
""" | |||
from cookielib import LWPCookieJar, LoadError | |||
import errno | |||
from getpass import getpass | |||
from os import chmod, path | |||
import platform | |||
import stat | |||
import earwigbot | |||
from earwigbot.config import config | |||
from earwigbot.wiki.exceptions import SiteNotFoundError | |||
from earwigbot.wiki.site import Site | |||
__all__ = ["get_site", "add_site", "del_site"] | |||
_cookiejar = None | |||
def _load_config():
    """Load the bot's config, asking for a decryption key when needed.

    Config-requiring functions such as get_site() call this when the config
    has not been loaded yet. That normally only happens when code is run
    straight from Python's interpreter rather than through the bot itself,
    since earwigbot.py or core/main.py will already have done the loading.

    config.load() reports whether the stored passwords are encrypted; if they
    are, the user is prompted for the key and the wiki password is decrypted.
    """
    if not config.load():
        return  # Nothing is encrypted, so there is nothing left to do

    # Passwords in the config file are encrypted, so ask for the key:
    config._decryption_key = getpass("Enter key to unencrypt bot passwords: ")
    config.decrypt(config.wiki, "password")
def _get_cookiejar():
    """Return the LWPCookieJar loaded from our .cookies file.

    The jar is built once and stored in the module-level `_cookiejar`; every
    later call hands back that same object.

    The .cookies file sits in config.root_dir. If it does not exist, an empty
    file is created and restricted to owner read/write. If it exists but its
    contents cannot be parsed, the contents are ignored. Any other I/O error
    is re-raised.

    This is normally called by _get_site_object_from_dict() (in turn called by
    get_site()), and the jar is passed to the Site constructor. Sharing one
    jar lets cookies carry over between sites (e.g., for CentralAuth).
    """
    global _cookiejar
    if _cookiejar is None:
        cookie_file = path.join(config.root_dir, ".cookies")
        _cookiejar = LWPCookieJar(cookie_file)
        try:
            _cookiejar.load()
        except LoadError:
            pass  # File contains bad data, so ignore it completely
        except IOError as e:
            if e.errno != errno.ENOENT:
                raise
            # "No such file or directory": create the file and restrict
            # reading/writing to the owner, so others can't peek at our
            # cookies:
            open(cookie_file, "w").close()
            chmod(cookie_file, stat.S_IRUSR | stat.S_IWUSR)
    return _cookiejar
def _get_site_object_from_dict(name, d):
    """Build a Site object from a site's dict and a separate name.

    `d` supplies the per-site values ("project", "lang", "baseURL",
    "articlePath", "scriptPath", "sql", "namespaces"); missing keys become
    None, except "sql" and "namespaces", which default to empty dicts.
    Bot-wide values (login, user agent, assert, maxlag, search) come from
    config.wiki, and the shared cookiejar from _get_cookiejar().

    In the user agent, "$1" is replaced with the earwigbot version and "$2"
    with the running Python version.
    """
    wiki_conf = config.wiki

    user_agent = wiki_conf.get("userAgent")
    if user_agent:
        substitutions = (("$1", earwigbot.__version__),
                         ("$2", platform.python_version()))
        for placeholder, value in substitutions:
            user_agent = user_agent.replace(placeholder, value)

    return Site(
        name=name,
        project=d.get("project"),
        lang=d.get("lang"),
        base_url=d.get("baseURL"),
        article_path=d.get("articlePath"),
        script_path=d.get("scriptPath"),
        sql=d.get("sql", {}),
        namespaces=d.get("namespaces", {}),
        login=(wiki_conf.get("username"), wiki_conf.get("password")),
        cookiejar=_get_cookiejar(),
        user_agent=user_agent,
        assert_edit=wiki_conf.get("assert"),
        maxlag=wiki_conf.get("maxlag"),
        search_config=wiki_conf.get("search"))
def get_site(name=None, project=None, lang=None):
    """Return a Site instance based on information from our config file.

    With no arguments, return the default site as specified by our config
    file. This is default = config.wiki["defaultSite"];
    config.wiki["sites"][default].

    With `name` specified, return the site specified by
    config.wiki["sites"][name].

    With `project` and `lang` specified, return the site specified by the
    member of config.wiki["sites"], `s`, for which s["project"] == project and
    s["lang"] == lang.

    The Site is built by _get_site_object_from_dict(), which passes it
    config.wiki["username"] and config.wiki["password"] as login info.

    Specifying a project without a lang or a lang without a project will raise
    TypeError. If all three args are specified, `name` will be first tried,
    then `project` and `lang`. If, with any number of args, a site cannot be
    found in the config, SiteNotFoundError is raised.
    """
    # Check if config has been loaded, and load it if it hasn't:
    if not config.is_loaded():
        _load_config()

    # Someone specified a project without a lang (or a lang without a project)!
    if (project is None) != (lang is None):
        e = "Keyword arguments 'lang' and 'project' must be specified together."
        raise TypeError(e)

    # No args given, so return our default site (project is None implies lang
    # is None, so we don't need to add that in):
    if name is None and project is None:
        try:
            default = config.wiki["defaultSite"]
        except KeyError:
            e = "Default site is not specified in config."
            raise SiteNotFoundError(e)
        try:
            site = config.wiki["sites"][default]
        except KeyError:
            e = "Default site specified by config is not in the config's sites list."
            raise SiteNotFoundError(e)
        return _get_site_object_from_dict(default, site)

    # Name arg given, but don't look at others unless `name` isn't found:
    if name is not None:
        try:
            site = config.wiki["sites"][name]
        except KeyError:
            if project is None:  # Implies lang is None, so only name was given
                e = "Site '{0}' not found in config.".format(name)
                raise SiteNotFoundError(e)
            for sitename, site in config.wiki["sites"].items():
                if site["project"] == project and site["lang"] == lang:
                    return _get_site_object_from_dict(sitename, site)
            # str.format() returns a new string, so its result must be kept;
            # otherwise the raw "{0}" template ends up in the exception:
            e = "Neither site '{0}' nor site '{1}:{2}' found in config."
            raise SiteNotFoundError(e.format(name, project, lang))
        else:
            return _get_site_object_from_dict(name, site)

    # If we end up here, then project and lang are both not None:
    for sitename, site in config.wiki["sites"].items():
        if site["project"] == project and site["lang"] == lang:
            return _get_site_object_from_dict(sitename, site)
    e = "Site '{0}:{1}' not found in config.".format(project, lang)
    raise SiteNotFoundError(e)
def add_site():
    """STUB: config editing is required first.

    Not implemented yet: the function currently does nothing and returns
    None. The intended contract is to return True if the site was added
    successfully or False if the site was already in our config, and to raise
    ConfigError if saving the updated file failed for some reason.
    """
    return None
def del_site(name):
    """STUB: config editing is required first.

    Not implemented yet: `name` is ignored and the function returns None.
    The intended contract is to return True if the site was removed
    successfully or False if the site was not in our config originally, and
    to raise ConfigError if saving the updated file failed for some reason.
    """
    return None
@@ -174,7 +174,7 @@ class Page(CopyrightMixin): | |||
Assuming the API is sound, this should not raise any exceptions. | |||
""" | |||
if result is None: | |||
if not result: | |||
params = {"action": "query", "rvprop": "user", "intoken": "edit", | |||
"prop": "info|revisions", "rvlimit": 1, "rvdir": "newer", | |||
"titles": self._title, "inprop": "protection|url"} | |||
@@ -240,7 +240,7 @@ class Page(CopyrightMixin): | |||
Don't call this directly, ever - use .get(force=True) if you want to | |||
force content reloading. | |||
""" | |||
if result is None: | |||
if not result: | |||
params = {"action": "query", "prop": "revisions", "rvlimit": 1, | |||
"rvprop": "content|timestamp", "titles": self._title} | |||
result = self._site._api_query(params) | |||
@@ -471,7 +471,7 @@ class Page(CopyrightMixin): | |||
""" | |||
if force: | |||
self._load_wrapper() | |||
if self._fullurl is not None: | |||
if self._fullurl: | |||
return self._fullurl | |||
else: | |||
slug = quote(self._title.replace(" ", "_"), safe="/:") | |||
@@ -71,18 +71,19 @@ class Site(object): | |||
def __init__(self, name=None, project=None, lang=None, base_url=None, | |||
article_path=None, script_path=None, sql=None, | |||
namespaces=None, login=(None, None), cookiejar=None, | |||
user_agent=None, assert_edit=None, maxlag=None, | |||
search_config=(None, None)): | |||
user_agent=None, use_https=False, assert_edit=None, | |||
maxlag=None, search_config=(None, None)): | |||
"""Constructor for new Site instances. | |||
This probably isn't necessary to call yourself unless you're building a | |||
Site that's not in your config and you don't want to add it - normally | |||
all you need is tools.get_site(name), which creates the Site for you | |||
based on your config file. We accept a bunch of kwargs, but the only | |||
ones you really "need" are `base_url` and `script_path` - this is | |||
enough to figure out an API url. `login`, a tuple of | |||
(username, password), is highly recommended. `cookiejar` will be used | |||
to store cookies, and we'll use a normal CookieJar if none is given. | |||
based on your config file and the sites database. We accept a bunch of | |||
kwargs, but the only ones you really "need" are `base_url` and | |||
`script_path` - this is enough to figure out an API url. `login`, a | |||
tuple of (username, password), is highly recommended. `cookiejar` will | |||
be used to store cookies, and we'll use a normal CookieJar if none is | |||
given. | |||
First, we'll store the given arguments as attributes, then set up our | |||
URL opener. We'll load any of the attributes that weren't given from | |||
@@ -99,7 +100,8 @@ class Site(object): | |||
self._script_path = script_path | |||
self._namespaces = namespaces | |||
# Attributes used for API queries: | |||
# Attributes used for API queries: | |||
self._use_https = use_https | |||
self._assert_edit = assert_edit | |||
self._maxlag = maxlag | |||
self._max_retries = 5 | |||
@@ -112,11 +114,11 @@ class Site(object): | |||
self._search_config = search_config | |||
# Set up cookiejar and URL opener for making API queries: | |||
if cookiejar is not None: | |||
if cookiejar: | |||
self._cookiejar = cookiejar | |||
else: | |||
self._cookiejar = CookieJar() | |||
if user_agent is None: | |||
if not user_agent: | |||
user_agent = USER_AGENT # Set default UA from wiki.constants | |||
self._opener = build_opener(HTTPCookieProcessor(self._cookiejar)) | |||
self._opener.addheaders = [("User-Agent", user_agent), | |||
@@ -127,9 +129,9 @@ class Site(object): | |||
# If we have a name/pass and the API says we're not logged in, log in: | |||
self._login_info = name, password = login | |||
if name is not None and password is not None: | |||
if name and password: | |||
logged_in_as = self._get_username_from_cookies() | |||
if logged_in_as is None or name != logged_in_as: | |||
if not logged_in_as or name != logged_in_as: | |||
self._login(login) | |||
def __repr__(self): | |||
@@ -137,10 +139,10 @@ class Site(object): | |||
res = ", ".join(( | |||
"Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}", | |||
"base_url={_base_url!r}", "article_path={_article_path!r}", | |||
"script_path={_script_path!r}", "assert_edit={_assert_edit!r}", | |||
"maxlag={_maxlag!r}", "sql={_sql!r}", "login={0}", | |||
"user_agent={2!r}", "cookiejar={1})" | |||
)) | |||
"script_path={_script_path!r}", "use_https={_use_https!r}", | |||
"assert_edit={_assert_edit!r}", "maxlag={_maxlag!r}", | |||
"sql={_sql_data!r}", "login={0}", "user_agent={2!r}", | |||
"cookiejar={1})")) | |||
name, password = self._login_info | |||
login = "({0}, {1})".format(repr(name), "hidden" if password else None) | |||
cookies = self._cookiejar.__class__.__name__ | |||
@@ -162,7 +164,9 @@ class Site(object): | |||
This will first attempt to construct an API url from self._base_url and | |||
self._script_path. We need both of these, or else we'll raise | |||
SiteAPIError. | |||
SiteAPIError. If self._base_url is protocol-relative (introduced in | |||
MediaWiki 1.18), we'll choose HTTPS if self._use_https is True, | |||
otherwise HTTP. | |||
We'll encode the given params, adding format=json along the way, as | |||
well as &assert= and &maxlag= based on self._assert_edit and _maxlag. | |||
@@ -180,11 +184,17 @@ class Site(object): | |||
There's helpful MediaWiki API documentation at | |||
<http://www.mediawiki.org/wiki/API>. | |||
""" | |||
if self._base_url is None or self._script_path is None: | |||
if not self._base_url or self._script_path is None: | |||
e = "Tried to do an API query, but no API URL is known." | |||
raise SiteAPIError(e) | |||
url = ''.join((self._base_url, self._script_path, "/api.php")) | |||
base_url = self._base_url | |||
if base_url.startswith("//"): # Protocol-relative URLs from 1.18 | |||
if self._use_https: | |||
base_url = "https:" + base_url | |||
else: | |||
base_url = "http:" + base_url | |||
url = ''.join((base_url, self._script_path, "/api.php")) | |||
params["format"] = "json" # This is the only format we understand | |||
if self._assert_edit: # If requested, ensure that we're logged in | |||
@@ -193,7 +203,6 @@ class Site(object): | |||
params["maxlag"] = self._maxlag | |||
data = urlencode(params) | |||
logger.debug("{0} -> {1}".format(url, data)) | |||
try: | |||
@@ -332,15 +341,15 @@ class Site(object): | |||
name = ''.join((self._name, "Token")) | |||
cookie = self._get_cookie(name, domain) | |||
if cookie is not None: | |||
if cookie: | |||
name = ''.join((self._name, "UserName")) | |||
user_name = self._get_cookie(name, domain) | |||
if user_name is not None: | |||
if user_name: | |||
return user_name.value | |||
name = "centralauth_Token" | |||
for cookie in self._cookiejar: | |||
if cookie.domain_initial_dot is False or cookie.is_expired(): | |||
if not cookie.domain_initial_dot or cookie.is_expired(): | |||
continue | |||
if cookie.name != name: | |||
continue | |||
@@ -348,7 +357,7 @@ class Site(object): | |||
search = ''.join(("(.*?)", re_escape(cookie.domain))) | |||
if re_match(search, domain): # Test it against our site | |||
user_name = self._get_cookie("centralauth_User", cookie.domain) | |||
if user_name is not None: | |||
if user_name: | |||
return user_name.value | |||
def _get_username_from_api(self): | |||
@@ -378,7 +387,7 @@ class Site(object): | |||
single API query for our username (or IP address) and return that. | |||
""" | |||
name = self._get_username_from_cookies() | |||
if name is not None: | |||
if name: | |||
return name | |||
return self._get_username_from_api() | |||
@@ -417,7 +426,7 @@ class Site(object): | |||
""" | |||
name, password = login | |||
params = {"action": "login", "lgname": name, "lgpassword": password} | |||
if token is not None: | |||
if token: | |||
params["lgtoken"] = token | |||
result = self._api_query(params) | |||
res = result["login"]["result"] | |||
@@ -455,10 +464,9 @@ class Site(object): | |||
def _sql_connect(self, **kwargs): | |||
"""Attempt to establish a connection with this site's SQL database. | |||
oursql.connect() will be called with self._sql_data as its kwargs, | |||
which is usually config.wiki["sites"][self.name()]["sql"]. Any kwargs | |||
given to this function will be passed to connect() and will have | |||
precedence over the config file. | |||
oursql.connect() will be called with self._sql_data as its kwargs. | |||
Any kwargs given to this function will be passed to connect() and will | |||
have precedence over the config file. | |||
Will raise SQLError() if the module "oursql" is not available. oursql | |||
may raise its own exceptions (e.g. oursql.InterfaceError) if it cannot | |||
@@ -631,6 +639,6 @@ class Site(object): | |||
If `username` is left as None, then a User object representing the | |||
currently logged-in (or anonymous!) user is returned. | |||
""" | |||
if username is None: | |||
if not username: | |||
username = self._get_username() | |||
return User(self, username) |
@@ -0,0 +1,392 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 by Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from cookielib import LWPCookieJar, LoadError | |||
import errno | |||
from getpass import getpass | |||
from os import chmod, path | |||
from platform import python_version | |||
import stat | |||
import sqlite3 as sqlite | |||
from earwigbot import __version__ | |||
from earwigbot.config import config | |||
from earwigbot.wiki.exceptions import SiteNotFoundError | |||
from earwigbot.wiki.site import Site | |||
__all__ = ["SitesDBManager", "get_site", "add_site", "remove_site"] | |||
class SitesDBManager(object): | |||
""" | |||
EarwigBot's Wiki Toolset: Sites Database Manager | |||
This class controls the sites.db file, which stores information about all | |||
wiki sites known to the bot. Three public methods act as bridges between | |||
the bot's config files and Site objects: | |||
get_site -- returns a Site object corresponding to a given site name | |||
add_site -- stores a site in the database, given connection info | |||
remove_site -- removes a site from the database, given its name | |||
There's usually no need to use this class directly. All public methods | |||
here are available as earwigbot.wiki.get_site(), earwigbot.wiki.add_site(), | |||
and earwigbot.wiki.remove_site(), which use a sites.db file located in the | |||
same directory as our config.yml file. Lower-level access can be achieved | |||
by importing the manager class | |||
(`from earwigbot.wiki.sitesdb import SitesDBManager`). | |||
""" | |||
def __init__(self, db_file):
    """Set up the manager for the sites database stored at `db_file`.

    Only the filename is recorded here; nothing is opened or created. The
    cookiejar attribute starts out as None and is filled in by
    _get_cookiejar() when it is first needed.
    """
    self._sitesdb = db_file
    self._cookiejar = None
def _load_config(self):
    """Load the bot's config, asking for a decryption key when needed.

    Called by a config-requiring function, such as get_site(), when config
    has not been loaded. This will usually happen only if we're running
    code directly from Python's interpreter and not the bot itself, because
    bot.py and earwigbot.runner will already call these functions.

    config.load() reports whether the stored passwords are encrypted; if
    they are, the user is prompted for the key and the wiki password is
    decrypted.
    """
    if not config.load():
        return  # Nothing is encrypted, so there is nothing left to do

    # Passwords in the config file are encrypted, so ask for the key:
    config._decryption_key = getpass("Enter key to unencrypt bot passwords: ")
    config.decrypt(config.wiki, "password")
def _get_cookiejar(self):
    """Return a LWPCookieJar object loaded from our .cookies file.

    The same jar object is returned every time once it has been built. The
    .cookies file is located in config.root_dir. If it doesn't exist, we
    will create the file and set it to be readable and writeable only by
    us. If it exists but the information inside is bogus, we'll ignore it.
    Any other I/O error while loading is re-raised.

    This is normally called by _make_site_object() (in turn called by
    get_site()), and the cookiejar is passed to our Site's constructor,
    used when it makes API queries. This way, we can easily preserve
    cookies between sites (e.g., for CentralAuth), making logins easier.
    """
    # Compare against None rather than testing truthiness: a cookie jar
    # defines __len__, so a jar holding no cookies is falsy and would
    # otherwise be thrown away and rebuilt on every call.
    if self._cookiejar is not None:
        return self._cookiejar

    cookie_file = path.join(config.root_dir, ".cookies")
    self._cookiejar = LWPCookieJar(cookie_file)

    try:
        self._cookiejar.load()
    except LoadError:
        pass  # File contains bad data, so ignore it completely
    except IOError as e:
        if e.errno == errno.ENOENT:  # "No such file or directory"
            # Create the file and restrict reading/writing only to the
            # owner, so others can't peek at our cookies:
            open(cookie_file, "w").close()
            chmod(cookie_file, stat.S_IRUSR | stat.S_IWUSR)
        else:
            raise

    return self._cookiejar
def _create_sitesdb(self):
    """Initialize the sitesdb file with its three necessary tables.

    Creates `sites` (one row per site), `sql_data` (key/value SQL connection
    settings per site), and `namespaces` (one row per namespace name per
    site) in the database file named by self._sitesdb.
    """
    schema = """
        CREATE TABLE sites (site_name, site_project, site_lang, site_base_url,
                            site_article_path, site_script_path);
        CREATE TABLE sql_data (sql_site, sql_data_key, sql_data_value);
        CREATE TABLE namespaces (ns_site, ns_id, ns_name, ns_is_primary_name);
    """
    database = sqlite.connect(self._sitesdb)
    with database:
        database.executescript(schema)
def _load_site_from_sitesdb(self, name):
    """Return all information stored in the sitesdb relating to given site.

    The result is a tuple holding the site's name, project, language, base
    URL, article path, script path, SQL connection data (a dict), and
    namespaces (a dict mapping namespace id to a list of names, primary
    name first), in that order.

    SiteNotFoundError is raised if the site has no row in the database. If
    the lookup fails with an OperationalError, an empty database is created
    via _create_sitesdb() before the exception is raised.
    """
    not_found = "Site '{0}' not found in the sitesdb.".format(name)
    site_query = "SELECT * FROM sites WHERE site_name = ?"
    sql_query = "SELECT sql_data_key, sql_data_value FROM sql_data WHERE sql_site = ?"
    ns_query = "SELECT ns_id, ns_name, ns_is_primary_name FROM namespaces WHERE ns_site = ?"
    args = (name,)

    with sqlite.connect(self._sitesdb) as conn:
        try:
            site_row = conn.execute(site_query, args).fetchone()
        except sqlite.OperationalError:
            self._create_sitesdb()
            raise SiteNotFoundError(not_found)
        if not site_row:
            raise SiteNotFoundError(not_found)
        sql_rows = conn.execute(sql_query, args).fetchall()
        ns_rows = conn.execute(ns_query, args).fetchall()

    namespaces = {}
    for ns_id, ns_name, is_primary in ns_rows:
        names = namespaces.setdefault(ns_id, [])
        if is_primary:  # "Primary" name goes first in list
            names.insert(0, ns_name)
        else:  # Ordering of the aliases doesn't matter
            names.append(ns_name)

    name, project, lang, base_url, article_path, script_path = site_row
    return (name, project, lang, base_url, article_path, script_path,
            dict(sql_rows), namespaces)
def _make_site_object(self, name):
    """Return a Site object associated with the site 'name' in our sitesdb.

    Per-site values come from _load_site_from_sitesdb(), so
    SiteNotFoundError will be raised if the site is not in our sitesdb.
    Bot-wide values (login, user agent, HTTPS preference, assert, maxlag,
    search) come from config.wiki, and the shared cookiejar from
    _get_cookiejar().

    In the user agent, "$1" is replaced with the earwigbot version and "$2"
    with the running Python version.
    """
    site_info = self._load_site_from_sitesdb(name)
    (name, project, lang, base_url, article_path, script_path, sql,
     namespaces) = site_info

    wiki_conf = config.wiki
    credentials = (wiki_conf.get("username"), wiki_conf.get("password"))
    jar = self._get_cookiejar()

    user_agent = wiki_conf.get("userAgent")
    if user_agent:
        for placeholder, value in (("$1", __version__),
                                   ("$2", python_version())):
            user_agent = user_agent.replace(placeholder, value)

    return Site(
        name=name, project=project, lang=lang, base_url=base_url,
        article_path=article_path, script_path=script_path, sql=sql,
        namespaces=namespaces, login=credentials, cookiejar=jar,
        user_agent=user_agent,
        use_https=wiki_conf.get("useHTTPS", False),
        assert_edit=wiki_conf.get("assert"),
        maxlag=wiki_conf.get("maxlag"),
        search_config=wiki_conf.get("search"))
def _get_site_name_from_sitesdb(self, project, lang):
    """Return the name of the first site with the given project and lang.

    The name is returned as a plain value (not a database row), ready to be
    passed to _make_site_object(). If no site matches, return None. If the
    query fails with an OperationalError, an empty sitesdb is created via
    _create_sitesdb() and None is returned.
    """
    # The table is called "sites" (see _create_sitesdb()):
    query = "SELECT site_name FROM sites WHERE site_project = ? and site_lang = ?"
    with sqlite.connect(self._sitesdb) as conn:
        try:
            row = conn.execute(query, (project, lang)).fetchone()
        except sqlite.OperationalError:
            self._create_sitesdb()
            return None
    # fetchone() yields a one-element row (or None when nothing matched);
    # callers expect the bare site name:
    return row[0] if row else None
def _add_site_to_sitesdb(self, site):
    """Extract relevant info from a Site object and add it to the sitesdb.

    Works like a reverse _load_site_from_sitesdb(); the site's project,
    language, base URL, article path, script path, SQL connection data, and
    namespaces are extracted from the site and inserted into the sites
    database. Any rows already stored under the same site name are deleted
    first, so re-adding a site replaces its old entry. If the existence
    check fails with an OperationalError, the sitesdb is created via
    _create_sitesdb() before inserting.

    The Site object itself is only read, never modified.
    """
    name = site.name()
    sites_data = (name, site.project(), site.lang(), site._base_url,
                  site._article_path, site._script_path)

    # NOTE(review): Site accepts sql=None / namespaces=None; treating a
    # missing value as empty here - confirm against Site's attribute setup.
    sql_data = [(name, key, val)
                for key, val in (site._sql_data or {}).items()]

    # The first name in each list is the namespace's primary name; the rest
    # are aliases. Read by position instead of pop()-ing, so the site's own
    # namespace lists are left untouched:
    ns_data = []
    for ns_id, ns_names in (site._namespaces or {}).items():
        for position, ns_name in enumerate(ns_names):
            ns_data.append((name, ns_id, ns_name, position == 0))

    with sqlite.connect(self._sitesdb) as conn:
        check_exists = "SELECT 1 FROM sites WHERE site_name = ?"
        try:
            exists = conn.execute(check_exists, (name,)).fetchone()
        except sqlite.OperationalError:
            self._create_sitesdb()
        else:
            if exists:
                conn.execute("DELETE FROM sites WHERE site_name = ?", (name,))
                conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
                conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
        conn.execute("INSERT INTO sites VALUES (?, ?, ?, ?, ?, ?)", sites_data)
        conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)", sql_data)
        conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)", ns_data)
def _remove_site_from_sitesdb(self, name):
    """Remove a site by name from the sitesdb.

    Return False, leaving the other tables alone, if no row in `sites` had
    that name. Otherwise also delete the site's rows from `sql_data` and
    `namespaces` and return True.
    """
    args = (name,)
    with sqlite.connect(self._sitesdb) as conn:
        removed = conn.execute("DELETE FROM sites WHERE site_name = ?", args).rowcount
        if removed == 0:
            return False
        conn.execute("DELETE FROM sql_data WHERE sql_site = ?", args)
        conn.execute("DELETE FROM namespaces WHERE ns_site = ?", args)
    return True
def get_site(self, name=None, project=None, lang=None):
    """Return a Site instance based on information from the sitesdb.

    With no arguments, return the default site as specified by our config
    file. This is config.wiki["defaultSite"].

    With 'name' specified, return the site with that name. This is
    equivalent to the site's 'wikiid' in the API, like 'enwiki'.

    With 'project' and 'lang' specified, return the site whose project and
    language match these values. If there are multiple sites with the same
    values (unlikely), this is not a reliable way of loading a site. Call
    the function with an explicit 'name' in that case.

    The Site is built by _make_site_object(), which passes it
    config.wiki["username"] and config.wiki["password"] as login info.

    Specifying a project without a lang or a lang without a project will
    raise TypeError. If all three args are specified, 'name' will be first
    tried, then 'project' and 'lang' if 'name' doesn't work. If a site
    cannot be found in the sitesdb, SiteNotFoundError will be raised. An
    empty sitesdb will be created if none is found.
    """
    if not config.is_loaded():
        self._load_config()

    # A project is only meaningful together with a lang, and vice versa:
    if bool(project) != bool(lang):
        e = "Keyword arguments 'lang' and 'project' must be specified together."
        raise TypeError(e)

    # Name arg given, but don't look at others unless `name` isn't found:
    if name:
        try:
            return self._make_site_object(name)
        except SiteNotFoundError:
            if project and lang:
                name = self._get_site_name_from_sitesdb(project, lang)
                if name:
                    return self._make_site_object(name)
            raise

    # No name, and (given the pairing check above) no project means no lang
    # either, so nothing was specified: return our default site.
    if not project:
        try:
            default = config.wiki["defaultSite"]
        except KeyError:
            e = "Default site is not specified in config."
            raise SiteNotFoundError(e)
        return self._make_site_object(default)

    # If we end up here, then project and lang are the only args given:
    found = self._get_site_name_from_sitesdb(project, lang)
    if not found:
        e = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang)
        raise SiteNotFoundError(e)
    return self._make_site_object(found)
def add_site(self, project=None, lang=None, base_url=None,
             script_path="/w", sql=None):
    """Build a Site from the given details and record it in the sitesdb.

    When base_url is omitted, both project and lang are required and the
    URL is guessed as the protocol-relative "//{lang}.{project}.org".
    When base_url is supplied, project and lang are not used here.
    script_path defaults to "/w"; together with base_url it locates the
    API, e.g. "//{lang}.{project}.org/w/api.php". sql is handed to Site()
    unchanged; it is meant to be a dict of connection kwargs that Site
    uses for SQL queries.

    The remaining Site() arguments are read from config.wiki: "username"
    and "password" (as the login pair), "userAgent", "useHTTPS" (False
    when absent), "assert", "maxlag" and "search". The cookiejar comes
    from self._get_cookiejar().

    Returns the newly created Site object after passing it to
    self._add_site_to_sitesdb(). Note that the Site itself is returned,
    not a boolean. Raises SiteNotFoundError if base_url is missing and
    project and lang were not both provided.
    """
    # Load the config lazily, the same way get_site()/remove_site() do.
    if not config.is_loaded():
        self._load_config()

    if not base_url:
        if not (project and lang):
            e = "Without a base_url, both a project and a lang must be given."
            raise SiteNotFoundError(e)
        base_url = "//{0}.{1}.org".format(lang, project)

    wiki = config.wiki
    credentials = (wiki.get("username"), wiki.get("password"))
    jar = self._get_cookiejar()

    site_kwargs = {
        "base_url": base_url,
        "script_path": script_path,
        "sql": sql,
        "login": credentials,
        "cookiejar": jar,
        "user_agent": wiki.get("userAgent"),
        "use_https": wiki.get("useHTTPS", False),
        "assert_edit": wiki.get("assert"),
        "maxlag": wiki.get("maxlag"),
        "search_config": wiki.get("search"),
    }

    # Constructing the Site is what fills in the attributes that get stored.
    new_site = Site(**site_kwargs)
    self._add_site_to_sitesdb(new_site)
    return new_site
def remove_site(self, name=None, project=None, lang=None):
    """Remove a site from the sitesdb.

    The site is identified by name, by a project/lang pair, or both. When
    all three are given, name is tried first and the project/lang pair is
    only consulted if removing by name did not succeed.

    Returns the result of self._remove_site_from_sitesdb() for the site
    that was matched, or False if no name was given and the project/lang
    pair matched nothing (or was not given). Raises TypeError if exactly
    one of project and lang is supplied.
    """
    if not config.is_loaded():
        self._load_config()

    # A project without a lang (or the reverse) cannot identify a site.
    if bool(project) != bool(lang):
        e = "Keyword arguments 'lang' and 'project' must be specified together."
        raise TypeError(e)

    outcome = False
    if name:
        outcome = self._remove_site_from_sitesdb(name)
        if outcome:
            return outcome

    # Reached when no name was given or removal by name did not succeed.
    if project and lang:
        resolved = self._get_site_name_from_sitesdb(project, lang)
        if resolved:
            return self._remove_site_from_sitesdb(resolved)

    return outcome
# Create the module's single SitesDBManager, backed by a "sites.db" file in
# the directory two levels above the one containing this module, and expose
# its bound methods as the module-level API.
_manager = SitesDBManager(path.join(
    path.dirname(path.dirname(path.dirname(path.abspath(__file__)))),
    "sites.db"))

get_site = _manager.get_site
add_site = _manager.add_site
remove_site = _manager.remove_site