@@ -1,19 +1,11 @@ | |||||
# Ignore python bytecode: | |||||
*.pyc | |||||
# Ignore bot-specific config file: | |||||
config.yml | |||||
# Ignore logs directory: | |||||
# Ignore bot-specific files: | |||||
logs/ | logs/ | ||||
# Ignore cookies file: | |||||
config.yml | |||||
sites.db | |||||
.cookies | .cookies | ||||
# Ignore OS X's crud: | |||||
.DS_Store | |||||
# Ignore python bytecode: | |||||
*.pyc | |||||
# Ignore pydev's nonsense: | |||||
.project | |||||
.pydevproject | |||||
.settings/ | |||||
# Ignore OS X's stuff: | |||||
.DS_Store |
@@ -176,7 +176,7 @@ class _BotConfig(object): | |||||
return self._root_dir | return self._root_dir | ||||
@property | @property | ||||
def config_path(self): | |||||
def path(self): | |||||
return self._config_path | return self._config_path | ||||
@property | @property | ||||
@@ -89,7 +89,7 @@ class Watcher(IRCConnection): | |||||
return | return | ||||
module = imp.new_module("_rc_event_processing_rules") | module = imp.new_module("_rc_event_processing_rules") | ||||
try: | try: | ||||
exec compile(rules, config.config_path, "exec") in module.__dict__ | |||||
exec compile(rules, config.path, "exec") in module.__dict__ | |||||
except Exception: | except Exception: | ||||
e = "Could not compile config file's RC event rules" | e = "Could not compile config file's RC event rules" | ||||
self.logger.exception(e) | self.logger.exception(e) | ||||
@@ -36,9 +36,9 @@ logger.addHandler(_log.NullHandler()) | |||||
from earwigbot.wiki.constants import * | from earwigbot.wiki.constants import * | ||||
from earwigbot.wiki.exceptions import * | from earwigbot.wiki.exceptions import * | ||||
from earwigbot.wiki.functions import * | |||||
from earwigbot.wiki.category import Category | from earwigbot.wiki.category import Category | ||||
from earwigbot.wiki.page import Page | from earwigbot.wiki.page import Page | ||||
from earwigbot.wiki.site import Site | from earwigbot.wiki.site import Site | ||||
from earwigbot.wiki.sitesdb import get_site, add_site, remove_site | |||||
from earwigbot.wiki.user import User | from earwigbot.wiki.user import User |
@@ -1,368 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 by Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
""" | |||||
EarwigBot's Wiki Toolset: Misc Functions | |||||
This module, a component of the wiki package, contains miscellaneous functions | |||||
that are not methods of any class. Currently, it contains get_site(), | |||||
add_site(), and remove_site(). These functions act as bridges between the bot's | |||||
config files and Site objects. | |||||
There's no need to import this module explicitly. All functions here are | |||||
automatically available from earwigbot.wiki. | |||||
""" | |||||
from cookielib import LWPCookieJar, LoadError | |||||
import errno | |||||
from getpass import getpass | |||||
from os import chmod, path | |||||
import platform | |||||
import stat | |||||
import sqlite3 as sqlite | |||||
from earwigbot import __version__ | |||||
from earwigbot.config import config | |||||
from earwigbot.wiki.exceptions import SiteNotFoundError | |||||
from earwigbot.wiki.site import Site | |||||
# Public names exposed when this module is star-imported (earwigbot.wiki
# does "from earwigbot.wiki.functions import *"):
__all__ = ["get_site", "add_site", "remove_site"]
# Module-level cache for the shared cookie jar; populated lazily by
# _get_cookiejar():
_cookiejar = None
# Filename of the SQLite database holding all known sites:
_sitesdb = "sites.db"
def _load_config():
    """Load the bot's config, asking for a decryption key if one is needed.

    Config-requiring functions such as get_site() call this when config has
    not been loaded yet. config.load() reports whether the passwords in the
    config file are encrypted; if they are, the user is prompted for the key
    and the wiki password is decrypted with it.
    """
    if not config.load():
        return  # Passwords are stored in plain text; nothing to decrypt
    config._decryption_key = getpass("Enter key to unencrypt bot passwords: ")
    config.decrypt(config.wiki, "password")
def _get_cookiejar():
    """Return a LWPCookieJar object loaded from our .cookies file.

    The jar is backed by a file named ".cookies" inside config.root_dir. It
    is created once and cached in the module-level _cookiejar variable, so
    every caller shares the same object. If the file doesn't exist, we will
    create it and set it to be readable and writeable only by us. If it
    exists but the information inside is bogus, we will ignore it.

    This is normally called by _make_site_object() (in turn called by
    get_site()), and the cookiejar is passed to our Site's constructor, used
    when it makes API queries. This way, we can easily preserve cookies between
    sites (e.g., for CentralAuth), making logins easier.

    Raises IOError if the file cannot be read for a reason other than it not
    existing; in that case nothing is cached.
    """
    global _cookiejar
    # Compare with None explicitly: a cookie jar holding no cookies has a
    # length of zero and is therefore falsy, which would make us rebuild (and
    # stop sharing) the jar on every call.
    if _cookiejar is not None:
        return _cookiejar

    cookie_file = path.join(config.root_dir, ".cookies")
    jar = LWPCookieJar(cookie_file)

    try:
        jar.load()
    except LoadError:
        pass  # File contains bad data, so ignore it completely
    except IOError as e:
        if e.errno == errno.ENOENT:  # "No such file or directory"
            # Create the file and restrict reading/writing only to the owner,
            # so others can't peek at our cookies:
            open(cookie_file, "w").close()
            chmod(cookie_file, stat.S_IRUSR|stat.S_IWUSR)
        else:
            raise

    # Only cache the jar once it has been set up successfully:
    _cookiejar = jar
    return _cookiejar
def _create_sitesdb():
    """Create the three tables (sites, sql_data, namespaces) in the sitesdb.

    The database file named by the module-level _sitesdb is opened (sqlite
    creates it if it is missing) and the schema script is run against it.
    """
    schema = """
    CREATE TABLE sites (site_name, site_project, site_lang, site_base_url,
                        site_article_path, site_script_path);
    CREATE TABLE sql_data (sql_site, sql_data_key, sql_data_value);
    CREATE TABLE namespaces (ns_site, ns_id, ns_name, ns_is_primary_name);
    """
    connection = sqlite.connect(_sitesdb)
    with connection:
        connection.executescript(schema)
def _load_site_from_sitesdb(name):
    """Return all information stored in the sitesdb relating to site 'name'.

    The information will be returned as a tuple, containing the site's project,
    language, base URL, article path, script path, SQL connection data (a
    dict), and namespaces (a dict mapping each namespace ID to a list of names,
    primary name first), in that order. If the site is not found in the
    database, SiteNotFoundError will be raised. If the query fails with an
    sqlite OperationalError (e.g. the tables don't exist yet), an empty
    database will be created before the exception is raised.
    """
    query1 = "SELECT * FROM sites WHERE site_name = ?"
    query2 = "SELECT sql_data_key, sql_data_value FROM sql_data WHERE sql_site = ?"
    query3 = "SELECT ns_id, ns_name, ns_is_primary_name FROM namespaces WHERE ns_site = ?"
    error = "Site '{0}' not found in the sitesdb.".format(name)
    with sqlite.connect(_sitesdb) as conn:
        try:
            site_data = conn.execute(query1, (name,)).fetchone()
        except sqlite.OperationalError:
            _create_sitesdb()
            raise SiteNotFoundError(error)
        if not site_data:
            raise SiteNotFoundError(error)
        sql_data = conn.execute(query2, (name,)).fetchall()
        ns_data = conn.execute(query3, (name,)).fetchall()

    # "SELECT *" yields all six columns of the sites table, the first of which
    # is site_name; we already know the name, so skip that column:
    project, lang, base_url, article_path, script_path = site_data[1:]
    sql = dict(sql_data)
    namespaces = {}
    for ns_id, ns_name, ns_is_primary_name in ns_data:
        try:
            if ns_is_primary_name:  # "Primary" name goes first in list
                namespaces[ns_id].insert(0, ns_name)
            else:  # Ordering of the aliases doesn't matter
                namespaces[ns_id].append(ns_name)
        except KeyError:
            namespaces[ns_id] = [ns_name]

    return project, lang, base_url, article_path, script_path, sql, namespaces
def _make_site_object(name):
    """Build a Site object for the site called 'name' in our sitesdb.

    The stored site data comes from _load_site_from_sitesdb(), so
    SiteNotFoundError propagates if the site is not in our sitesdb. Login
    details, user agent, assert, maxlag, and search settings are read from
    config.wiki. In the user agent, "$1" is replaced with the bot's version
    and "$2" with the running Python version.
    """
    site_info = _load_site_from_sitesdb(name)
    project, lang, base_url, article_path, script_path, sql, namespaces = site_info

    wiki = config.wiki
    agent = wiki.get("userAgent")
    if agent:
        agent = agent.replace("$1", __version__)
        agent = agent.replace("$2", platform.python_version())

    settings = {
        "name": name,
        "project": project,
        "lang": lang,
        "base_url": base_url,
        "article_path": article_path,
        "script_path": script_path,
        "sql": sql,
        "namespaces": namespaces,
        "login": (wiki.get("username"), wiki.get("password")),
        "cookiejar": _get_cookiejar(),
        "user_agent": agent,
        "assert_edit": wiki.get("assert"),
        "maxlag": wiki.get("maxlag"),
        "search_config": wiki.get("search"),
    }
    return Site(**settings)
def _get_site_name_from_sitesdb(project, lang):
    """Return the name of the first site with the specified project and lang.

    The name is returned as a plain string, ready to be passed to
    _make_site_object() or _remove_site_from_sitesdb(). If the site is not
    found, return None. If the query fails with an sqlite OperationalError
    (e.g. the tables don't exist yet), an empty sitesdb is created and None is
    returned.
    """
    # The table is called "sites" (see _create_sitesdb()), not "site":
    query = "SELECT site_name FROM sites WHERE site_project = ? and site_lang = ?"
    with sqlite.connect(_sitesdb) as conn:
        try:
            row = conn.execute(query, (project, lang)).fetchone()
        except sqlite.OperationalError:
            _create_sitesdb()
            return None
    # fetchone() gives a one-element row (or None); callers want the name:
    return row[0] if row else None
def _add_site_to_sitesdb(site):
    """Extract relevant info from a Site object and add it to the sitesdb.

    Works like a reverse _load_site_from_sitesdb(); the site's name, project,
    language, base URL, article path, script path, SQL connection data, and
    namespaces are extracted from the site and inserted into the sites
    database. Any existing rows for a site with the same name are deleted
    first, so calling this again updates the stored data. If the existence
    check fails with an sqlite OperationalError (e.g. the tables don't exist
    yet), we'll create the sitesdb first.

    The Site object itself is left untouched.
    """
    name = site.name
    sites_data = (name, site.project, site.lang, site._base_url,
                  site._article_path, site._script_path)
    sql_data = [(name, key, val) for key, val in site._sql_data.items()]
    ns_data = []
    for ns_id, ns_names in site._namespaces.items():
        # The first name in each list is the primary one. Read the list
        # without popping from it, so the site's own namespace data is not
        # modified as a side effect of saving it:
        for index, ns_name in enumerate(ns_names):
            ns_data.append((name, ns_id, ns_name, index == 0))

    with sqlite.connect(_sitesdb) as conn:
        check_exists = "SELECT 1 FROM sites WHERE site_name = ?"
        try:
            exists = conn.execute(check_exists, (name,)).fetchone()
        except sqlite.OperationalError:
            _create_sitesdb()
        else:
            if exists:
                conn.execute("DELETE FROM sites WHERE site_name = ?", (name,))
                conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
                conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
        conn.execute("INSERT INTO sites VALUES (?, ?, ?, ?, ?, ?)", sites_data)
        conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)", sql_data)
        conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)", ns_data)
def _remove_site_from_sitesdb(name):
    """Delete the site called 'name' from the sitesdb.

    Returns False if no row in the sites table matched (nothing else is
    touched in that case); otherwise the site's sql_data and namespaces rows
    are deleted as well and True is returned.
    """
    dependent_deletes = (
        "DELETE FROM sql_data WHERE sql_site = ?",
        "DELETE FROM namespaces WHERE ns_site = ?",
    )
    with sqlite.connect(_sitesdb) as conn:
        removed = conn.execute("DELETE FROM sites WHERE site_name = ?", (name,)).rowcount
        if removed == 0:
            return False
        for statement in dependent_deletes:
            conn.execute(statement, (name,))
        return True
def get_site(name=None, project=None, lang=None):
    """Return a Site instance built from information in the sitesdb.

    Called without arguments, the default site from the config file
    (config.wiki["defaultSite"]) is returned.

    Given a 'name', the site stored under that name is returned. This is
    equivalent to the site's 'wikiid' in the API, like 'enwiki'.

    Given a 'project' and a 'lang', the first site whose project and language
    match is returned. If several sites share the same values (unlikely), this
    is not a reliable way of loading a site; pass an explicit 'name' instead.

    The Site is given config.wiki["username"] and config.wiki["password"] as
    its login details.

    A project without a lang, or a lang without a project, raises TypeError.
    When all three arguments are given, 'name' is tried first and 'project'
    plus 'lang' serve as a fallback. SiteNotFoundError is raised when no
    matching site exists in the sitesdb or no default site is configured.
    """
    if not config.is_loaded():
        _load_config()

    # project and lang are only meaningful as a pair:
    if bool(project) != bool(lang):
        e = "Keyword arguments 'lang' and 'project' must be specified together."
        raise TypeError(e)

    # Nothing specified at all, so fall back on the configured default:
    if not (name or project or lang):
        try:
            default = config.wiki["defaultSite"]
        except KeyError:
            e = "Default site is not specified in config."
            raise SiteNotFoundError(e)
        return _make_site_object(default)

    # An explicit name wins; project/lang are consulted only if it is unknown:
    if name:
        try:
            return _make_site_object(name)
        except SiteNotFoundError:
            if project and lang:
                fallback = _get_site_name_from_sitesdb(project, lang)
                if fallback:
                    return _make_site_object(fallback)
            raise

    # Only project and lang were given:
    found = _get_site_name_from_sitesdb(project, lang)
    if not found:
        e = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang)
        raise SiteNotFoundError(e)
    return _make_site_object(found)
def add_site(project=None, lang=None, base_url=None, script_path="/w",
             sql=None):
    """Add a site to the sitesdb so it can be retrieved with get_site() later.

    If only a project and a lang are given, we'll guess the base_url as
    "http://{lang}.{project}.org". If this is wrong, provide the correct
    base_url as an argument (in which case project and lang are ignored). Most
    wikis use "/w" as the script path (meaning the API is located at
    "{base_url}{script_path}/api.php" -> "http://{lang}.{project}.org/w/api.php"),
    so this is the default. If your wiki is different, provide the script_path
    as an argument. The only other argument to Site() that we can't get from
    config files or by querying the wiki itself is SQL connection info, so
    provide a dict of kwargs as `sql` and Site will pass it to
    oursql.connect(**sql), allowing you to make queries with site.sql_query().

    Returns the Site object that was created and stored. If a site with the
    same name is already in our sitesdb, its old entry is replaced (this can
    be done purposefully to update old site info). Raises SiteNotFoundError if
    not enough information has been provided to identify the site (e.g. a
    project but not a lang).
    """
    if not config.is_loaded():
        _load_config()

    if not base_url:
        if not project or not lang:
            e = "Without a base_url, both a project and a lang must be given."
            raise SiteNotFoundError(e)
        base_url = "http://{0}.{1}.org".format(lang, project)

    login = (config.wiki.get("username"), config.wiki.get("password"))
    cookiejar = _get_cookiejar()
    user_agent = config.wiki.get("userAgent")
    assert_edit = config.wiki.get("assert")
    maxlag = config.wiki.get("maxlag")
    search_config = config.wiki.get("search")

    # Fill in the version placeholders exactly as _make_site_object() does, so
    # the temporary Site doesn't send a literal "$1"/"$2" in its user agent:
    if user_agent:
        user_agent = user_agent.replace("$1", __version__)
        user_agent = user_agent.replace("$2", platform.python_version())

    # Create a temp Site object to log in and load the other attributes:
    site = Site(base_url=base_url, script_path=script_path, sql=sql,
                login=login, cookiejar=cookiejar, user_agent=user_agent,
                assert_edit=assert_edit, maxlag=maxlag,
                search_config=search_config)

    _add_site_to_sitesdb(site)
    return site
def remove_site(name=None, project=None, lang=None):
    """Remove a site from the sitesdb.

    Returns True if a site was removed, or False if no matching site was in
    our sitesdb. When 'name' is given it is tried first; if nothing was stored
    under that name and both 'project' and 'lang' were supplied, the site
    matching those two is removed instead. Raises TypeError if a project was
    given but not a language, or vice versa.
    """
    if not config.is_loaded():
        _load_config()

    # project and lang are only meaningful as a pair:
    if bool(project) != bool(lang):
        e = "Keyword arguments 'lang' and 'project' must be specified together."
        raise TypeError(e)

    # Try the explicit name first:
    if name and _remove_site_from_sitesdb(name):
        return True

    # Either no name was given or it wasn't found; fall back on project/lang:
    if project and lang:
        found = _get_site_name_from_sitesdb(project, lang)
        if found:
            return _remove_site_from_sitesdb(found)

    return False
@@ -71,8 +71,8 @@ class Site(object): | |||||
def __init__(self, name=None, project=None, lang=None, base_url=None, | def __init__(self, name=None, project=None, lang=None, base_url=None, | ||||
article_path=None, script_path=None, sql=None, | article_path=None, script_path=None, sql=None, | ||||
namespaces=None, login=(None, None), cookiejar=None, | namespaces=None, login=(None, None), cookiejar=None, | ||||
user_agent=None, assert_edit=None, maxlag=None, | |||||
search_config=(None, None)): | |||||
user_agent=None, use_https=False, assert_edit=None, | |||||
maxlag=None, search_config=(None, None)): | |||||
"""Constructor for new Site instances. | """Constructor for new Site instances. | ||||
This probably isn't necessary to call yourself unless you're building a | This probably isn't necessary to call yourself unless you're building a | ||||
@@ -100,7 +100,8 @@ class Site(object): | |||||
self._script_path = script_path | self._script_path = script_path | ||||
self._namespaces = namespaces | self._namespaces = namespaces | ||||
# Attributes used for API queries: | |||||
# Attributes used for API queries: | |||||
self._use_https = use_https | |||||
self._assert_edit = assert_edit | self._assert_edit = assert_edit | ||||
self._maxlag = maxlag | self._maxlag = maxlag | ||||
self._max_retries = 5 | self._max_retries = 5 | ||||
@@ -138,10 +139,10 @@ class Site(object): | |||||
res = ", ".join(( | res = ", ".join(( | ||||
"Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}", | "Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}", | ||||
"base_url={_base_url!r}", "article_path={_article_path!r}", | "base_url={_base_url!r}", "article_path={_article_path!r}", | ||||
"script_path={_script_path!r}", "assert_edit={_assert_edit!r}", | |||||
"maxlag={_maxlag!r}", "sql={_sql!r}", "login={0}", | |||||
"user_agent={2!r}", "cookiejar={1})" | |||||
)) | |||||
"script_path={_script_path!r}", "use_https={_use_https!r}", | |||||
"assert_edit={_assert_edit!r}", "maxlag={_maxlag!r}", | |||||
"sql={_sql_data!r}", "login={0}", "user_agent={2!r}", | |||||
"cookiejar={1})")) | |||||
name, password = self._login_info | name, password = self._login_info | ||||
login = "({0}, {1})".format(repr(name), "hidden" if password else None) | login = "({0}, {1})".format(repr(name), "hidden" if password else None) | ||||
cookies = self._cookiejar.__class__.__name__ | cookies = self._cookiejar.__class__.__name__ | ||||
@@ -163,7 +164,9 @@ class Site(object): | |||||
This will first attempt to construct an API url from self._base_url and | This will first attempt to construct an API url from self._base_url and | ||||
self._script_path. We need both of these, or else we'll raise | self._script_path. We need both of these, or else we'll raise | ||||
SiteAPIError. | |||||
SiteAPIError. If self._base_url is protocol-relative (introduced in | |||||
MediaWiki 1.18), we'll choose HTTPS if self._use_https is True, | |||||
otherwise HTTP. | |||||
We'll encode the given params, adding format=json along the way, as | We'll encode the given params, adding format=json along the way, as | ||||
well as &assert= and &maxlag= based on self._assert_edit and _maxlag. | well as &assert= and &maxlag= based on self._assert_edit and _maxlag. | ||||
@@ -185,7 +188,13 @@ class Site(object): | |||||
e = "Tried to do an API query, but no API URL is known." | e = "Tried to do an API query, but no API URL is known." | ||||
raise SiteAPIError(e) | raise SiteAPIError(e) | ||||
url = ''.join((self._base_url, self._script_path, "/api.php")) | |||||
base_url = self._base_url | |||||
if base_url.startswith("//"): # Protocol-relative URLs from 1.18 | |||||
if self._use_https: | |||||
base_url = "https:" + base_url | |||||
else: | |||||
base_url = "http:" + base_url | |||||
url = ''.join((base_url, self._script_path, "/api.php")) | |||||
params["format"] = "json" # This is the only format we understand | params["format"] = "json" # This is the only format we understand | ||||
if self._assert_edit: # If requested, ensure that we're logged in | if self._assert_edit: # If requested, ensure that we're logged in | ||||
@@ -194,7 +203,6 @@ class Site(object): | |||||
params["maxlag"] = self._maxlag | params["maxlag"] = self._maxlag | ||||
data = urlencode(params) | data = urlencode(params) | ||||
logger.debug("{0} -> {1}".format(url, data)) | logger.debug("{0} -> {1}".format(url, data)) | ||||
try: | try: | ||||
@@ -0,0 +1,392 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 by Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from cookielib import LWPCookieJar, LoadError | |||||
import errno | |||||
from getpass import getpass | |||||
from os import chmod, path | |||||
from platform import python_version | |||||
import stat | |||||
import sqlite3 as sqlite | |||||
from earwigbot import __version__ | |||||
from earwigbot.config import config | |||||
from earwigbot.wiki.exceptions import SiteNotFoundError | |||||
from earwigbot.wiki.site import Site | |||||
# Names exported when this module is star-imported:
__all__ = ["SitesDBManager", "get_site", "add_site", "remove_site"]
class SitesDBManager(object): | |||||
""" | |||||
EarwigBot's Wiki Toolset: Sites Database Manager | |||||
This class controls the sites.db file, which stores information about all | |||||
wiki sites known to the bot. Three public methods act as bridges between | |||||
the bot's config files and Site objects: | |||||
get_site -- returns a Site object corresponding to a given site name | |||||
add_site -- stores a site in the database, given connection info | |||||
remove_site -- removes a site from the database, given its name | |||||
There's usually no need to use this class directly. All public methods | |||||
here are available as earwigbot.wiki.get_site(), earwigbot.wiki.add_site(), | |||||
and earwigbot.wiki.remove_site(), which use a sites.db file located in the | |||||
same directory as our config.yml file. Lower-level access can be achieved | |||||
by importing the manager class | |||||
(`from earwigbot.wiki.sitesdb import SitesDBManager`). | |||||
""" | |||||
def __init__(self, db_file): | |||||
"""Set up the manager with an attribute for the sitesdb filename.""" | |||||
self._cookiejar = None | |||||
self._sitesdb = db_file | |||||
def _load_config(self):
    """Load the bot's config, asking for a decryption key if one is needed.

    Config-requiring methods such as get_site() call this when config has
    not been loaded yet. config.load() reports whether the passwords in the
    config file are encrypted; if they are, the user is prompted for the
    key and the wiki password is decrypted with it.
    """
    if not config.load():
        return  # Passwords are stored in plain text; nothing to decrypt
    config._decryption_key = getpass("Enter key to unencrypt bot passwords: ")
    config.decrypt(config.wiki, "password")
def _get_cookiejar(self): | |||||
"""Return a LWPCookieJar object loaded from our .cookies file. | |||||
The same .cookies file is returned every time, located in the project | |||||
root, same directory as config.yml and bot.py. If it doesn't exist, we | |||||
will create the file and set it to be readable and writeable only by | |||||
us. If it exists but the information inside is bogus, we'll ignore it. | |||||
This is normally called by _make_site_object() (in turn called by | |||||
get_site()), and the cookiejar is passed to our Site's constructor, | |||||
used when it makes API queries. This way, we can easily preserve | |||||
cookies between sites (e.g., for CentralAuth), making logins easier. | |||||
""" | |||||
if self._cookiejar: | |||||
return self._cookiejar | |||||
cookie_file = path.join(config.root_dir, ".cookies") | |||||
self._cookiejar = LWPCookieJar(cookie_file) | |||||
try: | |||||
self._cookiejar.load() | |||||
except LoadError: | |||||
pass # File contains bad data, so ignore it completely | |||||
except IOError as e: | |||||
if e.errno == errno.ENOENT: # "No such file or directory" | |||||
# Create the file and restrict reading/writing only to the | |||||
# owner, so others can't peak at our cookies: | |||||
open(cookie_file, "w").close() | |||||
chmod(cookie_file, stat.S_IRUSR|stat.S_IWUSR) | |||||
else: | |||||
raise | |||||
return self._cookiejar | |||||
def _create_sitesdb(self): | |||||
"""Initialize the sitesdb file with its three necessary tables.""" | |||||
script = """ | |||||
CREATE TABLE sites (site_name, site_project, site_lang, site_base_url, | |||||
site_article_path, site_script_path); | |||||
CREATE TABLE sql_data (sql_site, sql_data_key, sql_data_value); | |||||
CREATE TABLE namespaces (ns_site, ns_id, ns_name, ns_is_primary_name); | |||||
""" | |||||
with sqlite.connect(self._sitesdb) as conn: | |||||
conn.executescript(script) | |||||
def _load_site_from_sitesdb(self, name): | |||||
"""Return all information stored in the sitesdb relating to given site. | |||||
The information will be returned as a tuple, containing the site's | |||||
name, project, language, base URL, article path, script path, SQL | |||||
connection data, and namespaces, in that order. If the site is not | |||||
found in the database, SiteNotFoundError will be raised. An empty | |||||
database will be created before the exception is raised if none exists. | |||||
""" | |||||
query1 = "SELECT * FROM sites WHERE site_name = ?" | |||||
query2 = "SELECT sql_data_key, sql_data_value FROM sql_data WHERE sql_site = ?" | |||||
query3 = "SELECT ns_id, ns_name, ns_is_primary_name FROM namespaces WHERE ns_site = ?" | |||||
error = "Site '{0}' not found in the sitesdb.".format(name) | |||||
with sqlite.connect(self._sitesdb) as conn: | |||||
try: | |||||
site_data = conn.execute(query1, (name,)).fetchone() | |||||
except sqlite.OperationalError: | |||||
self._create_sitesdb() | |||||
raise SiteNotFoundError(error) | |||||
if not site_data: | |||||
raise SiteNotFoundError(error) | |||||
sql_data = conn.execute(query2, (name,)).fetchall() | |||||
ns_data = conn.execute(query3, (name,)).fetchall() | |||||
name, project, lang, base_url, article_path, script_path = site_data | |||||
sql = dict(sql_data) | |||||
namespaces = {} | |||||
for ns_id, ns_name, ns_is_primary_name in ns_data: | |||||
try: | |||||
if ns_is_primary_name: # "Primary" name goes first in list | |||||
namespaces[ns_id].insert(0, ns_name) | |||||
else: # Ordering of the aliases doesn't matter | |||||
namespaces[ns_id].append(ns_name) | |||||
except KeyError: | |||||
namespaces[ns_id] = [ns_name] | |||||
return (name, project, lang, base_url, article_path, script_path, sql, | |||||
namespaces) | |||||
def _make_site_object(self, name):
    """Build a Site object for the site called 'name' in our sitesdb.

    The stored site data comes from _load_site_from_sitesdb(), so
    SiteNotFoundError propagates if the site is not in our sitesdb. Login
    details, user agent, HTTPS preference, assert, maxlag, and search
    settings are read from config.wiki. In the user agent, "$1" is replaced
    with the bot's version and "$2" with the running Python version.
    """
    site_info = self._load_site_from_sitesdb(name)
    (name, project, lang, base_url, article_path, script_path, sql,
     namespaces) = site_info

    wiki = config.wiki
    agent = wiki.get("userAgent")
    if agent:
        agent = agent.replace("$1", __version__)
        agent = agent.replace("$2", python_version())

    settings = {
        "name": name,
        "project": project,
        "lang": lang,
        "base_url": base_url,
        "article_path": article_path,
        "script_path": script_path,
        "sql": sql,
        "namespaces": namespaces,
        "login": (wiki.get("username"), wiki.get("password")),
        "cookiejar": self._get_cookiejar(),
        "user_agent": agent,
        "use_https": wiki.get("useHTTPS", False),
        "assert_edit": wiki.get("assert"),
        "maxlag": wiki.get("maxlag"),
        "search_config": wiki.get("search"),
    }
    return Site(**settings)
def _get_site_name_from_sitesdb(self, project, lang): | |||||
"""Return the name of the first site with the given project and lang. | |||||
If the site is not found, return None. An empty sitesdb will be created | |||||
if none exists. | |||||
""" | |||||
query = "SELECT site_name FROM site WHERE site_project = ? and site_lang = ?" | |||||
with sqlite.connect(self._sitesdb) as conn: | |||||
try: | |||||
return conn.execute(query, (project, lang)).fetchone() | |||||
except sqlite.OperationalError: | |||||
self._create_sitesdb() | |||||
def _add_site_to_sitesdb(self, site):
    """Extract relevant info from a Site object and add it to the sitesdb.

    Works like a reverse _load_site_from_sitesdb(): the site's name,
    project, language, base URL, article path, script path, SQL connection
    data, and namespaces are read from 'site' and inserted into the sites
    database. Any existing rows for a site with the same name are deleted
    first, so calling this again updates the stored entry. If the existence
    check fails with OperationalError (e.g. the tables do not exist yet),
    the sitesdb is created via _create_sitesdb() before inserting.

    The 'site' object itself is left unmodified.
    """
    name = site.name()
    sites_data = (name, site.project(), site.lang(), site._base_url,
                  site._article_path, site._script_path)
    sql_data = [(name, key, val) for key, val in site._sql_data.items()]

    # The first name in each namespace's list is its primary name; the rest
    # are aliases. Iterate without popping so that the Site's own namespace
    # lists are not altered (the caller keeps using this Site afterwards).
    ns_data = []
    for ns_id, ns_names in site._namespaces.items():
        for index, ns_name in enumerate(ns_names):
            ns_data.append((name, ns_id, ns_name, index == 0))

    with sqlite.connect(self._sitesdb) as conn:
        check_exists = "SELECT 1 FROM sites WHERE site_name = ?"
        try:
            exists = conn.execute(check_exists, (name,)).fetchone()
        except sqlite.OperationalError:
            self._create_sitesdb()
        else:
            if exists:
                # Replace the old entry rather than duplicating it.
                conn.execute("DELETE FROM sites WHERE site_name = ?", (name,))
                conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
                conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
        conn.execute("INSERT INTO sites VALUES (?, ?, ?, ?, ?, ?)", sites_data)
        conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)", sql_data)
        conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)", ns_data)
def _remove_site_from_sitesdb(self, name):
    """Remove a site by name from the sitesdb.

    Returns True if a row for 'name' was deleted from the sites table (its
    SQL data and namespace rows are then deleted too), or False if no such
    site was stored. If the delete fails with OperationalError (e.g. the
    tables do not exist yet), an empty sitesdb is created via
    _create_sitesdb() and False is returned, matching how the other sitesdb
    helpers treat a missing database.
    """
    with sqlite.connect(self._sitesdb) as conn:
        try:
            cursor = conn.execute("DELETE FROM sites WHERE site_name = ?", (name,))
        except sqlite.OperationalError:
            # Nothing could have been stored in a database with no tables.
            self._create_sitesdb()
            return False
        if cursor.rowcount == 0:
            return False
        conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
        conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
        return True
def get_site(self, name=None, project=None, lang=None):
    """Return a Site instance built from information in the sitesdb.

    Called with no arguments, the default site is returned; its name is
    read from config.wiki["defaultSite"].

    Called with 'name', the site stored under that name is returned (names
    are expected to look like the API's 'wikiid', e.g. 'enwiki').

    Called with 'project' and 'lang', the first stored site matching both
    is returned. If several sites share those values the choice is not
    reliable; pass an explicit 'name' instead.

    If all three are given, 'name' is tried first and the project/lang
    pair is only used as a fallback when 'name' is not found.

    The username and password from config.wiki are handed to the Site
    when it is constructed.

    Raises TypeError if only one of 'project' and 'lang' is given, and
    SiteNotFoundError if no default site is configured or no matching site
    can be found.
    """
    if not config.is_loaded():
        self._load_config()

    # 'project' and 'lang' are only meaningful as a pair:
    if bool(project) != bool(lang):
        e = "Keyword arguments 'lang' and 'project' must be specified together."
        raise TypeError(e)

    # Nothing specified at all, so fall back on the configured default:
    if not (name or project or lang):
        try:
            default = config.wiki["defaultSite"]
        except KeyError:
            e = "Default site is not specified in config."
            raise SiteNotFoundError(e)
        return self._make_site_object(default)

    # An explicit name wins; project/lang are consulted only if it fails:
    if name:
        try:
            return self._make_site_object(name)
        except SiteNotFoundError:
            fallback = None
            if project and lang:
                fallback = self._get_site_name_from_sitesdb(project, lang)
            if fallback:
                return self._make_site_object(fallback)
            raise

    # Only project and lang were provided:
    found = self._get_site_name_from_sitesdb(project, lang)
    if not found:
        e = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang)
        raise SiteNotFoundError(e)
    return self._make_site_object(found)
def add_site(self, project=None, lang=None, base_url=None,
             script_path="/w", sql=None):
    """Add a site to the sitesdb so it can be retrieved with get_site().

    If no base_url is given, it is guessed from 'project' and 'lang' as the
    protocol-relative URL "//{lang}.{project}.org". If that guess would be
    wrong, pass the correct base_url; 'project' and 'lang' are then not
    used here at all.

    'script_path' defaults to "/w", which suits wikis whose API lives at
    "{base_url}/w/api.php"; pass a different value for other layouts.

    'sql' is forwarded unchanged to Site as its SQL connection settings
    (presumably a dict of connection keyword arguments; see Site).

    A temporary Site is constructed from these values plus the login,
    cookie jar, user agent and other options in config.wiki, stored with
    _add_site_to_sitesdb() (which replaces any existing entry of the same
    name), and then returned.

    Returns the Site object that was created. Raises SiteNotFoundError if
    neither a base_url nor both a project and a lang were provided.
    """
    if not config.is_loaded():
        self._load_config()

    if not base_url:
        if not (project and lang):
            e = "Without a base_url, both a project and a lang must be given."
            raise SiteNotFoundError(e)
        base_url = "//{0}.{1}.org".format(lang, project)

    credentials = (config.wiki.get("username"), config.wiki.get("password"))
    jar = self._get_cookiejar()
    wiki_options = {
        "user_agent": config.wiki.get("userAgent"),
        "use_https": config.wiki.get("useHTTPS", False),
        "assert_edit": config.wiki.get("assert"),
        "maxlag": config.wiki.get("maxlag"),
        "search_config": config.wiki.get("search"),
    }

    # Everything not passed here (name, project, lang, namespaces, ...) is
    # left for the Site object to work out for itself.
    site = Site(base_url=base_url, script_path=script_path, sql=sql,
                login=credentials, cookiejar=jar, **wiki_options)

    self._add_site_to_sitesdb(site)
    return site
def remove_site(self, name=None, project=None, lang=None):
    """Remove a site from the sitesdb.

    Returns True if a site was removed, or False if no matching site was
    stored. When 'name' is given it is tried first; if nothing was stored
    under that name and both 'project' and 'lang' were also given, the
    site matching that pair (looked up with _get_site_name_from_sitesdb())
    is removed instead. With no usable arguments, returns False.

    Raises TypeError if only one of 'project' and 'lang' is given.
    """
    if not config.is_loaded():
        self._load_config()

    # 'project' and 'lang' are only meaningful as a pair:
    if bool(project) != bool(lang):
        e = "Keyword arguments 'lang' and 'project' must be specified together."
        raise TypeError(e)

    have_pair = project and lang

    if name:
        removed = self._remove_site_from_sitesdb(name)
        if removed or not have_pair:
            return removed
        # The name was unknown; fall back on the project/lang lookup.
        match = self._get_site_name_from_sitesdb(project, lang)
        if match:
            return self._remove_site_from_sitesdb(match)
        return removed

    if not have_pair:
        return False

    match = self._get_site_name_from_sitesdb(project, lang)
    if match:
        return self._remove_site_from_sitesdb(match)
    return False
# The sitesdb file is "sites.db" in the directory two levels above the
# directory containing this module. A single module-wide manager is bound
# to it, and its public methods are exposed as module-level functions.
_module_dir = path.dirname(path.abspath(__file__))
_manager = SitesDBManager(
    path.join(path.dirname(path.dirname(_module_dir)), "sites.db"))
del _module_dir  # Temporary; keep it out of the module namespace.

get_site = _manager.get_site
add_site = _manager.add_site
remove_site = _manager.remove_site