@@ -1,19 +1,11 @@ | |||
# Ignore python bytecode: | |||
*.pyc | |||
# Ignore bot-specific config file: | |||
config.yml | |||
# Ignore logs directory: | |||
# Ignore bot-specific files: | |||
logs/ | |||
# Ignore cookies file: | |||
config.yml | |||
sites.db | |||
.cookies | |||
# Ignore OS X's crud: | |||
.DS_Store | |||
# Ignore python bytecode: | |||
*.pyc | |||
# Ignore pydev's nonsense: | |||
.project | |||
.pydevproject | |||
.settings/ | |||
# Ignore OS X's stuff: | |||
.DS_Store |
@@ -176,7 +176,7 @@ class _BotConfig(object): | |||
return self._root_dir | |||
@property | |||
def config_path(self): | |||
def path(self): | |||
return self._config_path | |||
@property | |||
@@ -89,7 +89,7 @@ class Watcher(IRCConnection): | |||
return | |||
module = imp.new_module("_rc_event_processing_rules") | |||
try: | |||
exec compile(rules, config.config_path, "exec") in module.__dict__ | |||
exec compile(rules, config.path, "exec") in module.__dict__ | |||
except Exception: | |||
e = "Could not compile config file's RC event rules" | |||
self.logger.exception(e) | |||
@@ -36,9 +36,9 @@ logger.addHandler(_log.NullHandler()) | |||
from earwigbot.wiki.constants import * | |||
from earwigbot.wiki.exceptions import * | |||
from earwigbot.wiki.functions import * | |||
from earwigbot.wiki.category import Category | |||
from earwigbot.wiki.page import Page | |||
from earwigbot.wiki.site import Site | |||
from earwigbot.wiki.sitesdb import get_site, add_site, remove_site | |||
from earwigbot.wiki.user import User |
@@ -1,368 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 by Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
""" | |||
EarwigBot's Wiki Toolset: Misc Functions | |||
This module, a component of the wiki package, contains miscellaneous functions | |||
that are not methods of any class. Currently, it contains get_site(), | |||
add_site(), and remove_site(). These functions act as bridges between the bot's | |||
config files and Site objects. | |||
There's no need to import this module explicitly. All functions here are | |||
automatically available from earwigbot.wiki. | |||
""" | |||
from cookielib import LWPCookieJar, LoadError | |||
import errno | |||
from getpass import getpass | |||
from os import chmod, path | |||
import platform | |||
import stat | |||
import sqlite3 as sqlite | |||
from earwigbot import __version__ | |||
from earwigbot.config import config | |||
from earwigbot.wiki.exceptions import SiteNotFoundError | |||
from earwigbot.wiki.site import Site | |||
__all__ = ["get_site", "add_site", "remove_site"] | |||
_cookiejar = None | |||
_sitesdb = "sites.db" | |||
def _load_config():
    """Load the bot's config, prompting for a decryption key when needed.

    Config-requiring functions such as get_site() call this when the config
    has not been loaded yet. That normally only happens when code is run
    straight from Python's interpreter rather than through the bot itself.
    """
    if not config.load():
        return  # Nothing in the config file is encrypted

    # Passwords in the config file are encrypted, so ask for the key:
    config._decryption_key = getpass("Enter key to unencrypt bot passwords: ")
    config.decrypt(config.wiki, "password")
def _get_cookiejar():
    """Return a LWPCookieJar object loaded from our .cookies file.

    The jar is backed by a ".cookies" file inside config.root_dir and is
    cached in the module-level _cookiejar, so every caller shares one object.
    If the file doesn't exist, we create it and make it readable and writeable
    only by its owner. If it exists but its contents cannot be parsed, we
    ignore them and start with an empty jar. Any other IOError is re-raised.

    This is normally called by _make_site_object() (in turn called by
    get_site()), and the cookiejar is passed to our Site's constructor, used
    when it makes API queries. This way, we can easily preserve cookies between
    sites (e.g., for CentralAuth), making logins easier.
    """
    global _cookiejar
    # Compare against None explicitly: a CookieJar defines __len__, so a jar
    # that holds no cookies yet is falsy and would be rebuilt on every call,
    # handing each caller a different object.
    if _cookiejar is not None:
        return _cookiejar

    cookie_file = path.join(config.root_dir, ".cookies")
    jar = LWPCookieJar(cookie_file)

    try:
        jar.load()
    except LoadError:
        pass  # File contains bad data, so ignore it completely
    except IOError as e:
        if e.errno == errno.ENOENT:  # "No such file or directory"
            # Create the file and restrict reading/writing only to the owner,
            # so others can't peek at our cookies:
            open(cookie_file, "w").close()
            chmod(cookie_file, stat.S_IRUSR|stat.S_IWUSR)
        else:
            raise

    # Only cache the jar once it has been set up without an unexpected error:
    _cookiejar = jar
    return _cookiejar
def _create_sitesdb():
    """Create the sites, sql_data, and namespaces tables in the sitesdb."""
    schema = """
    CREATE TABLE sites (site_name, site_project, site_lang, site_base_url,
                        site_article_path, site_script_path);
    CREATE TABLE sql_data (sql_site, sql_data_key, sql_data_value);
    CREATE TABLE namespaces (ns_site, ns_id, ns_name, ns_is_primary_name);
    """
    connection = sqlite.connect(_sitesdb)
    with connection:
        connection.executescript(schema)
def _load_site_from_sitesdb(name):
    """Return all information stored in the sitesdb relating to site 'name'.

    The information will be returned as a tuple, containing the site's project,
    language, base URL, article path, script path, SQL connection data (a
    dict), and namespaces (a dict mapping each namespace ID to a list of names
    with the primary name first), in that order.

    If the site is not found in the database, SiteNotFoundError will be raised.
    If the lookup itself fails with an OperationalError, we try to create the
    tables before raising SiteNotFoundError.
    """
    query1 = "SELECT * FROM sites WHERE site_name = ?"
    query2 = "SELECT sql_data_key, sql_data_value FROM sql_data WHERE sql_site = ?"
    query3 = "SELECT ns_id, ns_name, ns_is_primary_name FROM namespaces WHERE ns_site = ?"
    error = "Site '{0}' not found in the sitesdb.".format(name)
    with sqlite.connect(_sitesdb) as conn:
        try:
            site_data = conn.execute(query1, (name,)).fetchone()
        except sqlite.OperationalError:
            # Presumably the tables have not been created yet, so set up an
            # empty database for next time:
            _create_sitesdb()
            raise SiteNotFoundError(error)
        if not site_data:
            raise SiteNotFoundError(error)
        sql_data = conn.execute(query2, (name,)).fetchall()
        ns_data = conn.execute(query3, (name,)).fetchall()

    # "SELECT *" returns site_name as the first of six columns; we already
    # have the name, so skip it and unpack the remaining five:
    project, lang, base_url, article_path, script_path = site_data[1:]
    sql = dict(sql_data)
    namespaces = {}
    for ns_id, ns_name, ns_is_primary_name in ns_data:
        names = namespaces.setdefault(ns_id, [])
        if ns_is_primary_name:  # "Primary" name goes first in list
            names.insert(0, ns_name)
        else:  # Ordering of the aliases doesn't matter
            names.append(ns_name)

    return project, lang, base_url, article_path, script_path, sql, namespaces
def _make_site_object(name):
    """Build a Site object for the site called 'name' in our sitesdb.

    The stored site data comes from _load_site_from_sitesdb(), which raises
    SiteNotFoundError when the site is not in the sitesdb. Login details, the
    user agent, and the other query settings are read from config.wiki.
    """
    site_info = _load_site_from_sitesdb(name)
    project, lang, base_url, article_path, script_path, sql, namespaces = site_info

    credentials = (config.wiki.get("username"), config.wiki.get("password"))
    jar = _get_cookiejar()
    agent = config.wiki.get("userAgent")
    assert_edit = config.wiki.get("assert")
    maxlag = config.wiki.get("maxlag")
    search_config = config.wiki.get("search")

    if agent:
        # "$1" stands for the bot's version and "$2" for Python's version:
        agent = agent.replace("$1", __version__)
        agent = agent.replace("$2", platform.python_version())

    site_kwargs = {
        "name": name, "project": project, "lang": lang, "base_url": base_url,
        "article_path": article_path, "script_path": script_path, "sql": sql,
        "namespaces": namespaces, "login": credentials, "cookiejar": jar,
        "user_agent": agent, "assert_edit": assert_edit, "maxlag": maxlag,
        "search_config": search_config,
    }
    return Site(**site_kwargs)
def _get_site_name_from_sitesdb(project, lang):
    """Return the name of the first site with the specified project and lang.

    If no site matches, return None. If the lookup fails with an
    OperationalError (presumably because the tables do not exist yet), we try
    to create them and return None.
    """
    # The table is called "sites"; querying "site" always failed.
    query = "SELECT site_name FROM sites WHERE site_project = ? and site_lang = ?"
    with sqlite.connect(_sitesdb) as conn:
        try:
            row = conn.execute(query, (project, lang)).fetchone()
        except sqlite.OperationalError:
            _create_sitesdb()
            return None

    # fetchone() gives a one-element row (or None); callers want the bare name
    return row[0] if row else None
def _add_site_to_sitesdb(site):
    """Extract relevant info from a Site object and add it to the sitesdb.

    Works like a reverse _load_site_from_sitesdb(); the site's project,
    language, base URL, article path, script path, SQL connection data, and
    namespaces are extracted from the site and inserted into the sites
    database. Any rows already stored under the same site name are deleted
    first, so adding a known site updates it. If checking for the site fails
    with an OperationalError, we create the tables before inserting.

    The Site object itself is left unmodified.
    """
    # NOTE(review): the SitesDBManager version of this code calls site.name(),
    # site.project() and site.lang() -- confirm these are plain attributes on
    # the Site class this module is paired with.
    name = site.name
    sites_data = (name, site.project, site.lang, site._base_url,
                  site._article_path, site._script_path)
    sql_data = [(name, key, val) for key, val in site._sql_data.items()]
    ns_data = []
    for ns_id, ns_names in site._namespaces.items():
        # The first name in each list is the primary one. Walk the list
        # instead of pop()ing from it so the site's own data stays intact.
        for position, ns_name in enumerate(ns_names):
            ns_data.append((name, ns_id, ns_name, position == 0))

    with sqlite.connect(_sitesdb) as conn:
        check_exists = "SELECT 1 FROM sites WHERE site_name = ?"
        try:
            exists = conn.execute(check_exists, (name,)).fetchone()
        except sqlite.OperationalError:
            _create_sitesdb()
        else:
            if exists:
                conn.execute("DELETE FROM sites WHERE site_name = ?", (name,))
                conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
                conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
        conn.execute("INSERT INTO sites VALUES (?, ?, ?, ?, ?, ?)", sites_data)
        conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)", sql_data)
        conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)", ns_data)
def _remove_site_from_sitesdb(name):
    """Delete the site called 'name' from the sitesdb.

    Returns True if a row was deleted from the sites table (its sql_data and
    namespaces rows are then deleted too), or False if no such site existed.
    """
    with sqlite.connect(_sitesdb) as conn:
        deleted = conn.execute("DELETE FROM sites WHERE site_name = ?", (name,))
        if deleted.rowcount != 0:
            conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
            conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
            return True
        return False
def get_site(name=None, project=None, lang=None):
    """Return a Site instance built from information in the sitesdb.

    Called without arguments, this returns the default site named by
    config.wiki["defaultSite"].

    Given a 'name', it returns the site with that name.

    Given a 'project' and a 'lang', it returns the first site stored with
    those values. If several sites share them, pass an explicit 'name'
    instead.

    Giving only one of 'project' and 'lang' raises TypeError. When all three
    arguments are given, 'name' is tried first and 'project'/'lang' are used
    only if no site has that name. SiteNotFoundError is raised when no
    matching site exists, or when no default site is configured.
    """
    if not config.is_loaded():
        _load_config()

    # Exactly one of project/lang was given, which we cannot work with:
    if bool(project) != bool(lang):
        e = "Keyword arguments 'lang' and 'project' must be specified together."
        raise TypeError(e)

    # Nothing was given at all, so fall back to the configured default site:
    if not (name or project or lang):
        try:
            default = config.wiki["defaultSite"]
        except KeyError:
            e = "Default site is not specified in config."
            raise SiteNotFoundError(e)
        return _make_site_object(default)

    # Only project and lang were given:
    if not name:
        found = _get_site_name_from_sitesdb(project, lang)
        if found:
            return _make_site_object(found)
        e = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang)
        raise SiteNotFoundError(e)

    # A name was given; the other arguments matter only if it isn't found:
    try:
        return _make_site_object(name)
    except SiteNotFoundError:
        if project and lang:
            fallback = _get_site_name_from_sitesdb(project, lang)
            if fallback:
                return _make_site_object(fallback)
        raise
def add_site(project=None, lang=None, base_url=None, script_path="/w",
             sql=None):
    """Add a site to the sitesdb so it can be retrieved with get_site() later.

    If only a project and a lang are given, we'll guess the base_url as
    "http://{lang}.{project}.org". If this is wrong, provide the correct
    base_url as an argument (in which case project and lang are ignored). Most
    wikis use "/w" as the script path (meaning the API is located at
    "{base_url}{script_path}/api.php" -> "http://{lang}.{project}.org/w/api.php"),
    so this is the default. If your wiki is different, provide the script_path
    as an argument. SQL connection info cannot be read from the config, so
    provide it as a dict in `sql`; it is handed to Site unchanged.

    Returns the Site object that was created and stored. If a site with the
    same name is already in our sitesdb, its stored information is replaced
    (this can be done purposefully to update old site info). Raises
    SiteNotFoundError if not enough information has been provided to identify
    the site (e.g. a project but not a lang).
    """
    if not config.is_loaded():
        _load_config()

    if not base_url:
        if not project or not lang:
            e = "Without a base_url, both a project and a lang must be given."
            raise SiteNotFoundError(e)
        base_url = "http://{0}.{1}.org".format(lang, project)

    login = (config.wiki.get("username"), config.wiki.get("password"))
    cookiejar = _get_cookiejar()
    user_agent = config.wiki.get("userAgent")
    assert_edit = config.wiki.get("assert")
    maxlag = config.wiki.get("maxlag")
    search_config = config.wiki.get("search")

    # Fill in the version placeholders exactly like _make_site_object() does,
    # so the temporary Site doesn't send a literal "$1"/"$2" user agent:
    if user_agent:
        user_agent = user_agent.replace("$1", __version__)
        user_agent = user_agent.replace("$2", platform.python_version())

    # Create a temp Site object to log in and load the other attributes:
    site = Site(base_url=base_url, script_path=script_path, sql=sql,
                login=login, cookiejar=cookiejar, user_agent=user_agent,
                assert_edit=assert_edit, maxlag=maxlag,
                search_config=search_config)

    _add_site_to_sitesdb(site)
    return site
def remove_site(name=None, project=None, lang=None):
    """Remove a site from the sitesdb.

    Returns True if a site was removed and False if no matching site was in
    the sitesdb. When 'name', 'project', and 'lang' are all given, 'name' is
    tried first and the project/lang pair is used only if no site had that
    name. Raises TypeError if a project was given without a language, or
    vice versa.
    """
    if not config.is_loaded():
        _load_config()

    # Exactly one of project/lang was given, which we cannot work with:
    if bool(project) != bool(lang):
        e = "Keyword arguments 'lang' and 'project' must be specified together."
        raise TypeError(e)

    # Try the explicit name first; stop here if that removed something:
    if name and _remove_site_from_sitesdb(name):
        return True

    # Otherwise fall back to looking the site up by project and language:
    if project and lang:
        match = _get_site_name_from_sitesdb(project, lang)
        if match:
            return _remove_site_from_sitesdb(match)

    return False
@@ -71,8 +71,8 @@ class Site(object): | |||
def __init__(self, name=None, project=None, lang=None, base_url=None, | |||
article_path=None, script_path=None, sql=None, | |||
namespaces=None, login=(None, None), cookiejar=None, | |||
user_agent=None, assert_edit=None, maxlag=None, | |||
search_config=(None, None)): | |||
user_agent=None, use_https=False, assert_edit=None, | |||
maxlag=None, search_config=(None, None)): | |||
"""Constructor for new Site instances. | |||
This probably isn't necessary to call yourself unless you're building a | |||
@@ -100,7 +100,8 @@ class Site(object): | |||
self._script_path = script_path | |||
self._namespaces = namespaces | |||
# Attributes used for API queries: | |||
# Attributes used for API queries: | |||
self._use_https = use_https | |||
self._assert_edit = assert_edit | |||
self._maxlag = maxlag | |||
self._max_retries = 5 | |||
@@ -138,10 +139,10 @@ class Site(object): | |||
res = ", ".join(( | |||
"Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}", | |||
"base_url={_base_url!r}", "article_path={_article_path!r}", | |||
"script_path={_script_path!r}", "assert_edit={_assert_edit!r}", | |||
"maxlag={_maxlag!r}", "sql={_sql!r}", "login={0}", | |||
"user_agent={2!r}", "cookiejar={1})" | |||
)) | |||
"script_path={_script_path!r}", "use_https={_use_https!r}", | |||
"assert_edit={_assert_edit!r}", "maxlag={_maxlag!r}", | |||
"sql={_sql_data!r}", "login={0}", "user_agent={2!r}", | |||
"cookiejar={1})")) | |||
name, password = self._login_info | |||
login = "({0}, {1})".format(repr(name), "hidden" if password else None) | |||
cookies = self._cookiejar.__class__.__name__ | |||
@@ -163,7 +164,9 @@ class Site(object): | |||
This will first attempt to construct an API url from self._base_url and | |||
self._script_path. We need both of these, or else we'll raise | |||
SiteAPIError. | |||
SiteAPIError. If self._base_url is protocol-relative (introduced in | |||
MediaWiki 1.18), we'll choose HTTPS if self._use_https is True, | |||
otherwise HTTP. | |||
We'll encode the given params, adding format=json along the way, as | |||
well as &assert= and &maxlag= based on self._assert_edit and _maxlag. | |||
@@ -185,7 +188,13 @@ class Site(object): | |||
e = "Tried to do an API query, but no API URL is known." | |||
raise SiteAPIError(e) | |||
url = ''.join((self._base_url, self._script_path, "/api.php")) | |||
base_url = self._base_url | |||
if base_url.startswith("//"): # Protocol-relative URLs from 1.18 | |||
if self._use_https: | |||
base_url = "https:" + base_url | |||
else: | |||
base_url = "http:" + base_url | |||
url = ''.join((base_url, self._script_path, "/api.php")) | |||
params["format"] = "json" # This is the only format we understand | |||
if self._assert_edit: # If requested, ensure that we're logged in | |||
@@ -194,7 +203,6 @@ class Site(object): | |||
params["maxlag"] = self._maxlag | |||
data = urlencode(params) | |||
logger.debug("{0} -> {1}".format(url, data)) | |||
try: | |||
@@ -0,0 +1,392 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2009-2012 by Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from cookielib import LWPCookieJar, LoadError | |||
import errno | |||
from getpass import getpass | |||
from os import chmod, path | |||
from platform import python_version | |||
import stat | |||
import sqlite3 as sqlite | |||
from earwigbot import __version__ | |||
from earwigbot.config import config | |||
from earwigbot.wiki.exceptions import SiteNotFoundError | |||
from earwigbot.wiki.site import Site | |||
__all__ = ["SitesDBManager", "get_site", "add_site", "remove_site"] | |||
class SitesDBManager(object): | |||
""" | |||
EarwigBot's Wiki Toolset: Sites Database Manager | |||
This class controls the sites.db file, which stores information about all | |||
wiki sites known to the bot. Three public methods act as bridges between | |||
the bot's config files and Site objects: | |||
get_site -- returns a Site object corresponding to a given site name | |||
add_site -- stores a site in the database, given connection info | |||
remove_site -- removes a site from the database, given its name | |||
There's usually no need to use this class directly. All public methods | |||
here are available as earwigbot.wiki.get_site(), earwigbot.wiki.add_site(), | |||
and earwigbot.wiki.remove_site(), which use a sites.db file located in the | |||
same directory as our config.yml file. Lower-level access can be achieved | |||
by importing the manager class | |||
(`from earwigbot.wiki.sitesdb import SitesDBManager`). | |||
""" | |||
def __init__(self, db_file): | |||
"""Set up the manager with an attribute for the sitesdb filename.""" | |||
self._cookiejar = None | |||
self._sitesdb = db_file | |||
def _load_config(self):
    """Load the bot's config, prompting for a decryption key when needed.

    Config-requiring methods such as get_site() call this when the config
    has not been loaded yet. That normally only happens when code is run
    straight from Python's interpreter rather than through the bot itself.
    """
    if not config.load():
        return  # Nothing in the config file is encrypted

    # Passwords in the config file are encrypted, so ask for the key:
    config._decryption_key = getpass("Enter key to unencrypt bot passwords: ")
    config.decrypt(config.wiki, "password")
def _get_cookiejar(self):
    """Return a LWPCookieJar object loaded from our .cookies file.

    The jar is backed by a ".cookies" file inside config.root_dir and is
    cached on this manager, so every caller shares one object. If the file
    doesn't exist, we create it and make it readable and writeable only by
    its owner. If it exists but its contents cannot be parsed, we ignore
    them and start with an empty jar. Any other IOError is re-raised.

    This is normally called by _make_site_object() (in turn called by
    get_site()), and the cookiejar is passed to our Site's constructor,
    used when it makes API queries. This way, we can easily preserve
    cookies between sites (e.g., for CentralAuth), making logins easier.
    """
    # Compare against None explicitly: a CookieJar defines __len__, so a jar
    # that holds no cookies yet is falsy and would be rebuilt on every call,
    # handing each caller a different object.
    if self._cookiejar is not None:
        return self._cookiejar

    cookie_file = path.join(config.root_dir, ".cookies")
    jar = LWPCookieJar(cookie_file)

    try:
        jar.load()
    except LoadError:
        pass  # File contains bad data, so ignore it completely
    except IOError as e:
        if e.errno == errno.ENOENT:  # "No such file or directory"
            # Create the file and restrict reading/writing only to the
            # owner, so others can't peek at our cookies:
            open(cookie_file, "w").close()
            chmod(cookie_file, stat.S_IRUSR|stat.S_IWUSR)
        else:
            raise

    # Only cache the jar once it has been set up without an unexpected error:
    self._cookiejar = jar
    return self._cookiejar
def _create_sitesdb(self): | |||
"""Initialize the sitesdb file with its three necessary tables.""" | |||
script = """ | |||
CREATE TABLE sites (site_name, site_project, site_lang, site_base_url, | |||
site_article_path, site_script_path); | |||
CREATE TABLE sql_data (sql_site, sql_data_key, sql_data_value); | |||
CREATE TABLE namespaces (ns_site, ns_id, ns_name, ns_is_primary_name); | |||
""" | |||
with sqlite.connect(self._sitesdb) as conn: | |||
conn.executescript(script) | |||
def _load_site_from_sitesdb(self, name): | |||
"""Return all information stored in the sitesdb relating to given site. | |||
The information will be returned as a tuple, containing the site's | |||
name, project, language, base URL, article path, script path, SQL | |||
connection data, and namespaces, in that order. If the site is not | |||
found in the database, SiteNotFoundError will be raised. An empty | |||
database will be created before the exception is raised if none exists. | |||
""" | |||
query1 = "SELECT * FROM sites WHERE site_name = ?" | |||
query2 = "SELECT sql_data_key, sql_data_value FROM sql_data WHERE sql_site = ?" | |||
query3 = "SELECT ns_id, ns_name, ns_is_primary_name FROM namespaces WHERE ns_site = ?" | |||
error = "Site '{0}' not found in the sitesdb.".format(name) | |||
with sqlite.connect(self._sitesdb) as conn: | |||
try: | |||
site_data = conn.execute(query1, (name,)).fetchone() | |||
except sqlite.OperationalError: | |||
self._create_sitesdb() | |||
raise SiteNotFoundError(error) | |||
if not site_data: | |||
raise SiteNotFoundError(error) | |||
sql_data = conn.execute(query2, (name,)).fetchall() | |||
ns_data = conn.execute(query3, (name,)).fetchall() | |||
name, project, lang, base_url, article_path, script_path = site_data | |||
sql = dict(sql_data) | |||
namespaces = {} | |||
for ns_id, ns_name, ns_is_primary_name in ns_data: | |||
try: | |||
if ns_is_primary_name: # "Primary" name goes first in list | |||
namespaces[ns_id].insert(0, ns_name) | |||
else: # Ordering of the aliases doesn't matter | |||
namespaces[ns_id].append(ns_name) | |||
except KeyError: | |||
namespaces[ns_id] = [ns_name] | |||
return (name, project, lang, base_url, article_path, script_path, sql, | |||
namespaces) | |||
def _make_site_object(self, name):
    """Build a Site object for the site called 'name' in our sitesdb.

    The stored site data comes from _load_site_from_sitesdb(), which raises
    SiteNotFoundError when the site is not in the sitesdb. Login details,
    the user agent, the HTTPS preference, and the other query settings are
    read from config.wiki.
    """
    site_info = self._load_site_from_sitesdb(name)
    (name, project, lang, base_url, article_path, script_path, sql,
     namespaces) = site_info

    credentials = (config.wiki.get("username"), config.wiki.get("password"))
    jar = self._get_cookiejar()
    agent = config.wiki.get("userAgent")
    use_https = config.wiki.get("useHTTPS", False)
    assert_edit = config.wiki.get("assert")
    maxlag = config.wiki.get("maxlag")
    search_config = config.wiki.get("search")

    if agent:
        # "$1" stands for the bot's version and "$2" for Python's version:
        agent = agent.replace("$1", __version__)
        agent = agent.replace("$2", python_version())

    site_kwargs = {
        "name": name, "project": project, "lang": lang, "base_url": base_url,
        "article_path": article_path, "script_path": script_path, "sql": sql,
        "namespaces": namespaces, "login": credentials, "cookiejar": jar,
        "user_agent": agent, "use_https": use_https,
        "assert_edit": assert_edit, "maxlag": maxlag,
        "search_config": search_config,
    }
    return Site(**site_kwargs)
def _get_site_name_from_sitesdb(self, project, lang): | |||
"""Return the name of the first site with the given project and lang. | |||
If the site is not found, return None. An empty sitesdb will be created | |||
if none exists. | |||
""" | |||
query = "SELECT site_name FROM site WHERE site_project = ? and site_lang = ?" | |||
with sqlite.connect(self._sitesdb) as conn: | |||
try: | |||
return conn.execute(query, (project, lang)).fetchone() | |||
except sqlite.OperationalError: | |||
self._create_sitesdb() | |||
def _add_site_to_sitesdb(self, site): | |||
"""Extract relevant info from a Site object and add it to the sitesdb. | |||
Works like a reverse _load_site_from_sitesdb(); the site's project, | |||
language, base URL, article path, script path, SQL connection data, and | |||
namespaces are extracted from the site and inserted into the sites | |||
database. If the sitesdb doesn't exist, we'll create it first. | |||
""" | |||
name = site.name() | |||
sites_data = (name, site.project(), site.lang(), site._base_url, | |||
site._article_path, site._script_path) | |||
sql_data = [(name, key, val) for key, val in site._sql_data.iteritems()] | |||
ns_data = [] | |||
for ns_id, ns_names in site._namespaces.iteritems(): | |||
ns_data.append((name, ns_id, ns_names.pop(0), True)) | |||
for ns_name in ns_names: | |||
ns_data.append((name, ns_id, ns_name, False)) | |||
with sqlite.connect(self._sitesdb) as conn: | |||
check_exists = "SELECT 1 FROM sites WHERE site_name = ?" | |||
try: | |||
exists = conn.execute(check_exists, (name,)).fetchone() | |||
except sqlite.OperationalError: | |||
self._create_sitesdb() | |||
else: | |||
if exists: | |||
conn.execute("DELETE FROM sites WHERE site_name = ?", (name,)) | |||
conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,)) | |||
conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,)) | |||
conn.execute("INSERT INTO sites VALUES (?, ?, ?, ?, ?, ?)", sites_data) | |||
conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)", sql_data) | |||
conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)", ns_data) | |||
def _remove_site_from_sitesdb(self, name): | |||
"""Remove a site by name from the sitesdb.""" | |||
with sqlite.connect(self._sitesdb) as conn: | |||
cursor = conn.execute("DELETE FROM sites WHERE site_name = ?", (name,)) | |||
if cursor.rowcount == 0: | |||
return False | |||
else: | |||
conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,)) | |||
conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,)) | |||
return True | |||
def get_site(self, name=None, project=None, lang=None):
    """Return a Site instance based on information from the sitesdb.

    Lookup rules:

    - No arguments: the default site named by config.wiki["defaultSite"]
      is returned.
    - 'name' given (e.g. 'enwiki'): the site stored under that name is
      returned. If it cannot be found and 'project' and 'lang' were also
      supplied, those are tried as a fallback.
    - Only 'project' and 'lang' given: the site whose project and language
      match is returned. If several stored sites share the same pair, pass
      an explicit 'name' instead.

    The Site object itself is built by self._make_site_object(); the config
    is loaded first if that has not happened yet.

    Raises TypeError when exactly one of 'project' and 'lang' is given, and
    SiteNotFoundError when no default site is configured or no matching
    site can be found.
    """
    if not config.is_loaded():
        self._load_config()

    # 'project' and 'lang' only make sense as a pair:
    if bool(project) != bool(lang):
        e = "Keyword arguments 'lang' and 'project' must be specified together."
        raise TypeError(e)

    # Nothing requested explicitly, so fall back on the configured default:
    if not (name or project or lang):
        try:
            default = config.wiki["defaultSite"]
        except KeyError:
            e = "Default site is not specified in config."
            raise SiteNotFoundError(e)
        else:
            return self._make_site_object(default)

    # A name takes priority; project/lang are consulted only if it fails:
    if name:
        try:
            return self._make_site_object(name)
        except SiteNotFoundError:
            if not (project and lang):
                raise
            fallback = self._get_site_name_from_sitesdb(project, lang)
            if not fallback:
                raise
            return self._make_site_object(fallback)

    # Only project and lang remain at this point:
    resolved = self._get_site_name_from_sitesdb(project, lang)
    if not resolved:
        e = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang)
        raise SiteNotFoundError(e)
    return self._make_site_object(resolved)
def add_site(self, project=None, lang=None, base_url=None,
             script_path="/w", sql=None):
    """Add a site to the sitesdb so it can be retrieved with get_site().

    If no base_url is given, it is guessed from the project and lang as
    "//{lang}.{project}.org". If that guess would be wrong, pass the
    correct base_url explicitly (project and lang are then ignored).
    script_path defaults to "/w"; override it if the wiki differs. `sql` is
    handed to Site() unchanged (the original notes describe it as a dict of
    connection kwargs used for site.sql_query()).

    Login credentials, the cookie jar, user agent, HTTPS preference, assert
    setting, maxlag, and search config come from config.wiki and
    self._get_cookiejar(); the config is loaded first if necessary.

    Returns the newly created Site object after storing it with
    self._add_site_to_sitesdb(). Raises SiteNotFoundError if neither a
    base_url nor both a project and a lang were provided.
    """
    if not config.is_loaded():
        self._load_config()

    if not base_url:
        if not (project and lang):
            e = "Without a base_url, both a project and a lang must be given."
            raise SiteNotFoundError(e)
        base_url = "//{0}.{1}.org".format(lang, project)

    # Everything else Site() needs is taken from our config:
    site_kwargs = {
        "base_url": base_url,
        "script_path": script_path,
        "sql": sql,
        "login": (config.wiki.get("username"), config.wiki.get("password")),
        "cookiejar": self._get_cookiejar(),
        "user_agent": config.wiki.get("userAgent"),
        "use_https": config.wiki.get("useHTTPS", False),
        "assert_edit": config.wiki.get("assert"),
        "maxlag": config.wiki.get("maxlag"),
        "search_config": config.wiki.get("search"),
    }

    # Build the Site from these settings, then store it in the sitesdb:
    new_site = Site(**site_kwargs)
    self._add_site_to_sitesdb(new_site)
    return new_site
def remove_site(self, name=None, project=None, lang=None):
    """Remove a site from the sitesdb.

    The site is identified by 'name', by 'project' plus 'lang', or by all
    three; in the last case 'name' is tried first and the project/lang pair
    is used only if nothing was stored under that name.

    Returns True if a site was removed and False if no matching site was in
    the sitesdb. Raises TypeError if a project was given without a lang, or
    vice versa. The config is loaded first if that has not happened yet.
    """
    if not config.is_loaded():
        self._load_config()

    # 'project' and 'lang' only make sense as a pair:
    if bool(project) != bool(lang):
        e = "Keyword arguments 'lang' and 'project' must be specified together."
        raise TypeError(e)

    outcome = False
    if name:
        outcome = self._remove_site_from_sitesdb(name)
        if outcome:
            return outcome

    # Either no name was given or it matched nothing; try project/lang:
    if project and lang:
        resolved = self._get_site_name_from_sitesdb(project, lang)
        if resolved:
            return self._remove_site_from_sitesdb(resolved)
    return outcome
# Create the module-wide manager around "sites.db", which lives two directory
# levels above the directory containing this file, then expose the manager's
# public methods as plain module-level functions.
_root = path.dirname(path.dirname(path.dirname(path.abspath(__file__))))
_manager = SitesDBManager(path.join(_root, "sites.db"))
del _root

get_site = _manager.get_site
add_site = _manager.add_site
remove_site = _manager.remove_site