@@ -1,19 +1,11 @@ | |||||
# Ignore python bytecode: | |||||
*.pyc | |||||
# Ignore bot-specific config file: | |||||
config.yml | |||||
# Ignore logs directory: | |||||
# Ignore bot-specific files: | |||||
logs/ | logs/ | ||||
# Ignore cookies file: | |||||
config.yml | |||||
sites.db | |||||
.cookies | .cookies | ||||
# Ignore OS X's crud: | |||||
.DS_Store | |||||
# Ignore python bytecode: | |||||
*.pyc | |||||
# Ignore pydev's nonsense: | |||||
.project | |||||
.pydevproject | |||||
.settings/ | |||||
# Ignore OS X's stuff: | |||||
.DS_Store |
@@ -176,7 +176,7 @@ class _BotConfig(object): | |||||
return self._root_dir | return self._root_dir | ||||
@property | @property | ||||
def config_path(self): | |||||
def path(self): | |||||
return self._config_path | return self._config_path | ||||
@property | @property | ||||
@@ -89,7 +89,7 @@ class Watcher(IRCConnection): | |||||
return | return | ||||
module = imp.new_module("_rc_event_processing_rules") | module = imp.new_module("_rc_event_processing_rules") | ||||
try: | try: | ||||
exec compile(rules, config.config_path, "exec") in module.__dict__ | |||||
exec compile(rules, config.path, "exec") in module.__dict__ | |||||
except Exception: | except Exception: | ||||
e = "Could not compile config file's RC event rules" | e = "Could not compile config file's RC event rules" | ||||
self.logger.exception(e) | self.logger.exception(e) | ||||
@@ -36,9 +36,9 @@ logger.addHandler(_log.NullHandler()) | |||||
from earwigbot.wiki.constants import * | from earwigbot.wiki.constants import * | ||||
from earwigbot.wiki.exceptions import * | from earwigbot.wiki.exceptions import * | ||||
from earwigbot.wiki.functions import * | |||||
from earwigbot.wiki.category import Category | from earwigbot.wiki.category import Category | ||||
from earwigbot.wiki.page import Page | from earwigbot.wiki.page import Page | ||||
from earwigbot.wiki.site import Site | from earwigbot.wiki.site import Site | ||||
from earwigbot.wiki.sitesdb import get_site, add_site, remove_site | |||||
from earwigbot.wiki.user import User | from earwigbot.wiki.user import User |
@@ -1,211 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 by Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
""" | |||||
EarwigBot's Wiki Toolset: Misc Functions | |||||
This module, a component of the wiki package, contains miscellaneous functions | |||||
that are not methods of any class, like get_site(). | |||||
There's no need to import this module explicitly. All functions here are | |||||
automatically available from earwigbot.wiki. | |||||
""" | |||||
from cookielib import LWPCookieJar, LoadError | |||||
import errno | |||||
from getpass import getpass | |||||
from os import chmod, path | |||||
import platform | |||||
import stat | |||||
import earwigbot | |||||
from earwigbot.config import config | |||||
from earwigbot.wiki.exceptions import SiteNotFoundError | |||||
from earwigbot.wiki.site import Site | |||||
__all__ = ["get_site", "add_site", "del_site"] | |||||
_cookiejar = None | |||||
def _load_config():
    """Load the bot's config on demand, decrypting the wiki password if needed.

    Config-requiring functions such as get_site() call this when the config
    has not been loaded yet. That normally only happens when code is run
    straight from the Python interpreter rather than through the bot itself,
    since earwigbot.py or core/main.py will already have loaded it.
    """
    if not config.load():
        return  # Nothing in the config file is encrypted

    # Passwords in the config file are encrypted, so prompt for the key:
    config._decryption_key = getpass("Enter key to unencrypt bot passwords: ")
    config.decrypt(config.wiki, "password")
def _get_cookiejar():
    """Return the shared LWPCookieJar backed by our .cookies file.

    The jar is built on the first call and the very same object is handed
    back on every later call.

    The .cookies file sits in the project root, next to config.yml and
    bot.py. If it is missing, an empty file is created and restricted so only
    its owner can read or write it. If it exists but holds unparseable data,
    that data is ignored.

    This is normally called by _get_site_object_from_dict() (itself called by
    get_site()); the jar is passed to the Site constructor and used for API
    queries, so cookies are preserved between sites (e.g., for CentralAuth),
    making logins easier.
    """
    global _cookiejar
    if _cookiejar is None:
        cookie_file = path.join(config.root_dir, ".cookies")
        _cookiejar = LWPCookieJar(cookie_file)
        try:
            _cookiejar.load()
        except LoadError:
            pass  # File contains bad data, so ignore it completely
        except IOError as exc:
            if exc.errno != errno.ENOENT:
                raise
            # "No such file or directory": create the file, then restrict
            # reading/writing to the owner so others can't peek at our
            # cookies.
            open(cookie_file, "w").close()
            chmod(cookie_file, stat.S_IRUSR | stat.S_IWUSR)
    return _cookiejar
def _get_site_object_from_dict(name, d):
    """Build a Site from a per-site dict `d` and a separately supplied name.

    `d` is probably a site entry taken from our config file. Site-specific
    values (project, lang, URLs, SQL data, namespaces) come from `d`; values
    shared by all sites (login, user agent, assert, maxlag, search) come from
    config.wiki. In the user agent, "$1" is replaced by the EarwigBot version
    and "$2" by the running Python version.
    """
    wiki = config.wiki

    user_agent = wiki.get("userAgent")
    if user_agent:
        substitutions = (("$1", earwigbot.__version__),
                         ("$2", platform.python_version()))
        for placeholder, value in substitutions:
            user_agent = user_agent.replace(placeholder, value)

    return Site(
        name=name,
        project=d.get("project"),
        lang=d.get("lang"),
        base_url=d.get("baseURL"),
        article_path=d.get("articlePath"),
        script_path=d.get("scriptPath"),
        sql=d.get("sql", {}),
        namespaces=d.get("namespaces", {}),
        login=(wiki.get("username"), wiki.get("password")),
        cookiejar=_get_cookiejar(),
        user_agent=user_agent,
        assert_edit=wiki.get("assert"),
        maxlag=wiki.get("maxlag"),
        search_config=wiki.get("search"),
    )
def get_site(name=None, project=None, lang=None):
    """Return a Site instance based on information from our config file.

    With no arguments, return the default site as specified by our config
    file. This is default = config.wiki["defaultSite"];
    config.wiki["sites"][default].

    With `name` specified, return the site specified by
    config.wiki["sites"][name].

    With `project` and `lang` specified, return the site specified by the
    member of config.wiki["sites"], `s`, for which s["project"] == project and
    s["lang"] == lang. Entries that lack either key simply never match.

    We will attempt to login to the site automatically using
    config.wiki["username"] and config.wiki["password"] if both are defined.

    Specifying a project without a lang or a lang without a project will raise
    TypeError. If all three args are specified, `name` will be first tried,
    then `project` and `lang`. If, with any number of args, a site cannot be
    found in the config, SiteNotFoundError is raised.
    """
    # Check if config has been loaded, and load it if it hasn't:
    if not config.is_loaded():
        _load_config()

    # Someone specified a project without a lang (or a lang without a project)!
    if (project is None) != (lang is None):
        e = "Keyword arguments 'lang' and 'project' must be specified together."
        raise TypeError(e)

    # No args given, so return our default site (project is None implies lang
    # is None, so we don't need to add that in):
    if name is None and project is None:
        try:
            default = config.wiki["defaultSite"]
        except KeyError:
            e = "Default site is not specified in config."
            raise SiteNotFoundError(e)
        try:
            site = config.wiki["sites"][default]
        except KeyError:
            e = "Default site specified by config is not in the config's sites list."
            raise SiteNotFoundError(e)
        return _get_site_object_from_dict(default, site)

    # Name arg given, but don't look at others unless `name` isn't found:
    if name is not None:
        try:
            site = config.wiki["sites"][name]
        except KeyError:
            if project is None:  # Implies lang is None, so only name was given
                e = "Site '{0}' not found in config.".format(name)
                raise SiteNotFoundError(e)
        else:
            return _get_site_object_from_dict(name, site)

    # If we end up here, then project and lang are both not None (and `name`,
    # if it was given at all, was not found):
    for sitename, site in config.wiki["sites"].items():
        if site.get("project") == project and site.get("lang") == lang:
            return _get_site_object_from_dict(sitename, site)

    if name is not None:
        # str.format() returns a new string, so its result must be the thing
        # we raise; otherwise the message keeps its literal placeholders.
        e = "Neither site '{0}' nor site '{1}:{2}' found in config."
        raise SiteNotFoundError(e.format(name, project, lang))
    e = "Site '{0}:{1}' not found in config.".format(project, lang)
    raise SiteNotFoundError(e)
def add_site():
    """STUB: config editing is required first.

    Intended contract: return True if the site was added successfully, or
    False if the site was already in our config; raise ConfigError if saving
    the updated file failed for some reason. For now this does nothing and
    returns None.
    """
    return None
def del_site(name):
    """STUB: config editing is required first.

    Intended contract: return True if the site was removed successfully, or
    False if the site was not in our config originally; raise ConfigError if
    saving the updated file failed for some reason. For now `name` is ignored
    and None is returned.
    """
    return None
@@ -174,7 +174,7 @@ class Page(CopyrightMixin): | |||||
Assuming the API is sound, this should not raise any exceptions. | Assuming the API is sound, this should not raise any exceptions. | ||||
""" | """ | ||||
if result is None: | |||||
if not result: | |||||
params = {"action": "query", "rvprop": "user", "intoken": "edit", | params = {"action": "query", "rvprop": "user", "intoken": "edit", | ||||
"prop": "info|revisions", "rvlimit": 1, "rvdir": "newer", | "prop": "info|revisions", "rvlimit": 1, "rvdir": "newer", | ||||
"titles": self._title, "inprop": "protection|url"} | "titles": self._title, "inprop": "protection|url"} | ||||
@@ -240,7 +240,7 @@ class Page(CopyrightMixin): | |||||
Don't call this directly, ever - use .get(force=True) if you want to | Don't call this directly, ever - use .get(force=True) if you want to | ||||
force content reloading. | force content reloading. | ||||
""" | """ | ||||
if result is None: | |||||
if not result: | |||||
params = {"action": "query", "prop": "revisions", "rvlimit": 1, | params = {"action": "query", "prop": "revisions", "rvlimit": 1, | ||||
"rvprop": "content|timestamp", "titles": self._title} | "rvprop": "content|timestamp", "titles": self._title} | ||||
result = self._site._api_query(params) | result = self._site._api_query(params) | ||||
@@ -471,7 +471,7 @@ class Page(CopyrightMixin): | |||||
""" | """ | ||||
if force: | if force: | ||||
self._load_wrapper() | self._load_wrapper() | ||||
if self._fullurl is not None: | |||||
if self._fullurl: | |||||
return self._fullurl | return self._fullurl | ||||
else: | else: | ||||
slug = quote(self._title.replace(" ", "_"), safe="/:") | slug = quote(self._title.replace(" ", "_"), safe="/:") | ||||
@@ -71,18 +71,19 @@ class Site(object): | |||||
def __init__(self, name=None, project=None, lang=None, base_url=None, | def __init__(self, name=None, project=None, lang=None, base_url=None, | ||||
article_path=None, script_path=None, sql=None, | article_path=None, script_path=None, sql=None, | ||||
namespaces=None, login=(None, None), cookiejar=None, | namespaces=None, login=(None, None), cookiejar=None, | ||||
user_agent=None, assert_edit=None, maxlag=None, | |||||
search_config=(None, None)): | |||||
user_agent=None, use_https=False, assert_edit=None, | |||||
maxlag=None, search_config=(None, None)): | |||||
"""Constructor for new Site instances. | """Constructor for new Site instances. | ||||
This probably isn't necessary to call yourself unless you're building a | This probably isn't necessary to call yourself unless you're building a | ||||
Site that's not in your config and you don't want to add it - normally | Site that's not in your config and you don't want to add it - normally | ||||
all you need is tools.get_site(name), which creates the Site for you | all you need is tools.get_site(name), which creates the Site for you | ||||
based on your config file. We accept a bunch of kwargs, but the only | |||||
ones you really "need" are `base_url` and `script_path` - this is | |||||
enough to figure out an API url. `login`, a tuple of | |||||
(username, password), is highly recommended. `cookiejar` will be used | |||||
to store cookies, and we'll use a normal CookieJar if none is given. | |||||
based on your config file and the sites database. We accept a bunch of | |||||
kwargs, but the only ones you really "need" are `base_url` and | |||||
`script_path` - this is enough to figure out an API url. `login`, a | |||||
tuple of (username, password), is highly recommended. `cookiejar` will | |||||
be used to store cookies, and we'll use a normal CookieJar if none is | |||||
given. | |||||
First, we'll store the given arguments as attributes, then set up our | First, we'll store the given arguments as attributes, then set up our | ||||
URL opener. We'll load any of the attributes that weren't given from | URL opener. We'll load any of the attributes that weren't given from | ||||
@@ -99,7 +100,8 @@ class Site(object): | |||||
self._script_path = script_path | self._script_path = script_path | ||||
self._namespaces = namespaces | self._namespaces = namespaces | ||||
# Attributes used for API queries: | |||||
# Attributes used for API queries: | |||||
self._use_https = use_https | |||||
self._assert_edit = assert_edit | self._assert_edit = assert_edit | ||||
self._maxlag = maxlag | self._maxlag = maxlag | ||||
self._max_retries = 5 | self._max_retries = 5 | ||||
@@ -112,11 +114,11 @@ class Site(object): | |||||
self._search_config = search_config | self._search_config = search_config | ||||
# Set up cookiejar and URL opener for making API queries: | # Set up cookiejar and URL opener for making API queries: | ||||
if cookiejar is not None: | |||||
if cookiejar: | |||||
self._cookiejar = cookiejar | self._cookiejar = cookiejar | ||||
else: | else: | ||||
self._cookiejar = CookieJar() | self._cookiejar = CookieJar() | ||||
if user_agent is None: | |||||
if not user_agent: | |||||
user_agent = USER_AGENT # Set default UA from wiki.constants | user_agent = USER_AGENT # Set default UA from wiki.constants | ||||
self._opener = build_opener(HTTPCookieProcessor(self._cookiejar)) | self._opener = build_opener(HTTPCookieProcessor(self._cookiejar)) | ||||
self._opener.addheaders = [("User-Agent", user_agent), | self._opener.addheaders = [("User-Agent", user_agent), | ||||
@@ -127,9 +129,9 @@ class Site(object): | |||||
# If we have a name/pass and the API says we're not logged in, log in: | # If we have a name/pass and the API says we're not logged in, log in: | ||||
self._login_info = name, password = login | self._login_info = name, password = login | ||||
if name is not None and password is not None: | |||||
if name and password: | |||||
logged_in_as = self._get_username_from_cookies() | logged_in_as = self._get_username_from_cookies() | ||||
if logged_in_as is None or name != logged_in_as: | |||||
if not logged_in_as or name != logged_in_as: | |||||
self._login(login) | self._login(login) | ||||
def __repr__(self): | def __repr__(self): | ||||
@@ -137,10 +139,10 @@ class Site(object): | |||||
res = ", ".join(( | res = ", ".join(( | ||||
"Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}", | "Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}", | ||||
"base_url={_base_url!r}", "article_path={_article_path!r}", | "base_url={_base_url!r}", "article_path={_article_path!r}", | ||||
"script_path={_script_path!r}", "assert_edit={_assert_edit!r}", | |||||
"maxlag={_maxlag!r}", "sql={_sql!r}", "login={0}", | |||||
"user_agent={2!r}", "cookiejar={1})" | |||||
)) | |||||
"script_path={_script_path!r}", "use_https={_use_https!r}", | |||||
"assert_edit={_assert_edit!r}", "maxlag={_maxlag!r}", | |||||
"sql={_sql_data!r}", "login={0}", "user_agent={2!r}", | |||||
"cookiejar={1})")) | |||||
name, password = self._login_info | name, password = self._login_info | ||||
login = "({0}, {1})".format(repr(name), "hidden" if password else None) | login = "({0}, {1})".format(repr(name), "hidden" if password else None) | ||||
cookies = self._cookiejar.__class__.__name__ | cookies = self._cookiejar.__class__.__name__ | ||||
@@ -162,7 +164,9 @@ class Site(object): | |||||
This will first attempt to construct an API url from self._base_url and | This will first attempt to construct an API url from self._base_url and | ||||
self._script_path. We need both of these, or else we'll raise | self._script_path. We need both of these, or else we'll raise | ||||
SiteAPIError. | |||||
SiteAPIError. If self._base_url is protocol-relative (introduced in | |||||
MediaWiki 1.18), we'll choose HTTPS if self._use_https is True, | |||||
otherwise HTTP. | |||||
We'll encode the given params, adding format=json along the way, as | We'll encode the given params, adding format=json along the way, as | ||||
well as &assert= and &maxlag= based on self._assert_edit and _maxlag. | well as &assert= and &maxlag= based on self._assert_edit and _maxlag. | ||||
@@ -180,11 +184,17 @@ class Site(object): | |||||
There's helpful MediaWiki API documentation at | There's helpful MediaWiki API documentation at | ||||
<http://www.mediawiki.org/wiki/API>. | <http://www.mediawiki.org/wiki/API>. | ||||
""" | """ | ||||
if self._base_url is None or self._script_path is None: | |||||
if not self._base_url or self._script_path is None: | |||||
e = "Tried to do an API query, but no API URL is known." | e = "Tried to do an API query, but no API URL is known." | ||||
raise SiteAPIError(e) | raise SiteAPIError(e) | ||||
url = ''.join((self._base_url, self._script_path, "/api.php")) | |||||
base_url = self._base_url | |||||
if base_url.startswith("//"): # Protocol-relative URLs from 1.18 | |||||
if self._use_https: | |||||
base_url = "https:" + base_url | |||||
else: | |||||
base_url = "http:" + base_url | |||||
url = ''.join((base_url, self._script_path, "/api.php")) | |||||
params["format"] = "json" # This is the only format we understand | params["format"] = "json" # This is the only format we understand | ||||
if self._assert_edit: # If requested, ensure that we're logged in | if self._assert_edit: # If requested, ensure that we're logged in | ||||
@@ -193,7 +203,6 @@ class Site(object): | |||||
params["maxlag"] = self._maxlag | params["maxlag"] = self._maxlag | ||||
data = urlencode(params) | data = urlencode(params) | ||||
logger.debug("{0} -> {1}".format(url, data)) | logger.debug("{0} -> {1}".format(url, data)) | ||||
try: | try: | ||||
@@ -332,15 +341,15 @@ class Site(object): | |||||
name = ''.join((self._name, "Token")) | name = ''.join((self._name, "Token")) | ||||
cookie = self._get_cookie(name, domain) | cookie = self._get_cookie(name, domain) | ||||
if cookie is not None: | |||||
if cookie: | |||||
name = ''.join((self._name, "UserName")) | name = ''.join((self._name, "UserName")) | ||||
user_name = self._get_cookie(name, domain) | user_name = self._get_cookie(name, domain) | ||||
if user_name is not None: | |||||
if user_name: | |||||
return user_name.value | return user_name.value | ||||
name = "centralauth_Token" | name = "centralauth_Token" | ||||
for cookie in self._cookiejar: | for cookie in self._cookiejar: | ||||
if cookie.domain_initial_dot is False or cookie.is_expired(): | |||||
if not cookie.domain_initial_dot or cookie.is_expired(): | |||||
continue | continue | ||||
if cookie.name != name: | if cookie.name != name: | ||||
continue | continue | ||||
@@ -348,7 +357,7 @@ class Site(object): | |||||
search = ''.join(("(.*?)", re_escape(cookie.domain))) | search = ''.join(("(.*?)", re_escape(cookie.domain))) | ||||
if re_match(search, domain): # Test it against our site | if re_match(search, domain): # Test it against our site | ||||
user_name = self._get_cookie("centralauth_User", cookie.domain) | user_name = self._get_cookie("centralauth_User", cookie.domain) | ||||
if user_name is not None: | |||||
if user_name: | |||||
return user_name.value | return user_name.value | ||||
def _get_username_from_api(self): | def _get_username_from_api(self): | ||||
@@ -378,7 +387,7 @@ class Site(object): | |||||
single API query for our username (or IP address) and return that. | single API query for our username (or IP address) and return that. | ||||
""" | """ | ||||
name = self._get_username_from_cookies() | name = self._get_username_from_cookies() | ||||
if name is not None: | |||||
if name: | |||||
return name | return name | ||||
return self._get_username_from_api() | return self._get_username_from_api() | ||||
@@ -417,7 +426,7 @@ class Site(object): | |||||
""" | """ | ||||
name, password = login | name, password = login | ||||
params = {"action": "login", "lgname": name, "lgpassword": password} | params = {"action": "login", "lgname": name, "lgpassword": password} | ||||
if token is not None: | |||||
if token: | |||||
params["lgtoken"] = token | params["lgtoken"] = token | ||||
result = self._api_query(params) | result = self._api_query(params) | ||||
res = result["login"]["result"] | res = result["login"]["result"] | ||||
@@ -455,10 +464,9 @@ class Site(object): | |||||
def _sql_connect(self, **kwargs): | def _sql_connect(self, **kwargs): | ||||
"""Attempt to establish a connection with this site's SQL database. | """Attempt to establish a connection with this site's SQL database. | ||||
oursql.connect() will be called with self._sql_data as its kwargs, | |||||
which is usually config.wiki["sites"][self.name()]["sql"]. Any kwargs | |||||
given to this function will be passed to connect() and will have | |||||
precedence over the config file. | |||||
oursql.connect() will be called with self._sql_data as its kwargs. | |||||
Any kwargs given to this function will be passed to connect() and will | |||||
have precedence over the config file. | |||||
Will raise SQLError() if the module "oursql" is not available. oursql | Will raise SQLError() if the module "oursql" is not available. oursql | ||||
may raise its own exceptions (e.g. oursql.InterfaceError) if it cannot | may raise its own exceptions (e.g. oursql.InterfaceError) if it cannot | ||||
@@ -631,6 +639,6 @@ class Site(object): | |||||
If `username` is left as None, then a User object representing the | If `username` is left as None, then a User object representing the | ||||
currently logged-in (or anonymous!) user is returned. | currently logged-in (or anonymous!) user is returned. | ||||
""" | """ | ||||
if username is None: | |||||
if not username: | |||||
username = self._get_username() | username = self._get_username() | ||||
return User(self, username) | return User(self, username) |
@@ -0,0 +1,392 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 by Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from cookielib import LWPCookieJar, LoadError | |||||
import errno | |||||
from getpass import getpass | |||||
from os import chmod, path | |||||
from platform import python_version | |||||
import stat | |||||
import sqlite3 as sqlite | |||||
from earwigbot import __version__ | |||||
from earwigbot.config import config | |||||
from earwigbot.wiki.exceptions import SiteNotFoundError | |||||
from earwigbot.wiki.site import Site | |||||
__all__ = ["SitesDBManager", "get_site", "add_site", "remove_site"] | |||||
class SitesDBManager(object): | |||||
""" | |||||
EarwigBot's Wiki Toolset: Sites Database Manager | |||||
This class controls the sites.db file, which stores information about all | |||||
wiki sites known to the bot. Three public methods act as bridges between | |||||
the bot's config files and Site objects: | |||||
get_site -- returns a Site object corresponding to a given site name | |||||
add_site -- stores a site in the database, given connection info | |||||
remove_site -- removes a site from the database, given its name | |||||
There's usually no need to use this class directly. All public methods | |||||
here are available as earwigbot.wiki.get_site(), earwigbot.wiki.add_site(), | |||||
and earwigbot.wiki.remove_site(), which use a sites.db file located in the | |||||
same directory as our config.yml file. Lower-level access can be achieved | |||||
by importing the manager class | |||||
(`from earwigbot.wiki.sitesdb import SitesDBManager`). | |||||
""" | |||||
def __init__(self, db_file): | |||||
"""Set up the manager with an attribute for the sitesdb filename.""" | |||||
self._cookiejar = None | |||||
self._sitesdb = db_file | |||||
def _load_config(self):
    """Load the bot's config, decrypting the wiki password if needed.

    Config-requiring methods such as get_site() call this when the config
    has not been loaded yet. That normally only happens when code is run
    straight from the Python interpreter rather than through the bot itself,
    since bot.py and earwigbot.runner will already have loaded it.
    """
    if not config.load():
        return  # Nothing in the config file is encrypted

    # Passwords in the config file are encrypted, so prompt for the key:
    config._decryption_key = getpass("Enter key to unencrypt bot passwords: ")
    config.decrypt(config.wiki, "password")
def _get_cookiejar(self): | |||||
"""Return a LWPCookieJar object loaded from our .cookies file. | |||||
The same .cookies file is returned every time, located in the project | |||||
root, same directory as config.yml and bot.py. If it doesn't exist, we | |||||
will create the file and set it to be readable and writeable only by | |||||
us. If it exists but the information inside is bogus, we'll ignore it. | |||||
This is normally called by _make_site_object() (in turn called by | |||||
get_site()), and the cookiejar is passed to our Site's constructor, | |||||
used when it makes API queries. This way, we can easily preserve | |||||
cookies between sites (e.g., for CentralAuth), making logins easier. | |||||
""" | |||||
if self._cookiejar: | |||||
return self._cookiejar | |||||
cookie_file = path.join(config.root_dir, ".cookies") | |||||
self._cookiejar = LWPCookieJar(cookie_file) | |||||
try: | |||||
self._cookiejar.load() | |||||
except LoadError: | |||||
pass # File contains bad data, so ignore it completely | |||||
except IOError as e: | |||||
if e.errno == errno.ENOENT: # "No such file or directory" | |||||
# Create the file and restrict reading/writing only to the | |||||
# owner, so others can't peak at our cookies: | |||||
open(cookie_file, "w").close() | |||||
chmod(cookie_file, stat.S_IRUSR|stat.S_IWUSR) | |||||
else: | |||||
raise | |||||
return self._cookiejar | |||||
def _create_sitesdb(self): | |||||
"""Initialize the sitesdb file with its three necessary tables.""" | |||||
script = """ | |||||
CREATE TABLE sites (site_name, site_project, site_lang, site_base_url, | |||||
site_article_path, site_script_path); | |||||
CREATE TABLE sql_data (sql_site, sql_data_key, sql_data_value); | |||||
CREATE TABLE namespaces (ns_site, ns_id, ns_name, ns_is_primary_name); | |||||
""" | |||||
with sqlite.connect(self._sitesdb) as conn: | |||||
conn.executescript(script) | |||||
def _load_site_from_sitesdb(self, name): | |||||
"""Return all information stored in the sitesdb relating to given site. | |||||
The information will be returned as a tuple, containing the site's | |||||
name, project, language, base URL, article path, script path, SQL | |||||
connection data, and namespaces, in that order. If the site is not | |||||
found in the database, SiteNotFoundError will be raised. An empty | |||||
database will be created before the exception is raised if none exists. | |||||
""" | |||||
query1 = "SELECT * FROM sites WHERE site_name = ?" | |||||
query2 = "SELECT sql_data_key, sql_data_value FROM sql_data WHERE sql_site = ?" | |||||
query3 = "SELECT ns_id, ns_name, ns_is_primary_name FROM namespaces WHERE ns_site = ?" | |||||
error = "Site '{0}' not found in the sitesdb.".format(name) | |||||
with sqlite.connect(self._sitesdb) as conn: | |||||
try: | |||||
site_data = conn.execute(query1, (name,)).fetchone() | |||||
except sqlite.OperationalError: | |||||
self._create_sitesdb() | |||||
raise SiteNotFoundError(error) | |||||
if not site_data: | |||||
raise SiteNotFoundError(error) | |||||
sql_data = conn.execute(query2, (name,)).fetchall() | |||||
ns_data = conn.execute(query3, (name,)).fetchall() | |||||
name, project, lang, base_url, article_path, script_path = site_data | |||||
sql = dict(sql_data) | |||||
namespaces = {} | |||||
for ns_id, ns_name, ns_is_primary_name in ns_data: | |||||
try: | |||||
if ns_is_primary_name: # "Primary" name goes first in list | |||||
namespaces[ns_id].insert(0, ns_name) | |||||
else: # Ordering of the aliases doesn't matter | |||||
namespaces[ns_id].append(ns_name) | |||||
except KeyError: | |||||
namespaces[ns_id] = [ns_name] | |||||
return (name, project, lang, base_url, article_path, script_path, sql, | |||||
namespaces) | |||||
def _make_site_object(self, name):
    """Build a Site object for the sitesdb entry called 'name'.

    Site details come from _load_site_from_sitesdb(), so SiteNotFoundError
    propagates if the site is not in our sitesdb. Login credentials, the
    user agent, and the other connection options are read from
    config.wiki.
    """
    (name, project, lang, base_url, article_path, script_path, sql,
     namespaces) = self._load_site_from_sitesdb(name)

    wiki = config.wiki
    login = (wiki.get("username"), wiki.get("password"))
    cookiejar = self._get_cookiejar()

    agent = wiki.get("userAgent")
    if agent:
        # "$1" stands for the bot's version, "$2" for the Python version
        agent = agent.replace("$1", __version__)
        agent = agent.replace("$2", python_version())

    return Site(name=name, project=project, lang=lang, base_url=base_url,
                article_path=article_path, script_path=script_path,
                sql=sql, namespaces=namespaces, login=login,
                cookiejar=cookiejar, user_agent=agent,
                use_https=wiki.get("useHTTPS", False),
                assert_edit=wiki.get("assert"),
                maxlag=wiki.get("maxlag"),
                search_config=wiki.get("search"))
def _get_site_name_from_sitesdb(self, project, lang):
    """Return the name of the first site with the given project and lang.

    The name is returned as a plain string, suitable for passing on as a
    site name. If no site matches, return None. If the query fails with an
    OperationalError (e.g. the tables have not been created yet),
    _create_sitesdb() is called and None is returned.
    """
    # The table is called "sites", matching every other query on the sitesdb
    query = "SELECT site_name FROM sites WHERE site_project = ? AND site_lang = ?"
    with sqlite.connect(self._sitesdb) as conn:
        try:
            row = conn.execute(query, (project, lang)).fetchone()
        except sqlite.OperationalError:
            self._create_sitesdb()
            return None
    # fetchone() yields a 1-tuple (or None); callers expect the bare name
    return row[0] if row else None
def _add_site_to_sitesdb(self, site):
    """Extract relevant info from a Site object and add it to the sitesdb.

    Works like a reverse _load_site_from_sitesdb(); the site's project,
    language, base URL, article path, script path, SQL connection data, and
    namespaces are read from the site and inserted into the sites database.
    Any rows already stored under the same site name are replaced. If the
    existence check fails with an OperationalError, _create_sitesdb() is
    called before inserting.

    The Site object itself is only read, never modified.
    """
    name = site.name()
    sites_data = (name, site.project(), site.lang(), site._base_url,
                  site._article_path, site._script_path)
    # Tolerate a site without SQL connection data
    sql_data = [(name, key, val)
                for key, val in (site._sql_data or {}).items()]

    # The first name in each list is the primary one; the rest are aliases.
    # Iterate rather than pop() so the site's own lists stay intact.
    ns_data = []
    for ns_id, ns_names in site._namespaces.items():
        for index, ns_name in enumerate(ns_names):
            ns_data.append((name, ns_id, ns_name, index == 0))

    with sqlite.connect(self._sitesdb) as conn:
        check_exists = "SELECT 1 FROM sites WHERE site_name = ?"
        try:
            exists = conn.execute(check_exists, (name,)).fetchone()
        except sqlite.OperationalError:
            self._create_sitesdb()
        else:
            if exists:
                # Clear the stale entry so the inserts below replace it
                conn.execute("DELETE FROM sites WHERE site_name = ?", (name,))
                conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
                conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
        conn.execute("INSERT INTO sites VALUES (?, ?, ?, ?, ?, ?)", sites_data)
        conn.executemany("INSERT INTO sql_data VALUES (?, ?, ?)", sql_data)
        conn.executemany("INSERT INTO namespaces VALUES (?, ?, ?, ?)", ns_data)
def _remove_site_from_sitesdb(self, name):
    """Remove a site by name from the sitesdb.

    Return True if a row for 'name' was deleted from the sites table (its
    SQL data and namespace rows are deleted as well), or False if no such
    site was stored. If the delete fails with an OperationalError (e.g. the
    tables have not been created yet), _create_sitesdb() is called and
    False is returned, matching how the other sitesdb helpers behave.
    """
    with sqlite.connect(self._sitesdb) as conn:
        try:
            cursor = conn.execute("DELETE FROM sites WHERE site_name = ?", (name,))
        except sqlite.OperationalError:
            self._create_sitesdb()
            return False
        if cursor.rowcount == 0:
            return False
        # The site existed, so clean up its dependent rows too
        conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
        conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
    return True
def get_site(self, name=None, project=None, lang=None):
    """Return a Site instance built from information in the sitesdb.

    Called with no arguments, the default site named by
    config.wiki["defaultSite"] is returned.

    Given a 'name', the site stored under that name is returned. This is
    equivalent to the site's 'wikiid' in the API, like 'enwiki'.

    Given a 'project' and a 'lang', the site whose project and language
    match is returned. Should several sites share those values (unlikely),
    this is not a reliable way of loading one; pass an explicit 'name'
    instead.

    The Site is handed config.wiki["username"] and config.wiki["password"]
    as its login credentials.

    A project without a lang, or a lang without a project, raises
    TypeError. When all three arguments are given, 'name' is tried first,
    then 'project' and 'lang' if 'name' is not found. SiteNotFoundError is
    raised when no matching site is in the sitesdb.
    """
    if not config.is_loaded():
        self._load_config()

    # 'project' and 'lang' are only meaningful as a pair:
    if bool(project) != bool(lang):
        raise TypeError(
            "Keyword arguments 'lang' and 'project' must be specified together.")

    # Nothing specified at all, so fall back on the configured default:
    if not (name or project or lang):
        try:
            default = config.wiki["defaultSite"]
        except KeyError:
            raise SiteNotFoundError("Default site is not specified in config.")
        return self._make_site_object(default)

    # An explicit name wins; project/lang are only a fallback for it:
    if name:
        try:
            return self._make_site_object(name)
        except SiteNotFoundError:
            if project and lang:
                name = self._get_site_name_from_sitesdb(project, lang)
                if name:
                    return self._make_site_object(name)
            raise

    # Only project and lang were supplied:
    found = self._get_site_name_from_sitesdb(project, lang)
    if not found:
        raise SiteNotFoundError(
            "Site '{0}:{1}' not found in the sitesdb.".format(project, lang))
    return self._make_site_object(found)
def add_site(self, project=None, lang=None, base_url=None,
             script_path="/w", sql=None):
    """Add a site to the sitesdb so it can be retrieved with get_site().

    If only a project and a lang are given, we'll guess the base_url as
    "//{lang}.{project}.org" (which is protocol-relative, becoming 'https'
    if 'useHTTPS' is True in config otherwise 'http'). If this is wrong,
    provide the correct base_url as an argument (in which case project and
    lang are ignored). Most wikis use "/w" as the script path (meaning the
    API is located at "{base_url}{script_path}/api.php" ->
    "//{lang}.{project}.org/w/api.php"), so this is the default. If your
    wiki is different, provide the script_path as an argument. SQL
    connection info cannot be read from config here, so provide it as a
    dict in `sql`; it is handed to Site unchanged.

    Returns the Site object that was created and stored. If a site with
    the same name is already in the sitesdb, its stored data is replaced
    (this can be done purposefully to update old site info). Raises
    SiteNotFoundError if not enough information has been provided to
    identify the site (e.g. a project but not a lang).
    """
    if not config.is_loaded():
        self._load_config()

    if not base_url:
        if not project or not lang:
            e = "Without a base_url, both a project and a lang must be given."
            raise SiteNotFoundError(e)
        base_url = "//{0}.{1}.org".format(lang, project)

    login = (config.wiki.get("username"), config.wiki.get("password"))
    cookiejar = self._get_cookiejar()
    user_agent = config.wiki.get("userAgent")
    use_https = config.wiki.get("useHTTPS", False)
    assert_edit = config.wiki.get("assert")
    maxlag = config.wiki.get("maxlag")
    search_config = config.wiki.get("search")

    # Expand the version placeholders exactly as _make_site_object() does,
    # so this Site never sends a literal "$1"/"$2" in its user agent:
    if user_agent:
        user_agent = user_agent.replace("$1", __version__)
        user_agent = user_agent.replace("$2", python_version())

    # Create a Site object to log in and load the other attributes:
    site = Site(base_url=base_url, script_path=script_path, sql=sql,
                login=login, cookiejar=cookiejar, user_agent=user_agent,
                use_https=use_https, assert_edit=assert_edit,
                maxlag=maxlag, search_config=search_config)

    self._add_site_to_sitesdb(site)
    return site
def remove_site(self, name=None, project=None, lang=None):
    """Delete a site from the sitesdb.

    The result is True when a site was removed and False when no matching
    site was stored. With all three arguments (name, project, and lang),
    'name' is tried first and the project/lang pair is used only if 'name'
    was not found. TypeError is raised when a project is given without a
    language, or vice versa.
    """
    if not config.is_loaded():
        self._load_config()

    # 'project' and 'lang' are only meaningful as a pair:
    if bool(project) != bool(lang):
        raise TypeError(
            "Keyword arguments 'lang' and 'project' must be specified together.")

    if name:
        removed = self._remove_site_from_sitesdb(name)
        if removed or not (project and lang):
            return removed
        # 'name' was unknown, so fall back on the project/lang pair:
        fallback = self._get_site_name_from_sitesdb(project, lang)
        if fallback:
            return self._remove_site_from_sitesdb(fallback)
        return removed

    if not (project and lang):
        return False

    found = self._get_site_name_from_sitesdb(project, lang)
    if found:
        return self._remove_site_from_sitesdb(found)
    return False
# The sites database lives in "sites.db", three directory levels above this
# file. A single shared manager is built around it, and its bound methods are
# exposed as the module-level API.
_root = path.dirname(path.dirname(path.dirname(path.abspath(__file__))))
_dbfile = path.join(_root, "sites.db")
_manager = SitesDBManager(_dbfile)
del _root, _dbfile  # Keep the temporary path names out of the module namespace

get_site = _manager.get_site
add_site = _manager.add_site
remove_site = _manager.remove_site