diff --git a/.gitignore b/.gitignore index bc67eea..1884197 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,9 @@ # Ignore bot-specific config file: config.json +# Ignore cookies file: +.cookies + # Ignore OS X's crud: *.DS_Store diff --git a/wiki/tools/exceptions.py b/wiki/tools/exceptions.py index 0620262..d28cac2 100644 --- a/wiki/tools/exceptions.py +++ b/wiki/tools/exceptions.py @@ -15,12 +15,18 @@ class SiteNotFoundError(WikiToolsetError): class SiteAPIError(WikiToolsetError): """We couldn't connect to a site's API, perhaps because the server doesn't - exist, our URL is wrong, or they're having temporary problems.""" + exist, our URL is wrong or incomplete, or they're having temporary + problems.""" class LoginError(WikiToolsetError): """An error occured while trying to login. Perhaps the username/password is incorrect.""" +class PermissionsError(WikiToolsetError): + """We tried to do something we don't have permission to, like a non-admin + trying to delete a page, or trying to edit a page when no login information + was provided.""" + class NamespaceNotFoundError(WikiToolsetError): """A requested namespace name or namespace ID does not exist.""" diff --git a/wiki/tools/functions.py b/wiki/tools/functions.py index 2618a57..ff69c19 100644 --- a/wiki/tools/functions.py +++ b/wiki/tools/functions.py @@ -10,7 +10,11 @@ There's no need to import this module explicitly. All functions here are automatically available from wiki.tools. """ +from cookielib import LWPCookieJar, LoadError +import errno from getpass import getpass +from os import chmod, path +import stat from core import config from wiki.tools.exceptions import SiteNotFoundError @@ -18,6 +22,8 @@ from wiki.tools.site import Site __all__ = ["get_site"] +_cookiejar = None + def _load_config(): """Called by a config-requiring function, such as get_site(), when config has not been loaded. 
This will usually happen only if we're running code @@ -31,50 +37,60 @@ def _load_config(): else: config.parse_config(None) +def _get_cookiejar(): + """Returns a LWPCookieJar object loaded from our .cookies file. The same + one is returned every time. + + The .cookies file is located in the project root, same directory as + config.json and earwigbot.py. If it doesn't exist, we will create the file + and set it to be readable and writeable only by us. If it exists but the + information inside is bogus, we will ignore it. + + This is normally called by _get_site_object_from_dict() (in turn called by + get_site()), and the cookiejar is passed to our Site's constructor, used + when it makes API queries. This way, we can easily preserve cookies between + sites (e.g., for CentralAuth), making logins easier. + """ + global _cookiejar + if _cookiejar is not None: + return _cookiejar + + cookie_file = path.join(config.root_dir, ".cookies") + _cookiejar = LWPCookieJar(cookie_file) + + try: + _cookiejar.load() + except LoadError: + # file contains bad data, so ignore it completely + pass + except IOError as e: + if e.errno == errno.ENOENT: # "No such file or directory" + # create the file and restrict reading/writing only to the owner, + # so others can't peek at our cookies + open(cookie_file, "w").close() + chmod(cookie_file, stat.S_IRUSR|stat.S_IWUSR) + else: + raise + + return _cookiejar + def _get_site_object_from_dict(name, d): """Return a Site object based on the contents of a dict, probably acquired through our config file, and a separate name. 
""" - try: - project = d["project"] - except KeyError: - project = None - try: - lang = d["lang"] - except KeyError: - lang = None - try: - base_url = d["baseURL"] - except KeyError: - base_url = None - try: - article_path = d["articlePath"] - except KeyError: - article_path = None - try: - script_path = d["scriptPath"] - except KeyError: - script_path = None - try: - sql_server = d["sqlServer"] - except KeyError: - sql_server = None - try: - sql_db = d["sqlDB"] - except KeyError: - sql_db = None - try: - namespaces = d["namespaces"] - except KeyError: - namespaces = None - try: - login = (config.wiki["username"], config.wiki["password"]) - except KeyError: - login = (None, None) + project = d.get("project") + lang = d.get("lang") + base_url = d.get("baseURL") + article_path = d.get("articlePath") + script_path = d.get("scriptPath") + sql = (d.get("sqlServer"), d.get("sqlDB")) + namespaces = d.get("namespaces") + login = (config.wiki.get("username"), config.wiki.get("password")) + cookiejar = _get_cookiejar() return Site(name=name, project=project, lang=lang, base_url=base_url, - article_path=article_path, script_path=script_path, - sql=(sql_server, sql_db), namespaces=namespaces, login=login) + article_path=article_path, script_path=script_path, sql=sql, + namespaces=namespaces, login=login, cookiejar=cookiejar) def get_site(name=None, project=None, lang=None): """Returns a Site instance based on information from our config file. @@ -112,7 +128,7 @@ def get_site(name=None, project=None, lang=None): # no args given, so return our default site (project is None implies lang # is None, so we don't need to add that in) if name is None and project is None: - try: # ...so use the default site + try: default = config.wiki["defaultSite"] except KeyError: e = "Default site is not specified in config." 
diff --git a/wiki/tools/site.py b/wiki/tools/site.py index f32e3c2..982cd30 100644 --- a/wiki/tools/site.py +++ b/wiki/tools/site.py @@ -2,6 +2,7 @@ from cookielib import CookieJar from json import loads +from re import escape as re_escape, match as re_match from urllib import unquote_plus, urlencode from urllib2 import build_opener, HTTPCookieProcessor, URLError from urlparse import urlparse @@ -19,12 +20,12 @@ class Site(object): def __init__(self, name=None, project=None, lang=None, base_url=None, article_path=None, script_path=None, sql=(None, None), - namespaces=None, login=(None, None)): + namespaces=None, login=(None, None), cookiejar=None): """ Docstring needed """ # attributes referring to site information, filled in by an API query - # if they are missing (and an API url is available) + # if they are missing (and an API url can be determined) self._name = name self._project = project self._lang = lang @@ -35,61 +36,22 @@ class Site(object): self._namespaces = namespaces # set up cookiejar and URL opener for making API queries - self._cookiejar = CookieJar() + if cookiejar is not None: + self._cookiejar = cookiejar + else: + self._cookiejar = CookieJar() self._opener = build_opener(HTTPCookieProcessor(self._cookiejar)) self._opener.addheaders = [('User-agent', USER_AGENT)] - # use a username and password to login if they were provided - if login[0] is not None and login[1] is not None: - self._login(login[0], login[1]) - # get all of the above attributes that were not specified as arguments self._load_attributes() - def _login(self, name, password, token="", attempt=0): - """ - Docstring needed - """ - params = {"action": "login", "lgname": name, "lgpassword": password, - "lgtoken": token} - result = self.api_query(params) - res = result["login"]["result"] - - if res == "Success": - return - elif res == "NeedToken" and attempt == 0: - token = result["login"]["token"] - return self._login(name, password, token, attempt=1) - else: - if res == "Illegal": - e 
= "The provided username is illegal." - elif res == "NotExists": - e = "The provided username does not exist." - elif res == "EmptyPass": - e = "No password was given." - elif res == "WrongPass" or res == "WrongPluginPass": - e = "The given password is incorrect." - else: - e = "Couldn't login; server says '{0}'.".format(res) - raise LoginError(e) - - def _get_logged_in_user(self): - """ - Docstring needed - """ - # first try to get username from the cookie jar to avoid an - # unnecessary API query - cookie_name = ''.join((self._name, "UserName")) - cookie_domain = urlparse(self._base_url).netloc - for cookie in self._cookiejar: - if cookie.name == cookie_name and cookie.domain == cookie_domain: - return unquote_plus(cookie.value) - - # if we end up here, we're probably an anon and thus an API query - # will be required to get our username - params = {"action": "query", "meta": "userinfo"} - result = self.api_query(params) - return result["query"]["userinfo"]["name"] + # if we have a name/pass and the API says we're not logged in, log in + self._login_info = name, password = login + if name is not None and password is not None: + logged_in_as = self._get_username_from_cookies() + if logged_in_as is None or name != logged_in_as: + self._login(login) def _load_attributes(self, force=False): """ @@ -101,7 +63,7 @@ class Site(object): self._article_path, self._script_path] params = {"action": "query", "meta": "siteinfo"} - + if self._namespaces is None or force: params["siprop"] = "general|namespaces|namespacealiases" result = self.api_query(params) @@ -113,24 +75,12 @@ class Site(object): result = self.api_query(params) res = result["query"]["general"] - - if self._name is None or force: - self._name = res["wikiid"] - - if self._project is None or force: - self._project = res["sitename"].lower() - - if self._lang is None or force: - self._lang = res["lang"] - - if self._base_url is None or force: - self._base_url = res["server"] - - if self._article_path is None or 
force: - self._article_path = res["articlepath"] - - if self._script_path is None or force: - self._script_path = res["scriptpath"] + self._name = res["wikiid"] + self._project = res["sitename"].lower() + self._lang = res["lang"] + self._base_url = res["server"] + self._article_path = res["articlepath"] + self._script_path = res["scriptpath"] def _load_namespaces(self, result): """ @@ -156,20 +106,162 @@ class Site(object): alias = namespace["*"] self._namespaces[ns_id].append(alias) + def _get_cookie(self, name, domain): + """Return the cookie `name` in `domain`, unless it is expired. Return + None if no cookie was found. + """ + for cookie in self._cookiejar: + if cookie.name == name and cookie.domain == domain: + if cookie.is_expired(): + break + return cookie + return None + + def _get_username_from_cookies(self): + """Try to return our username based solely on cookies. + + First, we'll look for a cookie named self._name + "Token", like + "enwikiToken". If it exists and isn't expired, we'll assume it's valid + and try to return the value of the cookie self._name + "UserName" (like + "enwikiUserName"). This should work fine on wikis without single-user + login. + + If `enwikiToken` doesn't exist, we'll try to find a cookie named + `centralauth_Token`. If this exists and is not expired, we'll try to + return the value of `centralauth_User`. + + If we didn't get any matches, we'll return None. Our goal here isn't to + return the most likely username, or what we *want* our username to be + (for that, we'd do self._login_info[0]), but rather to get our current + username without an unnecessary ?action=query&meta=userinfo API query. 
+ """ + domain = self.domain() + name = ''.join((self._name, "Token")) + cookie = self._get_cookie(name, domain) + + if cookie is not None: + name = ''.join((self._name, "UserName")) + user_name = self._get_cookie(name, domain) + if user_name is not None: + return user_name.value + + name = "centralauth_Token" + for cookie in self._cookiejar: + if cookie.domain_initial_dot is False or cookie.is_expired(): + continue + if cookie.name != name: + continue + # build a regex that will match domains this cookie affects + search = ''.join(("(.*?)", re_escape(cookie.domain))) + if re_match(search, domain): # test it against our site + user_name = self._get_cookie("centralauth_User", cookie.domain) + if user_name is not None: + return user_name.value + + return None + + def _get_username_from_api(self): + """Do a simple API query to get our username and return it. + + This is a reliable way to make sure we are actually logged in, because + it doesn't deal with annoying cookie logic, but it results in an API + query that is unnecessary in many cases. + + Called by _get_username() (in turn called by get_user() with no + username argument) when cookie lookup fails, probably indicating that + we are logged out. + """ + params = {"action": "query", "meta": "userinfo"} + result = self.api_query(params) + return result["query"]["userinfo"]["name"] + + def _get_username(self): + """Return the name of the current user, whether logged in or not. + + First, we'll try to deduce it solely from cookies, to avoid an + unnecessary API query. For the cookie-detection method, see + _get_username_from_cookies()'s docs. + + If our username isn't in cookies, then we're probably not logged in, or + something fishy is going on (like forced logout). In this case, do a + single API query for our username (or IP address) and return that. 
+ """ + name = self._get_username_from_cookies() + if name is not None: + return name + return self._get_username_from_api() + + def _save_cookiejar(self): + """Try to save our cookiejar after doing a (normal) login or logout. + + Calls the standard .save() method with no filename. Don't fret if our + cookiejar doesn't support saving (CookieJar raises AttributeError, + FileCookieJar raises NotImplementedError) or no default filename was + given (LWPCookieJar and MozillaCookieJar raise ValueError). + """ + try: + self._cookiejar.save() + except (AttributeError, NotImplementedError, ValueError): + pass + + def _login(self, login, token=None, attempt=0): + """ + Docstring needed + """ + name, password = login + params = {"action": "login", "lgname": name, "lgpassword": password} + if token is not None: + params["lgtoken"] = token + result = self.api_query(params) + res = result["login"]["result"] + + if res == "Success": + self._save_cookiejar() + elif res == "NeedToken" and attempt == 0: + token = result["login"]["token"] + return self._login(login, token, attempt=1) + else: + if res == "Illegal": + e = "The provided username is illegal." + elif res == "NotExists": + e = "The provided username does not exist." + elif res == "EmptyPass": + e = "No password was given." + elif res == "WrongPass" or res == "WrongPluginPass": + e = "The given password is incorrect." + else: + e = "Couldn't login; server says '{0}'.".format(res) + raise LoginError(e) + + def _logout(self): + """ + Docstring needed + """ + params = {"action": "logout"} + self.api_query(params) + self._cookiejar.clear() + self._save_cookiejar() + def api_query(self, params): """ Docstring needed """ + if self._base_url is None or self._script_path is None: + e = "Tried to do an API query, but no API URL is known." 
+ raise SiteAPIError(e) + url = ''.join((self._base_url, self._script_path, "/api.php")) params["format"] = "json" # this is the only format we understand data = urlencode(params) + print url, data # debug code + try: response = self._opener.open(url, data) except URLError as error: if hasattr(error, "reason"): - e = "API query at {0} failed because {1}.".format(error.geturl, - error.reason) + e = "API query at {0} failed because {1}." + e = e.format(error.geturl, error.reason) elif hasattr(error, "code"): e = "API query at {0} failed; got an error code of {1}." e = e.format(error.geturl, error.code) @@ -198,29 +290,11 @@ class Site(object): """ return self._lang - def base_url(self): - """ - Docstring needed - """ - return self._base_url - - def article_path(self): - """ - Docstring needed - """ - return self._article_path - - def script_path(self): - """ - Docstring needed - """ - return self._script_path - - def namespaces(self): + def domain(self): """ Docstring needed """ - return self._namespaces + return urlparse(self._base_url).netloc def namespace_id_to_name(self, ns_id, all=False): """ @@ -272,5 +346,5 @@ class Site(object): Docstring needed """ if username is None: - username = self._get_logged_in_user() + username = self._get_username() return User(self, username) diff --git a/wiki/tools/user.py b/wiki/tools/user.py index 98f9670..be71515 100644 --- a/wiki/tools/user.py +++ b/wiki/tools/user.py @@ -132,7 +132,7 @@ class User(object): """ return self._get_attribute("_registration", force) - def is_emailable(self, force=False): + def emailable(self, force=False): """ Docstring needed """ @@ -144,7 +144,7 @@ class User(object): """ return self._get_attribute("_gender", force) - def userpage(self): + def get_userpage(self): """ Docstring needed """ @@ -152,7 +152,7 @@ class User(object): pagename = ''.join((prefix, ":", self._name)) return Page(self.site, pagename) - def talkpage(self): + def get_talkpage(self): """ Docstring needed """