瀏覽代碼

Major improvements to cookies, login/logout, and crosswiki support.

* Exceptions: New PermissionsError; reworded docstring of SiteAPIError.
* Site: __init__() accepts an optional cookiejar parameter, otherwise we
  use CookieJar(). Added five new cookie/username-related methods. Only
  login from __init__() if we are missing valid login cookies and a user/
  pass was provided. _login() and _logout() both try to save cookies via
  _save_cookiejar(). _load_attributes() automatically refreshes all
  attributes other than namespaces if at least one is missing, instead of
  only the missing ones. api_query() raises SiteAPIError if either
  self._base_url or self._script_path is missing. Removed some pointless
  methods and renamed one; added domain().
* Functions: _get_site_object_from_dict() is cleaner, adds our cookiejar
  to Site instances using _get_cookiejar() to load an LWPCookieJar() object
  from the ".cookies" file in our project root. The same cookiejar is
  returned for every site, enabling crosswiki login, via a global variable.
* User: Renamed some methods.
* .gitignore: Added .cookies file.
tags/v0.1^2
Ben Kurtovic 12 年之前
父節點
當前提交
612c9c8ff6
共有 5 個檔案被更改,包括 235 行新增136 行删除
  1. +3
    -0
      .gitignore
  2. +7
    -1
      wiki/tools/exceptions.py
  3. +55
    -39
      wiki/tools/functions.py
  4. +167
    -93
      wiki/tools/site.py
  5. +3
    -3
      wiki/tools/user.py

+ 3
- 0
.gitignore 查看文件

@@ -4,6 +4,9 @@
# Ignore bot-specific config file:
config.json

# Ignore cookies file:
.cookies

# Ignore OS X's crud:
*.DS_Store



+ 7
- 1
wiki/tools/exceptions.py 查看文件

@@ -15,12 +15,18 @@ class SiteNotFoundError(WikiToolsetError):

class SiteAPIError(WikiToolsetError):
"""We couldn't connect to a site's API, perhaps because the server doesn't
exist, our URL is wrong, or they're having temporary problems."""
exist, our URL is wrong or incomplete, or they're having temporary
problems."""

class LoginError(WikiToolsetError):
    """An error occurred while trying to log in. Perhaps the username/password
    is incorrect."""

class PermissionsError(WikiToolsetError):
    """We tried to do something we don't have permission to do, like a
    non-admin trying to delete a page, or trying to edit a page when no login
    information was provided."""

class NamespaceNotFoundError(WikiToolsetError):
    """A requested namespace name or namespace ID does not exist."""



+ 55
- 39
wiki/tools/functions.py 查看文件

@@ -10,7 +10,11 @@ There's no need to import this module explicitly. All functions here are
automatically available from wiki.tools.
"""

from cookielib import LWPCookieJar, LoadError
import errno
from getpass import getpass
from os import chmod, path
import stat

from core import config
from wiki.tools.exceptions import SiteNotFoundError
@@ -18,6 +22,8 @@ from wiki.tools.site import Site

__all__ = ["get_site"]

_cookiejar = None

def _load_config():
"""Called by a config-requiring function, such as get_site(), when config
has not been loaded. This will usually happen only if we're running code
@@ -31,50 +37,60 @@ def _load_config():
else:
config.parse_config(None)

def _get_cookiejar():
    """Return the shared LWPCookieJar backed by our .cookies file.

    The jar is built on the first call, cached in the module-level
    `_cookiejar` variable, and handed back unchanged on every later call.

    The .cookies file lives in `config.root_dir` (the project root). If it
    doesn't exist, we create it empty and make it readable and writeable only
    by its owner. If it exists but can't be parsed (LoadError), the failure is
    ignored. Any other IOError raised while loading is propagated.

    This is normally called by _get_site_object_from_dict(), which passes the
    jar to the Site constructor so that every Site shares the same cookies
    (e.g., for CentralAuth).
    """
    global _cookiejar
    if _cookiejar is None:
        jar_path = path.join(config.root_dir, ".cookies")
        # assign before loading, so the jar stays cached even if load() fails
        _cookiejar = LWPCookieJar(jar_path)
        try:
            _cookiejar.load()
        except LoadError:
            # file contains bad data, so ignore it completely
            pass
        except IOError as err:
            # only "No such file or directory" is handled here
            if err.errno != errno.ENOENT:
                raise
            # create the file and restrict reading/writing to the owner, so
            # others can't peek at our cookies
            open(jar_path, "w").close()
            chmod(jar_path, stat.S_IRUSR | stat.S_IWUSR)
    return _cookiejar

def _get_site_object_from_dict(name, d):
"""Return a Site object based on the contents of a dict, probably acquired
through our config file, and a separate name.
"""
try:
project = d["project"]
except KeyError:
project = None
try:
lang = d["lang"]
except KeyError:
lang = None
try:
base_url = d["baseURL"]
except KeyError:
base_url = None
try:
article_path = d["articlePath"]
except KeyError:
article_path = None
try:
script_path = d["scriptPath"]
except KeyError:
script_path = None
try:
sql_server = d["sqlServer"]
except KeyError:
sql_server = None
try:
sql_db = d["sqlDB"]
except KeyError:
sql_db = None
try:
namespaces = d["namespaces"]
except KeyError:
namespaces = None
try:
login = (config.wiki["username"], config.wiki["password"])
except KeyError:
login = (None, None)
project = d.get("project")
lang = d.get("lang")
base_url = d.get("baseURL")
article_path = d.get("articlePath")
script_path = d.get("scriptPath")
sql = (d.get("sqlServer"), d.get("sqlDB"))
namespaces = d.get("namespaces")
login = (config.wiki.get("username"), config.wiki.get("password"))
cookiejar = _get_cookiejar()

return Site(name=name, project=project, lang=lang, base_url=base_url,
article_path=article_path, script_path=script_path,
sql=(sql_server, sql_db), namespaces=namespaces, login=login)
article_path=article_path, script_path=script_path, sql=sql,
namespaces=namespaces, login=login, cookiejar=cookiejar)

def get_site(name=None, project=None, lang=None):
"""Returns a Site instance based on information from our config file.
@@ -112,7 +128,7 @@ def get_site(name=None, project=None, lang=None):
# no args given, so return our default site (project is None implies lang
# is None, so we don't need to add that in)
if name is None and project is None:
try: # ...so use the default site
try:
default = config.wiki["defaultSite"]
except KeyError:
e = "Default site is not specified in config."


+ 167
- 93
wiki/tools/site.py 查看文件

@@ -2,6 +2,7 @@

from cookielib import CookieJar
from json import loads
from re import escape as re_escape, match as re_match
from urllib import unquote_plus, urlencode
from urllib2 import build_opener, HTTPCookieProcessor, URLError
from urlparse import urlparse
@@ -19,12 +20,12 @@ class Site(object):

def __init__(self, name=None, project=None, lang=None, base_url=None,
article_path=None, script_path=None, sql=(None, None),
namespaces=None, login=(None, None)):
namespaces=None, login=(None, None), cookiejar=None):
"""
Docstring needed
"""
# attributes referring to site information, filled in by an API query
# if they are missing (and an API url is available)
# if they are missing (and an API url can be determined)
self._name = name
self._project = project
self._lang = lang
@@ -35,61 +36,22 @@ class Site(object):
self._namespaces = namespaces

# set up cookiejar and URL opener for making API queries
self._cookiejar = CookieJar()
if cookiejar is not None:
self._cookiejar = cookiejar
else:
self._cookiejar = CookieJar()
self._opener = build_opener(HTTPCookieProcessor(self._cookiejar))
self._opener.addheaders = [('User-agent', USER_AGENT)]

# use a username and password to login if they were provided
if login[0] is not None and login[1] is not None:
self._login(login[0], login[1])

# get all of the above attributes that were not specified as arguments
self._load_attributes()

def _login(self, name, password, token="", attempt=0):
    """Log in through the API as `name` with `password`.

    Sends an action=login query carrying `token`. If the API answers
    "NeedToken" on the first attempt (attempt == 0), the query is repeated
    once with the token from the response. A "Success" result returns None;
    any other result raises LoginError with a message describing it.
    """
    reply = self.api_query({"action": "login", "lgname": name,
                            "lgpassword": password, "lgtoken": token})
    outcome = reply["login"]["result"]

    if outcome == "Success":
        return
    if outcome == "NeedToken" and attempt == 0:
        # retry exactly once, this time with the token the server gave us
        return self._login(name, password, reply["login"]["token"],
                           attempt=1)

    if outcome == "Illegal":
        e = "The provided username is illegal."
    elif outcome == "NotExists":
        e = "The provided username does not exist."
    elif outcome == "EmptyPass":
        e = "No password was given."
    elif outcome in ("WrongPass", "WrongPluginPass"):
        e = "The given password is incorrect."
    else:
        e = "Couldn't login; server says '{0}'.".format(outcome)
    raise LoginError(e)

def _get_logged_in_user(self):
    """Return the name of the user we are currently acting as.

    The cookie jar is searched first for a cookie named self._name +
    "UserName" on the host of self._base_url; if one is found, its
    URL-decoded value is returned without touching the API. Otherwise a
    ?action=query&meta=userinfo query is made and the name it reports is
    returned.
    """
    wanted_name = ''.join((self._name, "UserName"))
    wanted_domain = urlparse(self._base_url).netloc
    for cookie in self._cookiejar:
        if cookie.name != wanted_name:
            continue
        if cookie.domain == wanted_domain:
            return unquote_plus(cookie.value)

    # no matching cookie: we're probably an anon, so an API query is needed
    # to find out what the server calls us
    reply = self.api_query({"action": "query", "meta": "userinfo"})
    return reply["query"]["userinfo"]["name"]
# if we have a name/pass and the API says we're not logged in, log in
self._login_info = name, password = login
if name is not None and password is not None:
logged_in_as = self._get_username_from_cookies()
if logged_in_as is None or name != logged_in_as:
self._login(login)

def _load_attributes(self, force=False):
"""
@@ -101,7 +63,7 @@ class Site(object):
self._article_path, self._script_path]

params = {"action": "query", "meta": "siteinfo"}
if self._namespaces is None or force:
params["siprop"] = "general|namespaces|namespacealiases"
result = self.api_query(params)
@@ -113,24 +75,12 @@ class Site(object):
result = self.api_query(params)

res = result["query"]["general"]

if self._name is None or force:
self._name = res["wikiid"]

if self._project is None or force:
self._project = res["sitename"].lower()

if self._lang is None or force:
self._lang = res["lang"]

if self._base_url is None or force:
self._base_url = res["server"]

if self._article_path is None or force:
self._article_path = res["articlepath"]

if self._script_path is None or force:
self._script_path = res["scriptpath"]
self._name = res["wikiid"]
self._project = res["sitename"].lower()
self._lang = res["lang"]
self._base_url = res["server"]
self._article_path = res["articlepath"]
self._script_path = res["scriptpath"]

def _load_namespaces(self, result):
"""
@@ -156,20 +106,162 @@ class Site(object):
alias = namespace["*"]
self._namespaces[ns_id].append(alias)

def _get_cookie(self, name, domain):
    """Return the unexpired cookie called `name` in `domain`, or None.

    Every cookie in self._cookiejar is examined. A cookie matches when both
    its name and its domain are equal to the arguments. Expired matches are
    skipped rather than ending the search, because the jar may hold more than
    one cookie with the same name and domain (for instance under different
    paths) and a later one may still be valid.

    Returns the first matching, unexpired cookie object, or None if there is
    no such cookie.
    """
    for cookie in self._cookiejar:
        if cookie.name != name or cookie.domain != domain:
            continue
        if not cookie.is_expired():
            return cookie
        # expired match: keep looking instead of giving up
    return None

def _get_username_from_cookies(self):
    """Try to return our username based solely on cookies.

    First, we look for a cookie named self._name + "Token", like
    "enwikiToken", on our own domain. If it exists and isn't expired, we
    assume it's valid and return the value of the cookie self._name +
    "UserName" (like "enwikiUserName"), if there is one. This should work
    fine on wikis without single-user login.

    Failing that, we look for an unexpired `centralauth_Token` cookie whose
    domain starts with a dot and is a suffix of our own domain (so
    ".wikipedia.org" covers "en.wikipedia.org"). If one is found, we return
    the value of `centralauth_User` from that same cookie domain.

    Usernames are URL-decoded before being returned, so that a name with
    spaces compares equal to the plain username we were configured with.

    If nothing matches, we return None. The goal here isn't to return what
    we *want* our username to be, but to learn our current username without
    an unnecessary ?action=query&meta=userinfo API query.
    """
    domain = self.domain()

    if self._get_cookie(''.join((self._name, "Token")), domain) is not None:
        user_name = self._get_cookie(''.join((self._name, "UserName")),
                                     domain)
        if user_name is not None:
            return unquote_plus(user_name.value)

    for cookie in self._cookiejar:
        if cookie.name != "centralauth_Token":
            continue
        if not cookie.domain_initial_dot or cookie.is_expired():
            continue
        # A dotted cookie domain applies to hosts that end with it. Use a
        # true suffix test; an unanchored regex would also accept
        # "en.wikipedia.org.evil.example" for ".wikipedia.org".
        if domain.endswith(cookie.domain):
            user_name = self._get_cookie("centralauth_User", cookie.domain)
            if user_name is not None:
                return unquote_plus(user_name.value)

    return None

def _get_username_from_api(self):
    """Ask the API who we are and return that name.

    Performs one ?action=query&meta=userinfo query and returns the "name"
    field of the "userinfo" object in the response. This sidesteps cookie
    logic entirely, at the price of an API query that is often unnecessary.

    Called by _get_username() when the cookie lookup comes back empty.
    """
    reply = self.api_query({"action": "query", "meta": "userinfo"})
    userinfo = reply["query"]["userinfo"]
    return userinfo["name"]

def _get_username(self):
    """Return the name of the current user, whether logged in or not.

    The cookie-based lookup (_get_username_from_cookies()) is tried first,
    since it costs no API query. Only when it returns None do we fall back
    to _get_username_from_api() and return whatever name the API reports.
    """
    from_cookies = self._get_username_from_cookies()
    if from_cookies is None:
        return self._get_username_from_api()
    return from_cookies

def _save_cookiejar(self):
    """Try to save our cookiejar after doing a (normal) login or logout.

    Calls the jar's .save() method with no filename. Three failures are
    deliberately tolerated: AttributeError (the jar has no save(), as with a
    plain CookieJar), NotImplementedError (FileCookieJar), and ValueError (no
    default filename was given to LWPCookieJar or MozillaCookieJar). Any
    other exception propagates to the caller.
    """
    tolerated = (AttributeError, NotImplementedError, ValueError)
    try:
        self._cookiejar.save()
    except tolerated:
        # saving is best-effort; an unsaveable jar is not an error
        return

def _login(self, login, token=None, attempt=0):
    """Log in through the API using the (name, password) pair `login`.

    Sends an action=login query; `lgtoken` is included only when `token` is
    not None. On "Success" the cookiejar is saved via _save_cookiejar() and
    None is returned. If the API answers "NeedToken" on the first attempt
    (attempt == 0), the query is repeated once with the token from the
    response. Any other result raises LoginError with a message describing
    it.
    """
    name, password = login
    query = {"action": "login", "lgname": name, "lgpassword": password}
    if token is not None:
        query["lgtoken"] = token
    reply = self.api_query(query)
    outcome = reply["login"]["result"]

    if outcome == "Success":
        self._save_cookiejar()
        return
    if outcome == "NeedToken" and attempt == 0:
        # retry exactly once, this time with the token the server gave us
        return self._login(login, reply["login"]["token"], attempt=1)

    if outcome == "Illegal":
        e = "The provided username is illegal."
    elif outcome == "NotExists":
        e = "The provided username does not exist."
    elif outcome == "EmptyPass":
        e = "No password was given."
    elif outcome in ("WrongPass", "WrongPluginPass"):
        e = "The given password is incorrect."
    else:
        e = "Couldn't login; server says '{0}'.".format(outcome)
    raise LoginError(e)

def _logout(self):
    """Log out through the API, then discard and persist our cookies.

    Sends an action=logout query, clears the cookiejar, and finally calls
    _save_cookiejar() so the cleared state is saved where the jar supports
    it.
    """
    self.api_query({"action": "logout"})
    # NOTE(review): clear() with no arguments empties the whole jar; if the
    # jar is shared between sites this drops their cookies too -- confirm
    # that is intended.
    jar = self._cookiejar
    jar.clear()
    self._save_cookiejar()

def api_query(self, params):
"""
Docstring needed
"""
if self._base_url is None or self._script_path is None:
e = "Tried to do an API query, but no API URL is known."
raise SiteAPIError(e)

url = ''.join((self._base_url, self._script_path, "/api.php"))
params["format"] = "json" # this is the only format we understand
data = urlencode(params)

print url, data # debug code

try:
response = self._opener.open(url, data)
except URLError as error:
if hasattr(error, "reason"):
e = "API query at {0} failed because {1}.".format(error.geturl,
error.reason)
e = "API query at {0} failed because {1}."
e = e.format(error.geturl, error.reason)
elif hasattr(error, "code"):
e = "API query at {0} failed; got an error code of {1}."
e = e.format(error.geturl, error.code)
@@ -198,29 +290,11 @@ class Site(object):
"""
return self._lang

def base_url(self):
    """Return the value stored in self._base_url (None if it is unset)."""
    return self._base_url

def article_path(self):
    """Return the value stored in self._article_path (None if it is unset).
    """
    return self._article_path

def script_path(self):
    """Return the value stored in self._script_path (None if it is unset).
    """
    return self._script_path

def namespaces(self):
def domain(self):
"""
Docstring needed
"""
return self._namespaces
return urlparse(self._base_url).netloc

def namespace_id_to_name(self, ns_id, all=False):
"""
@@ -272,5 +346,5 @@ class Site(object):
Docstring needed
"""
if username is None:
username = self._get_logged_in_user()
username = self._get_username()
return User(self, username)

+ 3
- 3
wiki/tools/user.py 查看文件

@@ -132,7 +132,7 @@ class User(object):
"""
return self._get_attribute("_registration", force)

def is_emailable(self, force=False):
def emailable(self, force=False):
"""
Docstring needed
"""
@@ -144,7 +144,7 @@ class User(object):
"""
return self._get_attribute("_gender", force)

def userpage(self):
def get_userpage(self):
"""
Docstring needed
"""
@@ -152,7 +152,7 @@ class User(object):
pagename = ''.join((prefix, ":", self._name))
return Page(self.site, pagename)

def talkpage(self):
def get_talkpage(self):
"""
Docstring needed
"""


Loading…
取消
儲存