@@ -116,7 +116,6 @@ class Page(CopyvioMixIn):
         self._creator = None

         # Attributes used for editing/deleting/protecting/etc:
-        self._token = None
         self._basetimestamp = None
         self._starttimestamp = None
@@ -199,18 +198,18 @@ class Page(CopyvioMixIn):
         """Load various data from the API in a single query.

         Loads self._title, ._exists, ._is_redirect, ._pageid, ._fullurl,
-        ._protection, ._namespace, ._is_talkpage, ._creator, ._lastrevid,
-        ._token, and ._starttimestamp using the API. It will do a query of
-        its own unless *result* is provided, in which case we'll pretend
-        *result* is what the query returned.
+        ._protection, ._namespace, ._is_talkpage, ._creator, ._lastrevid, and
+        ._starttimestamp using the API. It will do a query of its own unless
+        *result* is provided, in which case we'll pretend *result* is what the
+        query returned.

         Assuming the API is sound, this should not raise any exceptions.
         """
         if not result:
             query = self.site.api_query
-            result = query(action="query", rvprop="user", intoken="edit",
-                           prop="info|revisions", rvlimit=1, rvdir="newer",
-                           titles=self._title, inprop="protection|url")
+            result = query(action="query", prop="info|revisions",
+                           inprop="protection|url", rvprop="user", rvlimit=1,
+                           rvdir="newer", titles=self._title)

         res = result["query"]["pages"].values()[0]
@@ -233,13 +232,7 @@ class Page(CopyvioMixIn):
         self._fullurl = res["fullurl"]
         self._protection = res["protection"]
-        try:
-            self._token = res["edittoken"]
-        except KeyError:
-            pass
-        else:
-            self._starttimestamp = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())
+        self._starttimestamp = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())

         # We've determined the namespace and talkpage status in __init__()
         # based on the title, but now we can be sure:
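
For reference, the timestamp recorded above uses MediaWiki's ISO 8601 format; a quick illustrative snippet (the printed value is simply the current UTC time):

    from time import gmtime, strftime

    print(strftime("%Y-%m-%dT%H:%M:%SZ", gmtime()))  # e.g. "2014-06-01T12:34:56Z"
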
@@ -291,13 +284,6 @@ class Page(CopyvioMixIn):
         in _handle_edit_errors(). We'll then throw these back as subclasses of
         EditError.
         """
-        # Try to get our edit token, and die if we can't:
-        if not self._token:
-            self._load_attributes()
-            if not self._token:
-                e = "You don't have permission to edit this page."
-                raise exceptions.PermissionsError(e)
-
         # Weed out invalid pages before we get too far:
         self._assert_validity()
@@ -306,8 +292,7 @@ class Page(CopyvioMixIn):
             params = self._build_edit_params(text, summary, minor, bot, force,
                                              section, captcha_id, captcha_word)
         else:  # Make sure we have the right token:
-            params["token"] = self._token
-            self._token = None  # Token now invalid
+            params["token"] = self.site.get_token()

         # Try the API query, catching most errors with our handler:
         try:
@@ -332,8 +317,9 @@ class Page(CopyvioMixIn):
         """Given some keyword arguments, build an API edit query string."""
         unitxt = text.encode("utf8") if isinstance(text, unicode) else text
         hashed = md5(unitxt).hexdigest()  # Checksum to ensure text is correct
-        params = {"action": "edit", "title": self._title, "text": text,
-                  "token": self._token, "summary": summary, "md5": hashed}
+        params = {
+            "action": "edit", "title": self._title, "text": text,
+            "token": self.site.get_token(), "summary": summary, "md5": hashed}

         if section:
             params["section"] = section
@@ -378,13 +364,13 @@ class Page(CopyvioMixIn):
             self._exists = self.PAGE_UNKNOWN
             raise exceptions.EditConflictError(error.info)
         elif error.code == "badtoken" and retry:
-            params["token"] = self.site.get_token("edit")
+            params["token"] = self.site.get_token(force=True)
             try:
                 return self.site.api_query(**params)
-            except exceptions.APIError as error:
-                if not hasattr(error, "code"):
+            except exceptions.APIError as err:
+                if not hasattr(err, "code"):
                     raise  # We can only handle errors with a code attribute
-                return self._handle_edit_errors(error, params, retry=False)
+                return self._handle_edit_errors(err, params, retry=False)
         elif error.code in ["emptypage", "emptynewsection"]:
             raise exceptions.NoContentError(error.info)
         elif error.code == "contenttoobig":
@@ -577,7 +563,7 @@ class Page(CopyvioMixIn):
             query = self.site.api_query
             result = query(action="query", rvlimit=1, titles=self._title,
                            prop="info|revisions", inprop="protection|url",
-                           intoken="edit", rvprop="content|timestamp")
+                           rvprop="content|timestamp")
             self._load_attributes(result=result)
             self._assert_existence()
             self._load_content(result=result)
@@ -610,7 +596,7 @@ class Page(CopyvioMixIn):
         :py:exc:`~earwigbot.exceptions.RedirectError` if the page is not a
         redirect.
         """
-        re_redirect = "^\s*\#\s*redirect\s*\[\[(.*?)\]\]"
+        re_redirect = r"^\s*\#\s*redirect\s*\[\[(.*?)\]\]"
         content = self.get()
         try:
             return re.findall(re_redirect, content, flags=re.I)[0]
@@ -709,7 +695,7 @@ class Page(CopyvioMixIn):
         username = username.lower()
         optouts = [optout.lower() for optout in optouts] if optouts else []

-        r_bots = "\{\{\s*(no)?bots\s*(\||\}\})"
+        r_bots = r"\{\{\s*(no)?bots\s*(\||\}\})"
         filter = self.parse().ifilter_templates(recursive=True, matches=r_bots)
         for template in filter:
             if template.has_param("deny"):
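
The raw-string change above doesn't alter the exclusion pattern itself; a minimal sketch of what it matches, using made-up template text:

    import re

    r_bots = r"\{\{\s*(no)?bots\s*(\||\}\})"
    print(re.search(r_bots, "{{bots|deny=ExampleBot}}", flags=re.I) is not None)  # True
    print(re.search(r_bots, "{{nobots}}", flags=re.I) is not None)                # True
    print(re.search(r_bots, "{{robots}}", flags=re.I) is not None)                # False
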
@@ -83,6 +83,8 @@ class Site(object):
     """
     SERVICE_API = 1
     SERVICE_SQL = 2
+    SPECIAL_TOKENS = ["deleteglobalaccount", "patrol", "rollback",
+                      "setglobalaccountstatus", "userrights", "watch"]

     def __init__(self, name=None, project=None, lang=None, base_url=None,
                  article_path=None, script_path=None, sql=None,
@@ -124,6 +126,7 @@ class Site(object):
         self._wait_between_queries = wait_between_queries
         self._max_retries = 6
         self._last_query_time = 0
+        self._tokens = {}
         self._api_lock = RLock()
         self._api_info_cache = {"maxlag": 0, "lastcheck": 0}
@@ -252,13 +255,25 @@ class Site(object):
         return self._handle_api_result(result, params, tries, wait, ae_retry)

+    def _request_csrf_token(self, params):
+        """If possible, add a request for a CSRF token to an API query."""
+        if params.get("action") == "query":
+            if params.get("meta"):
+                if "tokens" not in params["meta"].split("|"):
+                    params["meta"] += "|tokens"
+            else:
+                params["meta"] = "tokens"
+            if params.get("type"):
+                if "csrf" not in params["type"].split("|"):
+                    params["type"] += "|csrf"
+
     def _build_api_query(self, params, ignore_maxlag, no_assert):
         """Given API query params, return the URL to query and POST data."""
         if not self._base_url or self._script_path is None:
             e = "Tried to do an API query, but no API URL is known."
             raise exceptions.APIError(e)
-        url = ''.join((self.url, self._script_path, "/api.php"))
+        url = self.url + self._script_path + "/api.php"
         params["format"] = "json"  # This is the only format we understand
         if self._assert_edit and not no_assert:
             # If requested, ensure that we're logged in
@@ -266,6 +281,9 @@ class Site(object):
         if self._maxlag and not ignore_maxlag:
             # If requested, don't overload the servers:
             params["maxlag"] = self._maxlag
+        if "csrf" not in self._tokens:
+            # If we don't have a CSRF token, try to fetch one:
+            self._request_csrf_token(params)

         data = self._urlencode_utf8(params)
         return url, data
@@ -282,6 +300,9 @@ class Site(object):
             code = res["error"]["code"]
             info = res["error"]["info"]
         except (TypeError, KeyError):  # If there's no error code/info, return
+            if "query" in res and "tokens" in res["query"]:
+                for name, token in res["query"]["tokens"].iteritems():
+                    self._tokens[name.split("token")[0]] = token
             return res

         if code == "maxlag":  # We've been throttled by the server
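
The caching above pairs with _request_csrf_token(): the first ordinary query also asks for a CSRF token, and the response is stored for later use. A rough sketch of the intended flow, assuming `site` is a Site instance and using an illustrative siteinfo query (normally _build_api_query() invokes the helper itself):

    params = {"action": "query", "meta": "siteinfo", "siprop": "general"}
    site._request_csrf_token(params)
    # params["meta"] is now "siteinfo|tokens", so the same request also returns
    # result["query"]["tokens"]["csrftoken"], which _handle_api_result() then
    # caches as site._tokens["csrf"].
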
@@ -326,7 +347,7 @@ class Site(object):
         # All attributes to be loaded, except _namespaces, which is a special
         # case because it requires additional params in the API query:
         attrs = [self._name, self._project, self._lang, self._base_url,
-                 self._article_path, self._script_path]
+                 self._article_path, self._script_path]

         params = {"action": "query", "meta": "siteinfo", "siprop": "general"}
@@ -485,6 +506,7 @@ class Site(object):
         from our first request, and *attempt* is to prevent getting stuck in a
         loop if MediaWiki isn't acting right.
         """
+        self._tokens.clear()
         name, password = login
         params = {"action": "login", "lgname": name, "lgpassword": password}
@@ -764,25 +786,26 @@ class Site(object):
         result = list(self.sql_query(query))
         return int(result[0][0])

-    def get_token(self, action):
+    def get_token(self, action=None, force=False):
         """Return a token for a data-modifying API action.

-        *action* must be one of the types listed on
-        <https://www.mediawiki.org/wiki/API:Tokens>. If it's given as a union
-        of types separated by |, then the function will return a dictionary
-        of tokens instead of a single one.
+        In general, this will be a CSRF token, unless *action* is in a special
+        list of non-CSRF tokens. Tokens are cached for the session (until
+        :meth:`_login` is called again); set *force* to ``True`` to force a new
+        token to be fetched.

-        Raises :py:exc:`~earwigbot.exceptions.PermissionsError` if we don't
-        have permissions for the requested action(s), or they are invalid.
-        Raises :py:exc:`~earwigbot.exceptions.APIError` if there was some other
-        API issue.
+        Raises :exc:`.APIError` if there was an API issue.
         """
-        res = self.api_query(action="tokens", type=action)
-        if "warnings" in res and "tokens" in res["warnings"]:
-            raise exceptions.PermissionsError(res["warnings"]["tokens"]["*"])
-        if "|" in action:
-            return res["tokens"]
-        return res["tokens"].values()[0]
+        if action not in self.SPECIAL_TOKENS:
+            action = "csrf"
+        if action in self._tokens and not force:
+            return self._tokens[action]
+
+        res = self.api_query(action="query", meta="tokens", type=action)
+        if action not in self._tokens:
+            err = "Tried to fetch a {0} token, but API returned: {1}"
+            raise exceptions.APIError(err.format(action, res))
+        return self._tokens[action]

     def namespace_id_to_name(self, ns_id, all=False):
         """Given a namespace ID, returns associated namespace names.