From 8dc023fcaceb587bfda8d2e71621de65b88de1fa Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 20 Aug 2011 02:09:18 -0400 Subject: [PATCH 01/16] support for Maxlag and AssertEdit in API queries, via config.json and Site.__init__ kwargs; patched holes in Site._api_query, including stability improvements and support for retrying queries when servers lag --- bot/commands/ctcp.py | 2 +- bot/wiki/functions.py | 6 +++-- bot/wiki/site.py | 70 +++++++++++++++++++++++++++++++++++++++------------ 3 files changed, 59 insertions(+), 19 deletions(-) diff --git a/bot/commands/ctcp.py b/bot/commands/ctcp.py index 3993279..4b86b96 100644 --- a/bot/commands/ctcp.py +++ b/bot/commands/ctcp.py @@ -42,6 +42,6 @@ class Command(BaseCommand): elif command == "VERSION": default = "EarwigBot - 0.1-dev - Python/$1 https://github.com/earwig/earwigbot" - vers = config.metadata.get("ircVersion", default) + vers = config.irc.get("version", default) vers = vers.replace("$1", platform.python_version()) self.connection.notice(target, "\x01VERSION {0}\x01".format(vers)) diff --git a/bot/wiki/functions.py b/bot/wiki/functions.py index 5c4052c..7562a4e 100644 --- a/bot/wiki/functions.py +++ b/bot/wiki/functions.py @@ -86,7 +86,9 @@ def _get_site_object_from_dict(name, d): namespaces = d.get("namespaces", {}) login = (config.wiki.get("username"), config.wiki.get("password")) cookiejar = _get_cookiejar() - user_agent = config.metadata.get("userAgent") + user_agent = config.wiki.get("userAgent") + assert_edit = config.wiki.get("assert") + maxlag = config.wiki.get("maxlag") if user_agent: user_agent = user_agent.replace("$1", platform.python_version()) @@ -102,7 +104,7 @@ def _get_site_object_from_dict(name, d): return Site(name=name, project=project, lang=lang, base_url=base_url, article_path=article_path, script_path=script_path, sql=sql, namespaces=namespaces, login=login, cookiejar=cookiejar, - user_agent=user_agent) + user_agent=user_agent, assert_edit=assert_edit, maxlag=maxlag) def get_site(name=None, project=None, lang=None): """Returns a Site instance based on information from our config file. diff --git a/bot/wiki/site.py b/bot/wiki/site.py index eecf2c3..ab94947 100644 --- a/bot/wiki/site.py +++ b/bot/wiki/site.py @@ -5,6 +5,7 @@ from gzip import GzipFile from json import loads from re import escape as re_escape, match as re_match from StringIO import StringIO +from time import sleep from urllib import unquote_plus, urlencode from urllib2 import build_opener, HTTPCookieProcessor, URLError from urlparse import urlparse @@ -41,7 +42,7 @@ class Site(object): def __init__(self, name=None, project=None, lang=None, base_url=None, article_path=None, script_path=None, sql=(None, None), namespaces=None, login=(None, None), cookiejar=None, - user_agent=None): + user_agent=None, assert_edit=None, maxlag=None): """Constructor for new Site instances. This probably isn't necessary to call yourself unless you're building a @@ -69,6 +70,11 @@ class Site(object): self._sql = sql self._namespaces = namespaces + # Attributes used when querying the API: + self._assert_edit = assert_edit + self._maxlag = maxlag + self._max_retries = 5 + # Set up cookiejar and URL opener for making API queries: if cookiejar is not None: self._cookiejar = cookiejar @@ -90,22 +96,25 @@ class Site(object): if logged_in_as is None or name != logged_in_as: self._login(login) - def _api_query(self, params): + def _api_query(self, params, tries=0, wait=5): """Do an API query with `params` as a dict of parameters. This will first attempt to construct an API url from self._base_url and self._script_path. We need both of these, or else we'll raise SiteAPIError. - We'll encode the given params, adding format=json along the way, and - make the request through self._opener, which has built-in cookie + We'll encode the given params, adding format=json along the way, as + well as &assert= and &maxlag= based on self._assert_edit and _maxlag. + We make the request through self._opener, which has built-in cookie support via self._cookiejar, a User-Agent (wiki.constants.USER_AGENT), and Accept-Encoding set to "gzip". - + Assuming everything went well, we'll gunzip the data (if compressed), load it as a JSON object, and return it. - If our request failed, we'll raise SiteAPIError with details. + If our request failed for some reason, we'll raise SiteAPIError with + details. If that reason was due to maxlag, we'll sleep for a bit and + then repeat the query until we exceed self._max_retries. There's helpful MediaWiki API documentation at . @@ -115,7 +124,13 @@ class Site(object): raise SiteAPIError(e) url = ''.join((self._base_url, self._script_path, "/api.php")) + params["format"] = "json" # This is the only format we understand + if self._assert_edit: # If requested, ensure that we're logged in + params["assert"] = self._assert_edit + if self._maxlag: # If requested, don't overload the servers + params["maxlag"] = self._maxlag + data = urlencode(params) print url, data # debug code @@ -124,21 +139,44 @@ class Site(object): response = self._opener.open(url, data) except URLError as error: if hasattr(error, "reason"): - e = "API query at {0} failed because {1}." - e = e.format(error.geturl, error.reason) + e = "API query failed: {0}.".format(error.reason) elif hasattr(error, "code"): - e = "API query at {0} failed; got an error code of {1}." - e = e.format(error.geturl, error.code) + e = "API query failed: got an error code of {0}." + e = e.format(error.code) else: e = "API query failed." raise SiteAPIError(e) + + result = response.read() + if response.headers.get("Content-Encoding") == "gzip": + stream = StringIO(result) + gzipper = GzipFile(fileobj=stream) + result = gzipper.read() + + try: + res = loads(result) # Parse as a JSON object + except ValueError: + e = "API query failed: JSON could not be decoded." + raise SiteAPIError(e) + + try: + code = res["error"]["code"] + info = res["error"]["info"] + except KeyError: + return res + + if code == "maxlag": + if tries >= self._max_retries: + e = "Maximum number of retries reached ({0})." + raise SiteAPIError(e.format(self._max_retries)) + tries += 1 + msg = 'Server says: "{0}". Retrying in {1} seconds ({2}/{3}).' + print msg.format(info, wait, tries, self._max_retries) + sleep(wait) + return self._api_query(params, tries=tries, wait=wait*3) else: - result = response.read() - if response.headers.get("Content-Encoding") == "gzip": - stream = StringIO(result) - gzipper = GzipFile(fileobj=stream) - result = gzipper.read() - return loads(result) # Parse as a JSON object + e = 'API query failed: got error "{0}"; server says: "{1}".' + raise SiteAPIError(e.format(code, info)) def _load_attributes(self, force=False): """Load data about our Site from the API. From 953527e47334793a6d5d6b24363bc9cf8f2c5f52 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 20 Aug 2011 19:17:46 -0400 Subject: [PATCH 02/16] ignore maxlag on IRC commands, because these are started explicitly by users --- bot/commands/afc_report.py | 1 + bot/commands/afc_status.py | 1 + bot/commands/rights.py | 1 + 3 files changed, 3 insertions(+) diff --git a/bot/commands/afc_report.py b/bot/commands/afc_report.py index 5375ce5..8488ac5 100644 --- a/bot/commands/afc_report.py +++ b/bot/commands/afc_report.py @@ -11,6 +11,7 @@ class Command(BaseCommand): def process(self, data): self.site = wiki.get_site() + self.site._maxlag = None self.data = data if not data.args: diff --git a/bot/commands/afc_status.py b/bot/commands/afc_status.py index d476daf..86f0f96 100644 --- a/bot/commands/afc_status.py +++ b/bot/commands/afc_status.py @@ -27,6 +27,7 @@ class Command(BaseCommand): def process(self, data): self.site = wiki.get_site() + self.site._maxlag = None if data.line[1] == "JOIN": notice = self.get_join_notice() diff --git a/bot/commands/rights.py b/bot/commands/rights.py index 94bf246..a38dfdf 100644 --- a/bot/commands/rights.py +++ b/bot/commands/rights.py @@ -20,6 +20,7 @@ class Command(BaseCommand): username = ' '.join(data.args) site = wiki.get_site() + site._maxlag = None user = site.get_user(username) try: From ff9f6323eb34ac9ce9fe02317d2af8944038b9ad Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 21 Aug 2011 15:32:39 -0400 Subject: [PATCH 03/16] I can has page edit support? --- bot/wiki/page.py | 101 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 89 insertions(+), 12 deletions(-) diff --git a/bot/wiki/page.py b/bot/wiki/page.py index 14139fe..62cba0d 100644 --- a/bot/wiki/page.py +++ b/bot/wiki/page.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- +from hashlib import md5 import re +from time import strftime from urllib import quote from wiki.exceptions import * @@ -25,7 +27,9 @@ class Page(object): is_redirect -- returns True if the page is a redirect, else False toggle_talk -- returns a content page's talk page, or vice versa get -- returns page content - get_redirect_target -- if the page is a redirect, returns its destination + get_redirect_target -- if the page is a redirect, returns its destination + edit -- replaces the page's content or creates a new page + add_section -- add a new section at the bottom of the page """ def __init__(self, site, title, follow_redirects=False): @@ -54,6 +58,11 @@ class Page(object): self._content = None self._creator = None + # Attributes used for editing/deleting/protecting/etc: + self._token = None + self._basetimestamp = None + self._starttimestamp = None + # Try to determine the page's namespace using our site's namespace # converter: prefix = self._title.split(":", 1)[0] @@ -124,16 +133,16 @@ class Page(object): """Loads various data from the API in a single query. Loads self._title, ._exists, ._is_redirect, ._pageid, ._fullurl, - ._protection, ._namespace, ._is_talkpage, ._creator, and ._lastrevid - using the API. It will do a query of its own unless `result` is - provided, in which case we'll pretend `result` is what the query - returned. + ._protection, ._namespace, ._is_talkpage, ._creator, ._lastrevid, + ._token, and ._starttimestamp using the API. It will do a query of + its own unless `result` is provided, in which case we'll pretend + `result` is what the query returned. Assuming the API is sound, this should not raise any exceptions. """ if result is None: - params = {"action": "query", "rvprop": "user", "rvdir": "newer", - "prop": "info|revisions", "rvlimit": 1, + params = {"action": "query", "rvprop": "user", "intoken": "edit", + "prop": "info|revisions", "rvlimit": 1, "rvdir": "newer", "titles": self._title, "inprop": "protection|url"} result = self._site._api_query(params) @@ -168,6 +177,13 @@ class Page(object): self._fullurl = res["fullurl"] self._protection = res["protection"] + try: + self._token = res["edittoken"] + except KeyError: + pass + else: + self._starttimestamp = strftime("%Y-%m-%dT%H:%M:%SZ") + # We've determined the namespace and talkpage status in __init__() # based on the title, but now we can be sure: self._namespace = res["ns"] @@ -192,13 +208,13 @@ class Page(object): """ if result is None: params = {"action": "query", "prop": "revisions", "rvlimit": 1, - "rvprop": "content", "titles": self._title} + "rvprop": "content|timestamp", "titles": self._title} result = self._site._api_query(params) res = result["query"]["pages"].values()[0] try: - content = res["revisions"][0]["*"] - self._content = content + self._content = res["revisions"][0]["*"] + self._basetimestamp = res["revisions"][0]["timestamp"] except KeyError: # This can only happen if the page was deleted since we last called # self._load_attributes(). In that case, some of our attributes are @@ -206,6 +222,25 @@ class Page(object): self._load_attributes() self._force_existence() + def _get_token(self): + """Tries to get an edit token for the page. + + This is actually the same as the delete and protect tokens, so we'll + use it for everything. Raises PermissionError if we're not allowed to + edit the page, otherwise sets self._token and self._starttimestamp. + """ + params = {"action": "query", "prop": "info", "intoken": "edit", + "titles": self._title} + result = self._site._api_query(params) + + try: + self._token = result["query"]["pages"].values()[0]["edittoken"] + except KeyError: + e = "You don't have permission to edit this page." + raise PermissionsError(e) + else: + self._starttimestamp = strftime("%Y-%m-%dT%H:%M:%SZ") + def title(self, force=False): """Returns the Page's title, or pagename. @@ -394,9 +429,9 @@ class Page(object): if force or self._exists == 0: # Kill two birds with one stone by doing an API query for both our # attributes and our page content: - params = {"action": "query", "rvprop": "content", "rvlimit": 1, + params = {"action": "query", "rvlimit": 1, "titles": self._title, "prop": "info|revisions", "inprop": "protection|url", - "titles": self._title} + "intoken": "edit", "rvprop": "content|timestamp"} result = self._site._api_query(params) self._load_attributes(result=result) self._force_existence() @@ -438,3 +473,45 @@ class Page(object): except IndexError: e = "The page does not appear to have a redirect target." raise RedirectError(e) + + def edit(self, text, summary, minor=False, bot=True, force=False): + """Replaces the page's content or creates a new page. + + `text` is the new page content, with `summary` as the edit summary. + If `minor` is True, the edit will be marked as minor. If `bot` is true, + the edit will be marked as a bot edit, but only if we actually have a + bot flag. + + Use `force` to ignore edit conflicts and page deletions/recreations + that occured between getting our edit token and editing our page. Be + careful with this! + """ + if not self._token: + self._get_token() + + hashed = md5(text).hexdigest() + + params = {"action": "edit", "title": self._title, "text": text, + "token": self._token, "summary": summary, "md5": hashed} + + if minor: + params["minor"] = "true" + else: + params["notminor"] = "true" + if bot: + params["bot"] = "true" + + if not force: + params["starttimestamp"] = self._starttimestamp + if self._basetimestamp: + params["basetimestamp"] = self._basetimestamp + else: + params["recreate"] = "true" + + result = self._site._api_query(params) + print result + + def add_section(self, text, title, minor=False, bot=True): + """ + """ + pass \ No newline at end of file From 9125a610eab4332973aeebcf57cb23b6422b8593 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 21 Aug 2011 21:12:39 -0400 Subject: [PATCH 04/16] !editcount/!ec and fix --- bot/commands/editcount.py | 34 ++++++++++++++++++++++++++++++++++ bot/wiki/page.py | 2 +- 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 bot/commands/editcount.py diff --git a/bot/commands/editcount.py b/bot/commands/editcount.py new file mode 100644 index 0000000..df136f7 --- /dev/null +++ b/bot/commands/editcount.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- + +from classes import BaseCommand +import wiki + +class Command(BaseCommand): + """Return a user's edit count.""" + name = "editcount" + + def check(self, data): + commands = ["ec", "editcount"] + if data.is_command and data.command in commands: + return True + return False + + def process(self, data): + if not data.args: + self.connection.reply(data, "who do you want me to look up?") + return + + username = ' '.join(data.args) + site = wiki.get_site() + site._maxlag = None + user = site.get_user(username) + + try: + count = user.editcount() + except wiki.UserNotFoundError: + msg = "the user \x0302{0}\x0301 does not exist." + self.connection.reply(data, msg.format(username)) + return + + msg = "\x0302{0}\x0301 has {1} edits." + self.connection.reply(data, msg.format(username, count)) diff --git a/bot/wiki/page.py b/bot/wiki/page.py index 62cba0d..41a789b 100644 --- a/bot/wiki/page.py +++ b/bot/wiki/page.py @@ -514,4 +514,4 @@ class Page(object): def add_section(self, text, title, minor=False, bot=True): """ """ - pass \ No newline at end of file + pass From 87a85392e4e198ab75e6f52c2cdd7a3a94299558 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 21 Aug 2011 21:15:59 -0400 Subject: [PATCH 05/16] link to x's tool --- bot/commands/editcount.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/bot/commands/editcount.py b/bot/commands/editcount.py index df136f7..061c473 100644 --- a/bot/commands/editcount.py +++ b/bot/commands/editcount.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- +from urllib import quote_plus + from classes import BaseCommand import wiki @@ -30,5 +32,8 @@ class Command(BaseCommand): self.connection.reply(data, msg.format(username)) return - msg = "\x0302{0}\x0301 has {1} edits." - self.connection.reply(data, msg.format(username, count)) + url = "http://toolserver.org/~soxred93/pcount/index.php?name={0}&lang=en&wiki=wikipedia" + url = url.format(quote_plus(user.name())) + + msg = "\x0302{0}\x0301 has {1} edits ({2})." + self.connection.reply(data, msg.format(username, count, url)) From 7c8353530a98e857061b5a143c94c2fdde040672 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 21 Aug 2011 21:21:50 -0400 Subject: [PATCH 06/16] some cleanup; use command issuer's IRC nick as username if none is provided --- bot/commands/editcount.py | 15 +++++++-------- bot/commands/rights.py | 16 ++++++++-------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/bot/commands/editcount.py b/bot/commands/editcount.py index 061c473..71e2492 100644 --- a/bot/commands/editcount.py +++ b/bot/commands/editcount.py @@ -17,23 +17,22 @@ class Command(BaseCommand): def process(self, data): if not data.args: - self.connection.reply(data, "who do you want me to look up?") - return + name = data.nick + else: + name = ' '.join(data.args) - username = ' '.join(data.args) site = wiki.get_site() site._maxlag = None - user = site.get_user(username) + user = site.get_user(name) try: count = user.editcount() except wiki.UserNotFoundError: msg = "the user \x0302{0}\x0301 does not exist." - self.connection.reply(data, msg.format(username)) + self.connection.reply(data, msg.format(name)) return + safe = quote_plus(user.name()) url = "http://toolserver.org/~soxred93/pcount/index.php?name={0}&lang=en&wiki=wikipedia" - url = url.format(quote_plus(user.name())) - msg = "\x0302{0}\x0301 has {1} edits ({2})." - self.connection.reply(data, msg.format(username, count, url)) + self.connection.reply(data, msg.format(name, count, url.format(safe))) diff --git a/bot/commands/rights.py b/bot/commands/rights.py index a38dfdf..db436a7 100644 --- a/bot/commands/rights.py +++ b/bot/commands/rights.py @@ -4,7 +4,7 @@ from classes import BaseCommand import wiki class Command(BaseCommand): - """Retrieve a list of rights for a given username.""" + """Retrieve a list of rights for a given name.""" name = "rights" def check(self, data): @@ -15,19 +15,19 @@ class Command(BaseCommand): def process(self, data): if not data.args: - self.connection.reply(data, "who do you want me to look up?") - return + name = data.nick + else: + name = ' '.join(data.args) - username = ' '.join(data.args) site = wiki.get_site() site._maxlag = None - user = site.get_user(username) - + user = site.get_user(name) + try: rights = user.groups() except wiki.UserNotFoundError: msg = "the user \x0302{0}\x0301 does not exist." - self.connection.reply(data, msg.format(username)) + self.connection.reply(data, msg.format(name)) return try: @@ -35,4 +35,4 @@ class Command(BaseCommand): except ValueError: pass msg = "the rights for \x0302{0}\x0301 are {1}." - self.connection.reply(data, msg.format(username, ', '.join(rights))) + self.connection.reply(data, msg.format(name, ', '.join(rights))) From e06b61393c16c63ba74c993c817583106dfcb003 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 21 Aug 2011 21:34:03 -0400 Subject: [PATCH 07/16] !praise --- bot/commands/praise.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 bot/commands/praise.py diff --git a/bot/commands/praise.py b/bot/commands/praise.py new file mode 100644 index 0000000..26fd4b6 --- /dev/null +++ b/bot/commands/praise.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- + +import random + +from classes import BaseCommand + +class Command(BaseCommand): + """Praise people!""" + name = "praise" + + def check(self, data): + commands = ["praise", "earwig", "leonard", "leonard^bloom", "groove", + "groovedog"] + return data.is_command and data.command in commands + + def process(self, data): + if data.command == "earwig": + msg = "\x02Earwig\x0F is the bestest Python programmer ever!" + elif data.command in ["leonard", "leonard^bloom"]: + msg = "\x02Leonard^Bloom\x0F is the biggest slacker ever!" + elif data.command in ["groove", "groovedog"]: + msg = "\x02GrooveDog\x0F is the bestest heh evar!" + else: + if not data.args: + msg = "You use this command to praise certain people. Who they are is a secret." + else: + msg = "You're doing it wrong." + self.connection.reply(data, msg) + return + + self.connection.say(data.chan, msg) From 4488f6f5e3d11c1f0ef22614abe85ce9f37c1afd Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 21 Aug 2011 23:25:59 -0400 Subject: [PATCH 08/16] !registration --- bot/commands/registration.py | 63 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 bot/commands/registration.py diff --git a/bot/commands/registration.py b/bot/commands/registration.py new file mode 100644 index 0000000..e897c99 --- /dev/null +++ b/bot/commands/registration.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- + +import time + +from classes import BaseCommand +import wiki + +class Command(BaseCommand): + """Return when a user registered.""" + name = "registration" + + def check(self, data): + commands = ["registration", "age"] + if data.is_command and data.command in commands: + return True + return False + + def process(self, data): + if not data.args: + name = data.nick + else: + name = ' '.join(data.args) + + site = wiki.get_site() + site._maxlag = None + user = site.get_user(name) + + try: + reg = user.registration() + except wiki.UserNotFoundError: + msg = "the user \x0302{0}\x0301 does not exist." + self.connection.reply(data, msg.format(name)) + return + + date = time.strftime("%b %m, %Y at %I:%M:%S %p", reg) + age = self.get_diff(time.mktime(reg), time.mktime(time.gmtime())) + + g = user.gender() + if g == "male": + gender = "He's" + elif g == "female": + gender = "She's" + else: + gende = "They're" + + msg = "\x0302{0}\x0301 registered on {1}. {2} {3} old." + self.connection.reply(data, msg.format(name, date, gender, age)) + + def get_diff(self, t1, t2): + parts = {"years": 31536000, "days": 86400, "hours": 3600, + "minutes": 60, "seconds": 1} + msg = [] + + order = sorted(parts.items(), key=lambda x: x[1], reverse=True) + for key, value in order: + num = 0 + while t2 - t1 > value: + t1 += value + num += 1 + if num or (not num and msg): + msg.append(" ".join((str(num), key))) + + return ", ".join(msg) From 7d464aeac48db0e24c90c91d1425f5db269f7b8c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 21 Aug 2011 23:30:38 -0400 Subject: [PATCH 09/16] argh --- bot/commands/registration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/commands/registration.py b/bot/commands/registration.py index e897c99..3ce12ea 100644 --- a/bot/commands/registration.py +++ b/bot/commands/registration.py @@ -41,7 +41,7 @@ class Command(BaseCommand): elif g == "female": gender = "She's" else: - gende = "They're" + gender = "They're" msg = "\x0302{0}\x0301 registered on {1}. {2} {3} old." self.connection.reply(data, msg.format(name, date, gender, age)) From 08b686804498f4c0842d29a380ffda1c92d55648 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 21 Aug 2011 23:36:13 -0400 Subject: [PATCH 10/16] drop AM/PM in favor of just UTC --- bot/commands/registration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/commands/registration.py b/bot/commands/registration.py index 3ce12ea..7eeb43d 100644 --- a/bot/commands/registration.py +++ b/bot/commands/registration.py @@ -32,7 +32,7 @@ class Command(BaseCommand): self.connection.reply(data, msg.format(name)) return - date = time.strftime("%b %m, %Y at %I:%M:%S %p", reg) + date = time.strftime("%b %m, %Y at %H:%M:%S %Z", reg) age = self.get_diff(time.mktime(reg), time.mktime(time.gmtime())) g = user.gender() From 4939cd04564bf119bc38e5c5caac2d49a3c29cf2 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 21 Aug 2011 23:37:23 -0400 Subject: [PATCH 11/16] bleh --- bot/commands/registration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/commands/registration.py b/bot/commands/registration.py index 7eeb43d..5b88b85 100644 --- a/bot/commands/registration.py +++ b/bot/commands/registration.py @@ -32,7 +32,7 @@ class Command(BaseCommand): self.connection.reply(data, msg.format(name)) return - date = time.strftime("%b %m, %Y at %H:%M:%S %Z", reg) + date = time.strftime("%b %m, %Y at %H:%M:%S UTC", reg) age = self.get_diff(time.mktime(reg), time.mktime(time.gmtime())) g = user.gender() From cddb22636f7d1969aa3356ffcfb2c0e2adf84a37 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 21 Aug 2011 23:44:36 -0400 Subject: [PATCH 12/16] wikitools fix in User when said user is really old and the API doesn't give a registration date --- bot/wiki/user.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/bot/wiki/user.py b/bot/wiki/user.py index b1cca38..da14255 100644 --- a/bot/wiki/user.py +++ b/bot/wiki/user.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from time import strptime +from time import gmtime, strptime from wiki.constants import * from wiki.exceptions import UserNotFoundError @@ -101,7 +101,12 @@ class User(object): self._editcount = res["editcount"] reg = res["registration"] - self._registration = strptime(reg, "%Y-%m-%dT%H:%M:%SZ") + try: + self._registration = strptime(reg, "%Y-%m-%dT%H:%M:%SZ") + except TypeError: + # Sometimes the API doesn't give a date; the user's probably really + # old. There's nothing else we can do! + self._registration = gmtime(0) try: res["emailable"] From 3bb5228cbfa26237ea0fab37ef6d0dc462b40f0f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 22 Aug 2011 00:08:08 -0400 Subject: [PATCH 13/16] I'm not sure why I made this huge mistake but errrrghhhh. --- bot/commands/registration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/commands/registration.py b/bot/commands/registration.py index 5b88b85..4547872 100644 --- a/bot/commands/registration.py +++ b/bot/commands/registration.py @@ -32,7 +32,7 @@ class Command(BaseCommand): self.connection.reply(data, msg.format(name)) return - date = time.strftime("%b %m, %Y at %H:%M:%S UTC", reg) + date = time.strftime("%b %d, %Y at %H:%M:%S UTC", reg) age = self.get_diff(time.mktime(reg), time.mktime(time.gmtime())) g = user.gender() From 4f3a5930c8123f27d79d4e46c6a14caa44aa964d Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 23 Aug 2011 19:24:33 -0400 Subject: [PATCH 14/16] edit support! --- bot/wiki/exceptions.py | 31 ++++++++-- bot/wiki/page.py | 149 +++++++++++++++++++++++++++++++++++-------------- bot/wiki/site.py | 4 +- 3 files changed, 136 insertions(+), 48 deletions(-) diff --git a/bot/wiki/exceptions.py b/bot/wiki/exceptions.py index 0e7e824..5a87fda 100644 --- a/bot/wiki/exceptions.py +++ b/bot/wiki/exceptions.py @@ -3,7 +3,7 @@ """ EarwigBot's Wiki Toolset: Exceptions -This module contains all exceptions used by the wiki package. +This module contains all exceptions used by the wiki package. There are a lot. """ class WikiToolsetError(Exception): @@ -22,11 +22,6 @@ class LoginError(WikiToolsetError): """An error occured while trying to login. Perhaps the username/password is incorrect.""" -class PermissionsError(WikiToolsetError): - """We tried to do something we don't have permission to, like a non-admin - trying to delete a page, or trying to edit a page when no login information - was provided.""" - class NamespaceNotFoundError(WikiToolsetError): """A requested namespace name or namespace ID does not exist.""" @@ -45,3 +40,27 @@ class RedirectError(WikiToolsetError): class UserNotFoundError(WikiToolsetError): """Attempting to get certain information about a user that does not exist.""" + +class EditError(WikiToolsetError): + """We got some error while editing. Sometimes, a subclass of this exception + will be used, like PermissionsError or EditConflictError.""" + +class PermissionsError(EditError): + """We tried to do something we don't have permission to, like a non-admin + trying to delete a page, or trying to edit a page when no login information + was provided.""" + +class EditConflictError(EditError): + """We've gotten an edit conflict or a (rarer) delete/recreate conflict.""" + +class NoContentError(EditError): + """We tried to create a page or new section with no content.""" + +class ContentTooBigError(EditError): + """The edit we tried to push exceeded the article size limit.""" + +class SpamDetectedError(EditError): + """The spam filter refused our edit.""" + +class FilteredError(EditError): + """The edit filter refused our edit.""" diff --git a/bot/wiki/page.py b/bot/wiki/page.py index 41a789b..812824b 100644 --- a/bot/wiki/page.py +++ b/bot/wiki/page.py @@ -222,24 +222,104 @@ class Page(object): self._load_attributes() self._force_existence() - def _get_token(self): - """Tries to get an edit token for the page. - - This is actually the same as the delete and protect tokens, so we'll - use it for everything. Raises PermissionError if we're not allowed to - edit the page, otherwise sets self._token and self._starttimestamp. + def _edit(self, params=None, text=None, summary=None, minor=None, bot=None, + force=None, section=None, captcha_id=None, captcha_word=None, + tries=0): + """Edit a page! + + If `params` is given, """ - params = {"action": "query", "prop": "info", "intoken": "edit", - "titles": self._title} - result = self._site._api_query(params) - - try: - self._token = result["query"]["pages"].values()[0]["edittoken"] - except KeyError: + if not self._token: + self._load_attributes() + if not self._token: e = "You don't have permission to edit this page." raise PermissionsError(e) + self._force_validity() # Weed these out before we get too far + + if not params: + params = self._build_edit_params(text, summary, minor, bot, force, + section, captcha_id, captcha_word) + + try: + result = self._site._api_query(params) + except SiteAPIError as error: + if not hasattr(error, code): + raise + result = self._handle_edit_exceptions(error, params, tries) + + # These attributes are now invalidated: + self._content = None + self._basetimestamp = None + + return result + + def _build_edit_params(self, text, summary, minor, bot, force, section, + captcha_id, captcha_word): + """Something.""" + hashed = md5(text).hexdigest() # Checksum to ensure text is correct + params = {"action": "edit", "title": self._title, "text": text, + "token": self._token, "summary": summary, "md5": hashed} + + if section: + params["section"] = section + if captcha_id and captcha_word: + params["captchaid"] = captcha_id + params["captchaword"] = captcha_word + if minor: + params["minor"] = "true" else: - self._starttimestamp = strftime("%Y-%m-%dT%H:%M:%SZ") + params["notminor"] = "true" + if bot: + params["bot"] = "true" + if self._exists == 2: # Page does not already exist + params["recreate"] = "true" + + if not force: + params["starttimestamp"] = self._starttimestamp + if self._basetimestamp: + params["basetimestamp"] = self._basetimestamp + if self._exists == 3: + # Page exists; don't re-create it by accident if it's deleted: + params["nocreate"] = "true" + else: + # Page does not exist; don't edit if it already exists: + params["createonly"] = "true" + + return params + + def _handle_edit_exceptions(self, error, params, tries): + """Something.""" + if error.code in ["noedit", "cantcreate", "protectedtitle", + "noimageredirect"]: + raise PermissionsError(error.info) + + elif error.code in ["noedit-anon", "cantcreate-anon", + "noimageredirect-anon"]: + if not all(self._site._login_info): # Insufficient login info + raise PermissionsError(error.info) + if self.tries == 0: # We have login info; try to login: + self._site._login(self._site._login_info) + return self._edit(params=params, tries=1) + else: # We already tried to log in and failed! + e = "Although we should be logged in, we are not. This may be a cookie problem or an odd bug." + raise LoginError(e) + + elif error.code in ["editconflict", "pagedeleted", "articleexists"]: + raise EditConflictError(error.info) + + elif error.code in ["emptypage", "emptynewsection"]: + raise NoContentError(error.info) + + elif error.code == "contenttoobig": + raise ContentTooBigError(error.info) + + elif error.code == "spamdetected": + raise SpamDetectedError(error.info) + + elif error.code == "filtered": + raise FilteredError(error.info) + + raise EditError(", ".join((error.code, error.info))) def title(self, force=False): """Returns the Page's title, or pagename. @@ -482,36 +562,23 @@ class Page(object): the edit will be marked as a bot edit, but only if we actually have a bot flag. - Use `force` to ignore edit conflicts and page deletions/recreations - that occured between getting our edit token and editing our page. Be - careful with this! + Use `force` to push the new content even if there's an edit conflict or + the page was deleted/recreated between getting our edit token and + editing our page. Be careful with this! """ - if not self._token: - self._get_token() + self._edit(text=text, summary=summary, minor=minor, bot=bot, + force=force) - hashed = md5(text).hexdigest() + def add_section(self, text, title, minor=False, bot=True, force=False): + """Adds a new section to the bottom of the page. - params = {"action": "edit", "title": self._title, "text": text, - "token": self._token, "summary": summary, "md5": hashed} + The arguments for this are the same as those for edit(), but instead of + providing a summary, you provide a section title. - if minor: - params["minor"] = "true" - else: - params["notminor"] = "true" - if bot: - params["bot"] = "true" - - if not force: - params["starttimestamp"] = self._starttimestamp - if self._basetimestamp: - params["basetimestamp"] = self._basetimestamp - else: - params["recreate"] = "true" - - result = self._site._api_query(params) - print result + Likewise, raised exceptions are the same as edit()'s. - def add_section(self, text, title, minor=False, bot=True): - """ + This should create the page if it does not already exist, with just the + new section as content. """ - pass + self._edit(text=text, summary=title, minor=minor, bot=bot, force=force, + section="new") diff --git a/bot/wiki/site.py b/bot/wiki/site.py index ab94947..0103b68 100644 --- a/bot/wiki/site.py +++ b/bot/wiki/site.py @@ -176,7 +176,9 @@ class Site(object): return self._api_query(params, tries=tries, wait=wait*3) else: e = 'API query failed: got error "{0}"; server says: "{1}".' - raise SiteAPIError(e.format(code, info)) + error = SiteAPIError(e.format(code, info)) + error.code, error.info = code, info + raise error def _load_attributes(self, force=False): """Load data about our Site from the API. From b1cf39ac64325bab5cebff37a88e774d2ff5e2b5 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 24 Aug 2011 00:21:45 -0400 Subject: [PATCH 15/16] major improvements to editing; fixes, cleanup, support for AssertEdit is complete w/ logging in following a failed assertion; bugfixes --- bot/wiki/page.py | 115 ++++++++++++++++++++++++++++++++++++++++++------------- bot/wiki/site.py | 2 +- 2 files changed, 89 insertions(+), 28 deletions(-) diff --git a/bot/wiki/page.py b/bot/wiki/page.py index 812824b..19cfac3 100644 --- a/bot/wiki/page.py +++ b/bot/wiki/page.py @@ -2,7 +2,7 @@ from hashlib import md5 import re -from time import strftime +from time import gmtime, strftime from urllib import quote from wiki.exceptions import * @@ -182,7 +182,7 @@ class Page(object): except KeyError: pass else: - self._starttimestamp = strftime("%Y-%m-%dT%H:%M:%SZ") + self._starttimestamp = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime()) # We've determined the namespace and talkpage status in __init__() # based on the title, but now we can be sure: @@ -225,37 +225,59 @@ class Page(object): def _edit(self, params=None, text=None, summary=None, minor=None, bot=None, force=None, section=None, captcha_id=None, captcha_word=None, tries=0): - """Edit a page! + """Edit the page! + + If `params` is given, we'll use it as our API query parameters. + Otherwise, we'll build params using the given kwargs via + _build_edit_params(). - If `params` is given, + We'll then try to do the API query, and catch any errors the API raises + in _handle_edit_errors(). We'll then throw these back as subclasses of + EditError. """ + # Try to get our edit token, and die if we can't: if not self._token: self._load_attributes() if not self._token: e = "You don't have permission to edit this page." raise PermissionsError(e) - self._force_validity() # Weed these out before we get too far + + # Weed out invalid pages before we get too far: + self._force_validity() + # Build our API query string: if not params: params = self._build_edit_params(text, summary, minor, bot, force, section, captcha_id, captcha_word) + else: # Make sure we have the right token: + params["token"] = self._token + # Try the API query, catching most errors with our handler: try: result = self._site._api_query(params) except SiteAPIError as error: - if not hasattr(error, code): - raise - result = self._handle_edit_exceptions(error, params, tries) - - # These attributes are now invalidated: - self._content = None - self._basetimestamp = None - - return result + if not hasattr(error, "code"): + raise # We can only handle errors with a code attribute + result = self._handle_edit_errors(error, params, tries) + + # If everything was successful, reset invalidated attributes: + if result["edit"]["result"] == "Success": + self._content = None + self._basetimestamp = None + self._exists = 0 + return + + # If we're here, then the edit failed. If it's because of AssertEdit, + # handle that. Otherwise, die - something odd is going on: + try: + assertion = result["edit"]["assert"] + except KeyError: + raise EditError(result["edit"]) + self._handle_assert_edit(assertion, params, tries) def _build_edit_params(self, text, summary, minor, bot, force, section, captcha_id, captcha_word): - """Something.""" + """Given some keyword arguments, build an API edit query string.""" hashed = md5(text).hexdigest() # Checksum to ensure text is correct params = {"action": "edit", "title": self._title, "text": text, "token": self._token, "summary": summary, "md5": hashed} @@ -271,40 +293,50 @@ class Page(object): params["notminor"] = "true" if bot: params["bot"] = "true" - if self._exists == 2: # Page does not already exist - params["recreate"] = "true" if not force: params["starttimestamp"] = self._starttimestamp if self._basetimestamp: params["basetimestamp"] = self._basetimestamp - if self._exists == 3: - # Page exists; don't re-create it by accident if it's deleted: - params["nocreate"] = "true" - else: + if self._exists == 2: # Page does not exist; don't edit if it already exists: params["createonly"] = "true" + else: + params["recreate"] = "true" return params - def _handle_edit_exceptions(self, error, params, tries): - """Something.""" + def _handle_edit_errors(self, error, params, tries): + """If our edit fails due to some error, try to handle it. + + We'll either raise an appropriate exception (for example, if the page + is protected), or we'll try to fix it (for example, if we can't edit + due to being logged out, we'll try to log in). + """ if error.code in ["noedit", "cantcreate", "protectedtitle", "noimageredirect"]: raise PermissionsError(error.info) elif error.code in ["noedit-anon", "cantcreate-anon", "noimageredirect-anon"]: - if not all(self._site._login_info): # Insufficient login info + if not all(self._site._login_info): + # Insufficient login info: raise PermissionsError(error.info) - if self.tries == 0: # We have login info; try to login: + if tries == 0: + # We have login info; try to login: self._site._login(self._site._login_info) + self._token = None # Need a new token; old one is invalid now return self._edit(params=params, tries=1) - else: # We already tried to log in and failed! + else: + # We already tried to log in and failed! e = "Although we should be logged in, we are not. This may be a cookie problem or an odd bug." raise LoginError(e) elif error.code in ["editconflict", "pagedeleted", "articleexists"]: + # These attributes are now invalidated: + self._content = None + self._basetimestamp = None + self._exists = 0 raise EditConflictError(error.info) elif error.code in ["emptypage", "emptynewsection"]: @@ -319,7 +351,36 @@ class Page(object): elif error.code == "filtered": raise FilteredError(error.info) - raise EditError(", ".join((error.code, error.info))) + raise EditError(": ".join((error.code, error.info))) + + def _handle_assert_edit(self, assertion, params, tries): + """If we can't edit due to a failed AssertEdit assertion, handle that. + + If the assertion was 'user' and we have valid login information, try to + log in. Otherwise, raise PermissionsError with details. + """ + if assertion == "user": + if not all(self._site._login_info): + # Insufficient login info: + e = "AssertEdit: user assertion failed, and no login info was provided." + raise PermissionsError(e) + if tries == 0: + # We have login info; try to login: + self._site._login(self._site._login_info) + self._token = None # Need a new token; old one is invalid now + return self._edit(params=params, tries=1) + else: + # We already tried to log in and failed! + e = "Although we should be logged in, we are not. This may be a cookie problem or an odd bug." + raise LoginError(e) + + elif assertion == "bot": + e = "AssertEdit: bot assertion failed; we don't have a bot flag!" + raise PermissionsError(e) + + # Unknown assertion, maybe "true", "false", or "exists": + e = "AssertEdit: assertion '{0}' failed.".format(assertion) + raise PermissionsError(e) def title(self, force=False): """Returns the Page's title, or pagename. diff --git a/bot/wiki/site.py b/bot/wiki/site.py index 0103b68..bca2807 100644 --- a/bot/wiki/site.py +++ b/bot/wiki/site.py @@ -162,7 +162,7 @@ class Site(object): try: code = res["error"]["code"] info = res["error"]["info"] - except KeyError: + except (TypeError, KeyError): return res if code == "maxlag": From d0eaa8ebe093c8db519f915a8970aa9d1ced7433 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 24 Aug 2011 14:50:04 -0400 Subject: [PATCH 16/16] adding __repr__() and __str__() methods to Site, Page, Category, and User --- bot/wiki/category.py | 10 ++++++++++ bot/wiki/page.py | 10 ++++++++++ bot/wiki/site.py | 25 +++++++++++++++++++++++++ bot/wiki/user.py | 8 ++++++++ 4 files changed, 53 insertions(+) diff --git a/bot/wiki/category.py b/bot/wiki/category.py index 745c90c..578ae01 100644 --- a/bot/wiki/category.py +++ b/bot/wiki/category.py @@ -16,6 +16,16 @@ class Category(Page): members -- returns a list of titles in the category """ + def __repr__(self): + """Returns the canonical string representation of the Category.""" + res = ", ".join(("Category(title={0!r}", "follow_redirects={1!r}", + "site={2!r})")) + return res.format(self._title, self._follow_redirects, self._site) + + def __str__(self): + """Returns a nice string representation of the Category.""" + return ''.format(self.title(), str(self._site)) + def members(self, limit=50): """Returns a list of titles in the category. diff --git a/bot/wiki/page.py b/bot/wiki/page.py index 19cfac3..a917747 100644 --- a/bot/wiki/page.py +++ b/bot/wiki/page.py @@ -81,6 +81,16 @@ class Page(object): else: self._is_talkpage = self._namespace % 2 == 1 + def __repr__(self): + """Returns the canonical string representation of the Page.""" + res = ", ".join(("Page(title={0!r}", "follow_redirects={1!r}", + "site={2!r})")) + return res.format(self._title, self._follow_redirects, self._site) + + def __str__(self): + """Returns a nice string representation of the Page.""" + return ''.format(self.title(), str(self._site)) + def _force_validity(self): """Used to ensure that our page's title is valid. diff --git a/bot/wiki/site.py b/bot/wiki/site.py index bca2807..476159e 100644 --- a/bot/wiki/site.py +++ b/bot/wiki/site.py @@ -96,6 +96,31 @@ class Site(object): if logged_in_as is None or name != logged_in_as: self._login(login) + def __repr__(self): + """Returns the canonical string representation of the Site.""" + res = ", ".join(( + "Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}", + "base_url={_base_url!r}", "article_path={_article_path!r}", + "script_path={_script_path!r}", "assert_edit={_assert_edit!r}", + "maxlag={_maxlag!r}", "sql={_sql!r}", "login={0}", + "user_agent={2!r}", "cookiejar={1})" + )) + name, password = self._login_info + login = "({0}, {1})".format(repr(name), "hidden" if password else None) + cookies = self._cookiejar.__class__.__name__ + try: + cookies += "({0!r})".format(self._cookiejar.filename) + except AttributeError: + cookies += "()" + agent = self._opener.addheaders[0][1] + return res.format(login, cookies, agent, **self.__dict__) + + def __str__(self): + """Returns a nice string representation of the Site.""" + res = "" + return res.format(self.name(), self.project(), self.lang(), + self.domain()) + def _api_query(self, params, tries=0, wait=5): """Do an API query with `params` as a dict of parameters. diff --git a/bot/wiki/user.py b/bot/wiki/user.py index da14255..f65b9fc 100644 --- a/bot/wiki/user.py +++ b/bot/wiki/user.py @@ -45,6 +45,14 @@ class User(object): self._site = site self._name = name + def __repr__(self): + """Returns the canonical string representation of the User.""" + return "User(name={0!r}, site={1!r})".format(self._name, self._site) + + def __str__(self): + """Returns a nice string representation of the User.""" + return ''.format(self.name(), str(self._site)) + def _get_attribute(self, attr, force): """Internally used to get an attribute by name.