From d207baaee25dc4d7d7e8620b1597bd34b07b1c88 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Mon, 8 Aug 2011 16:01:01 -0400
Subject: [PATCH] ported afc_report to wikitools and cleaned it up massively; added creator() method to Page; safety check in functions._get_site_object_from_dict()

---
 Review notes (text between "---" and the first "diff --git" is ignored by
 git-am):

 * Line breaks and indentation were restored from a whitespace-collapsed copy
   of this patch. If context spacing does not match the tree, apply with
   "git apply --ignore-whitespace". Hunk sizes and the diffstat are unchanged.
 * The added lines in bot/commands/afc_report.py differ from commit d207baa
   in the places listed here, so the "index" blob ids below no longer
   describe the resulting file contents:
   - report(): added the missing ":" after `if status == "accepted"`; without
     it the module raises SyntaxError on import.
   - report(): re.sub() was given re.IGNORECASE in its positional `count`
     slot, so the match stayed case-sensitive and the "Wikipedia:Articles for
     creation/" prefix was never stripped. The pattern now carries an inline
     (?i) flag instead, which works on every Python 2.x release.
   - get_status(): regex literals are raw strings (same runtime value), and
     the fallback status is spelled "unknown" instead of "unkown".

 bot/commands/afc_report.py | 137 ++++++++++++++++++++++-----------------------
 bot/wiki/functions.py | 6 +-
 bot/wiki/page.py | 40 ++++++++++---
 3 files changed, 104 insertions(+), 79 deletions(-)

diff --git a/bot/commands/afc_report.py b/bot/commands/afc_report.py
index 402b959..9bb1f27 100644
--- a/bot/commands/afc_report.py
+++ b/bot/commands/afc_report.py
@@ -1,92 +1,87 @@
 # -*- coding: utf-8 -*-
 
-import json
 import re
-import urllib
 
 from classes import BaseCommand
+import wiki
 
 class Command(BaseCommand):
     """Get information about an AFC submission by name."""
     name = "report"
 
     def process(self, data):
+        self.site = wiki.get_site()
         self.data = data
+
         if not data.args:
-            self.connection.reply(data, "what submission do you want me to give information about?")
+            msg = "what submission do you want me to give information about?"
+            self.connection.reply(data, msg)
             return
 
-        pagename = ' '.join(data.args)
-        pagename = pagename.replace("http://en.wikipedia.org/wiki/", "").replace("http://enwp.org/", "").replace("_", " ")
-        pagename = pagename.strip()
-
-        if self.page_exists(pagename): # given '!report Foo', first try [[Foo]]
-            self.report(pagename)
-        else: # if that doesn't work, try [[Wikipedia:Articles for creation/Foo]]
-            if self.page_exists("Wikipedia:Articles for creation/" + pagename):
-                self.report("Wikipedia:Articles for creation/" + pagename)
-            else: # if that doesn't work, try [[Wikipedia talk:Articles for creation/Foo]]
-                if self.page_exists("Wikipedia talk:Articles for creation/" + pagename):
-                    self.report("Wikipedia talk:Articles for creation/" + pagename)
-                else:
-                    self.connection.reply(data, "submission \x0302{0}\x0301 not found.".format(pagename))
-
-    def report(self, pagename):
+        title = ' '.join(data.args)
+        title = title.replace("http://en.wikipedia.org/wiki/", "")
+        title = title.replace("http://enwp.org/", "").strip()
+
+        # Given '!report Foo', first try [[Foo]]:
+        if self.report(title):
+            return
+
+        # Then try [[Wikipedia:Articles for creation/Foo]]:
+        title2 = "".join(("Wikipedia:Articles for creation/", title))
+        if self.report(title2):
+            return
+
+        # Then try [[Wikipedia talk:Articles for creation/Foo]]:
+        title3 = "".join(("Wikipedia talk:Articles for creation/", title))
+        if self.report(title3):
+            return
+
+        msg = "submission \x0302{0}\x0301 not found.".format(title)
+        self.connection.reply(data, msg)
+
+    def report(self, title):
         data = self.data
-        shortname = pagename.replace("Wikipedia:Articles for creation/", "").replace("Wikipedia talk:Articles for creation/", "")
-        url = "http://enwp.org/" + urllib.quote(pagename.replace(" ", "_"))
-        status = self.get_status(pagename)
-        user, user_url = self.get_creator(pagename)
-
-        self.connection.reply(data, "AfC submission report for \x0302{0}\x0301 ({1}):".format(shortname, url))
-        self.connection.say(data.chan, "Status: \x0303{0}\x0301".format(status))
-        if status == "accepted": # the first edit will be the redirect [[WT:AFC/Foo]] -> [[Foo]], NOT the creation of the submission
-            self.connection.say(data.chan, "Reviewed by \x0302{0}\x0301 ({1})".format(user, user_url))
-        else:
-            self.connection.say(data.chan, "Submitted by \x0302{0}\x0301 ({1})".format(user, user_url))
-
-    def page_exists(self, pagename):
-        params = {'action': 'query', 'format': 'json', 'titles': pagename}
-        data = urllib.urlencode(params)
-        raw = urllib.urlopen("http://en.wikipedia.org/w/api.php", data).read()
-        res = json.loads(raw)
-        try:
-            res['query']['pages'].values()[0]['missing'] # this key will appear if the page does not exist
-            return False
-        except KeyError: # if it's not there, the page exists
-            return True
-
-    def get_status(self, pagename):
-        params = {'action': 'query', 'prop': 'revisions', 'rvprop':'content', 'rvlimit':'1', 'format': 'json'}
-        params['titles'] = pagename
-        data = urllib.urlencode(params)
-        raw = urllib.urlopen("http://en.wikipedia.org/w/api.php", data).read()
-        res = json.loads(raw)
-        pageid = res['query']['pages'].keys()[0]
-        content = res['query']['pages'][pageid]['revisions'][0]['*']
-        lcontent = content.lower()
-        if re.search("\{\{afc submission\|r\|(.*?)\}\}", lcontent):
+        page = self.site.get_page(title, follow_redirects=False)
+        if not page.exists()[0]:
+            return
+
+        url = page.url().replace("en.wikipedia.org/wiki", "enwp.org")
+        short = re.sub(r"(?i)wikipedia( talk)?:articles for creation/", "",
+                       title)
+        status = self.get_status(page)
+        user = self.site.get_user(page.creator())
+        user_name = user.name()
+        user_url = user.get_userpage().url()
+
+        msg1 = "AfC submission report for \x0302{0}\x0301 ({1}):"
+        msg2 = "Status: \x0303{0}\x0301"
+        msg3 = "Submitted by \x0302{0}\x0301 ({1})"
+        if status == "accepted":
+            msg3 = "Reviewed by \x0302{0}\x0301 ({1})"
+
+        self.connection.reply(data, msg1.format(short, url))
+        self.connection.say(data.chan, msg2.format(status))
+        self.connection.say(data.chan, msg3.format(user_name, user_url))
+
+        return True
+
+    def get_status(self, page):
+        content = page.get()
+
+        if page.is_redirect():
+            target = page.get_redirect_target()
+            if self.site.get_page(target).namespace() == 0:
+                return "accepted"
+            return "redirect"
+        if re.search(r"\{\{afc submission\|r\|(.*?)\}\}", content, re.I):
             return "being reviewed"
-        elif re.search("\{\{afc submission\|\|(.*?)\}\}", lcontent):
+        if re.search(r"\{\{afc submission\|\|(.*?)\}\}", content, re.I):
             return "pending"
-        elif re.search("\{\{afc submission\|d\|(.*?)\}\}", lcontent):
+        if re.search(r"\{\{afc submission\|d\|(.*?)\}\}", content, re.I):
+            regex = r"\{\{afc submission\|d\|(.*?)(\||\}\})"
             try:
-                reason = re.findall("\{\{afc submission\|d\|(.*?)(\||\}\})", lcontent)[0][0]
-                return "declined with reason \"{0}\"".format(reason)
+                reason = re.findall(regex, content, re.I)[0][0]
             except IndexError:
                 return "declined"
-        else:
-            if "#redirect" in content:
-                return "accepted"
-            else:
-                return "unkown"
-
-    def get_creator(self, pagename):
-        params = {'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'rvdir': 'newer', 'rvlimit': '1', 'format': 'json'}
-        params['titles'] = pagename
-        data = urllib.urlencode(params)
-        raw = urllib.urlopen("http://en.wikipedia.org/w/api.php", data).read()
-        res = json.loads(raw)
-        user = res['query']['pages'].values()[0]['revisions'][0]['user']
-        user_url = "http://enwp.org/User_talk:" + urllib.quote(user.replace(" ", "_"))
-        return user, user_url
+            return "declined with reason \"{0}\"".format(reason)
+        return "unknown"
diff --git a/bot/wiki/functions.py b/bot/wiki/functions.py
index 3532a4d..6fce5a1 100644
--- a/bot/wiki/functions.py
+++ b/bot/wiki/functions.py
@@ -90,7 +90,11 @@ def _get_site_object_from_dict(name, d):
 
     for key, value in namespaces.items(): # Convert string keys to integers
         del namespaces[key]
-        namespaces[int(key)] = value
+        try:
+            namespaces[int(key)] = value
+        except ValueError: # Data is broken, ignore it
+            namespaces = None
+            break
 
     return Site(name=name, project=project, lang=lang, base_url=base_url,
                 article_path=article_path, script_path=script_path, sql=sql,
diff --git a/bot/wiki/page.py b/bot/wiki/page.py
index 9fe167e..14139fe 100644
--- a/bot/wiki/page.py
+++ b/bot/wiki/page.py
@@ -20,6 +20,7 @@ class Page(object):
     url -- returns the page's URL
     namespace -- returns the page's namespace as an integer
     protection -- returns the page's current protection status
+    creator -- returns the page's creator (first user to edit)
     is_talkpage -- returns True if the page is a talkpage, else False
     is_redirect -- returns True if the page is a redirect, else False
     toggle_talk -- returns a content page's talk page, or vice versa
@@ -51,6 +52,7 @@ class Page(object):
         self._protection = None
         self._fullurl = None
         self._content = None
+        self._creator = None
 
         # Try to determine the page's namespace using our site's namespace
         # converter:
@@ -122,15 +124,17 @@ class Page(object):
         """Loads various data from the API in a single query.
 
         Loads self._title, ._exists, ._is_redirect, ._pageid, ._fullurl,
-        ._protection, ._namespace, ._is_talkpage, and ._lastrevid using the
-        API. It will do a query of its own unless `result` is provided, in
-        which case we'll pretend `result` is what the query returned.
+        ._protection, ._namespace, ._is_talkpage, ._creator, and ._lastrevid
+        using the API. It will do a query of its own unless `result` is
+        provided, in which case we'll pretend `result` is what the query
+        returned.
 
         Assuming the API is sound, this should not raise any exceptions.
         """
         if result is None:
-            params = {"action": "query", "prop": "info", "titles": self._title,
-                      "inprop": "protection|url"}
+            params = {"action": "query", "rvprop": "user", "rvdir": "newer",
+                      "prop": "info|revisions", "rvlimit": 1,
+                      "titles": self._title, "inprop": "protection|url"}
             result = self._site._api_query(params)
 
         res = result["query"]["pages"].values()[0]
@@ -169,9 +173,10 @@ class Page(object):
         self._namespace = res["ns"]
         self._is_talkpage = self._namespace % 2 == 1 # talkpages have odd IDs
 
-        # This last field will only be specified if the page exists:
+        # These last two fields will only be specified if the page exists:
+        self._lastrevid = res.get("lastrevid")
         try:
-            self._lastrevid = res["lastrevid"]
+            self._creator = res['revisions'][0]['user']
         except KeyError:
             pass
 
@@ -287,6 +292,27 @@ class Page(object):
         self._force_validity() # invalid pages cannot be protected
         return self._protection
 
+    def creator(self, force=False):
+        """Returns the page's creator (i.e., the first user to edit the page).
+
+        Makes an API query if force is True or if we haven't already made one.
+        Normally, we can get the creator along with everything else (except
+        content) in self._load_attributes(). However, due to a limitation in
+        the API (can't get the editor of one revision and the content of
+        another at both ends of the history), if our other attributes were only
+        loaded from get(), we'll have to do another API query. This is done
+        by calling ourselves again with force=True.
+
+        Raises InvalidPageError or PageNotFoundError if the page name is
+        invalid or the page does not exist, respectively.
+        """
+        if self._exists == 0 or force:
+            self._load_wrapper()
+        self._force_existence()
+        if not self._creator and not force:
+            self.creator(force=True)
+        return self._creator
+
     def is_talkpage(self, force=False):
         """Returns True if the page is a talkpage, else False.
 