From d207baaee25dc4d7d7e8620b1597bd34b07b1c88 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Mon, 8 Aug 2011 16:01:01 -0400
Subject: [PATCH] ported afc_report to wikitools and cleaned it up massively;
 added creator() method to Page; safety check in
 functions._get_site_object_from_dict()

---
 bot/commands/afc_report.py | 137 ++++++++++++++++++++++-----------------------
 bot/wiki/functions.py      |   6 +-
 bot/wiki/page.py           |  40 ++++++++++---
 3 files changed, 104 insertions(+), 79 deletions(-)

diff --git a/bot/commands/afc_report.py b/bot/commands/afc_report.py
index 402b959..9bb1f27 100644
--- a/bot/commands/afc_report.py
+++ b/bot/commands/afc_report.py
@@ -1,92 +1,87 @@
 # -*- coding: utf-8  -*-
 
-import json
 import re
-import urllib
 
 from classes import BaseCommand
+import wiki
 
 class Command(BaseCommand):
     """Get information about an AFC submission by name."""
     name = "report"
 
     def process(self, data):
+        self.site = wiki.get_site()
         self.data = data
+
         if not data.args:
-            self.connection.reply(data, "what submission do you want me to give information about?")
+            msg = "what submission do you want me to give information about?"
+            self.connection.reply(data, msg)
             return
 
-        pagename = ' '.join(data.args)
-        pagename = pagename.replace("http://en.wikipedia.org/wiki/", "").replace("http://enwp.org/", "").replace("_", " ")
-        pagename = pagename.strip()
-
-        if self.page_exists(pagename):  # given '!report Foo', first try [[Foo]]
-            self.report(pagename)
-        else:  # if that doesn't work, try [[Wikipedia:Articles for creation/Foo]]
-            if self.page_exists("Wikipedia:Articles for creation/" + pagename):
-                self.report("Wikipedia:Articles for creation/" + pagename)
-            else:  # if that doesn't work, try [[Wikipedia talk:Articles for creation/Foo]]
-                if self.page_exists("Wikipedia talk:Articles for creation/" + pagename):
-                    self.report("Wikipedia talk:Articles for creation/" + pagename)
-                else:
-                    self.connection.reply(data, "submission \x0302{0}\x0301 not found.".format(pagename))
-
-    def report(self, pagename):
+        title = ' '.join(data.args)
+        title = title.replace("http://en.wikipedia.org/wiki/", "")
+        title = title.replace("http://enwp.org/", "").strip()
+
+        # Given '!report Foo', first try [[Foo]]:
+        if self.report(title):
+            return
+
+        # Then try [[Wikipedia:Articles for creation/Foo]]:
+        title2 = "".join(("Wikipedia:Articles for creation/", title))
+        if self.report(title2):
+            return
+
+        # Then try [[Wikipedia talk:Articles for creation/Foo]]:
+        title3 = "".join(("Wikipedia talk:Articles for creation/", title))
+        if self.report(title3):
+            return
+
+        msg = "submission \x0302{0}\x0301 not found.".format(title)
+        self.connection.reply(data, msg)
+
+    def report(self, title):
         data = self.data
-        shortname = pagename.replace("Wikipedia:Articles for creation/", "").replace("Wikipedia talk:Articles for creation/", "")
-        url = "http://enwp.org/" + urllib.quote(pagename.replace(" ", "_"))
-        status = self.get_status(pagename)
-        user, user_url = self.get_creator(pagename)
-
-        self.connection.reply(data, "AfC submission report for \x0302{0}\x0301 ({1}):".format(shortname, url))
-        self.connection.say(data.chan, "Status: \x0303{0}\x0301".format(status))
-        if status == "accepted":  # the first edit will be the redirect [[WT:AFC/Foo]] -> [[Foo]], NOT the creation of the submission
-            self.connection.say(data.chan, "Reviewed by \x0302{0}\x0301 ({1})".format(user, user_url))
-        else:
-            self.connection.say(data.chan, "Submitted by \x0302{0}\x0301 ({1})".format(user, user_url))
-
-    def page_exists(self, pagename):
-        params = {'action': 'query', 'format': 'json', 'titles': pagename}
-        data = urllib.urlencode(params)
-        raw = urllib.urlopen("http://en.wikipedia.org/w/api.php", data).read()
-        res = json.loads(raw)
-        try:
-            res['query']['pages'].values()[0]['missing']  # this key will appear if the page does not exist
-            return False
-        except KeyError:  # if it's not there, the page exists
-            return True
-
-    def get_status(self, pagename):
-        params = {'action': 'query', 'prop': 'revisions', 'rvprop':'content', 'rvlimit':'1', 'format': 'json'}
-        params['titles'] = pagename
-        data = urllib.urlencode(params)
-        raw = urllib.urlopen("http://en.wikipedia.org/w/api.php", data).read()
-        res = json.loads(raw)
-        pageid = res['query']['pages'].keys()[0]
-        content = res['query']['pages'][pageid]['revisions'][0]['*']
-        lcontent = content.lower()
-        if re.search("\{\{afc submission\|r\|(.*?)\}\}", lcontent):
+        page = self.site.get_page(title, follow_redirects=False)
+        if not page.exists()[0]:
+            return  # falsy result makes process() try the next title
+
+        url = page.url().replace("en.wikipedia.org/wiki", "enwp.org")
+        short = re.sub(r"(?i)wikipedia( talk)?:articles for creation/", "",
+                       title)  # 4th positional arg is count, not flags
+        status = self.get_status(page)
+        user = self.site.get_user(page.creator())
+        user_name = user.name()
+        user_url = user.get_userpage().url()
+
+        msg1 = "AfC submission report for \x0302{0}\x0301 ({1}):"
+        msg2 = "Status: \x0303{0}\x0301"
+        msg3 = "Submitted by \x0302{0}\x0301 ({1})"
+        if status == "accepted":
+            msg3 = "Reviewed by \x0302{0}\x0301 ({1})"
+
+        self.connection.reply(data, msg1.format(short, url))
+        self.connection.say(data.chan, msg2.format(status))
+        self.connection.say(data.chan, msg3.format(user_name, user_url))
+
+        return True
+
+    def get_status(self, page):
+        content = page.get()
+
+        if page.is_redirect():
+            target = page.get_redirect_target()
+            if self.site.get_page(target).namespace() == 0:
+                return "accepted"
+            return "redirect"  # target is outside namespace 0
+        if re.search(r"\{\{afc submission\|r\|(.*?)\}\}", content, re.I):
             return "being reviewed"
-        elif re.search("\{\{afc submission\|\|(.*?)\}\}", lcontent):
+        if re.search(r"\{\{afc submission\|\|(.*?)\}\}", content, re.I):
             return "pending"
-        elif re.search("\{\{afc submission\|d\|(.*?)\}\}", lcontent):
+        if re.search(r"\{\{afc submission\|d\|(.*?)\}\}", content, re.I):
+            regex = r"\{\{afc submission\|d\|(.*?)(\||\}\})"
             try:
-                reason = re.findall("\{\{afc submission\|d\|(.*?)(\||\}\})", lcontent)[0][0]
-                return "declined with reason \"{0}\"".format(reason)
+                reason = re.findall(regex, content, re.I)[0][0]
             except IndexError:
                 return "declined"
-        else:
-            if "#redirect" in content:
-                return "accepted"
-            else:
-                return "unkown"
-
-    def get_creator(self, pagename):
-        params = {'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'rvdir': 'newer', 'rvlimit': '1', 'format': 'json'}
-        params['titles'] = pagename
-        data = urllib.urlencode(params)
-        raw = urllib.urlopen("http://en.wikipedia.org/w/api.php", data).read()
-        res = json.loads(raw)
-        user = res['query']['pages'].values()[0]['revisions'][0]['user']
-        user_url = "http://enwp.org/User_talk:" + urllib.quote(user.replace(" ", "_"))
-        return user, user_url
+            return "declined with reason \"{0}\"".format(reason)
+        return "unknown"
diff --git a/bot/wiki/functions.py b/bot/wiki/functions.py
index 3532a4d..6fce5a1 100644
--- a/bot/wiki/functions.py
+++ b/bot/wiki/functions.py
@@ -90,7 +90,11 @@ def _get_site_object_from_dict(name, d):
 
     for key, value in namespaces.items():  # Convert string keys to integers
         del namespaces[key]
-        namespaces[int(key)] = value
+        try:
+            namespaces[int(key)] = value
+        except ValueError:  # Data is broken, ignore it
+            namespaces = None
+            break
 
     return Site(name=name, project=project, lang=lang, base_url=base_url,
                 article_path=article_path, script_path=script_path, sql=sql,
diff --git a/bot/wiki/page.py b/bot/wiki/page.py
index 9fe167e..14139fe 100644
--- a/bot/wiki/page.py
+++ b/bot/wiki/page.py
@@ -20,6 +20,7 @@ class Page(object):
     url                 -- returns the page's URL
     namespace           -- returns the page's namespace as an integer
     protection          -- returns the page's current protection status
+    creator             -- returns the page's creator (first user to edit)
     is_talkpage         -- returns True if the page is a talkpage, else False
     is_redirect         -- returns True if the page is a redirect, else False
     toggle_talk         -- returns a content page's talk page, or vice versa
@@ -51,6 +52,7 @@ class Page(object):
         self._protection = None
         self._fullurl = None
         self._content = None
+        self._creator = None
 
         # Try to determine the page's namespace using our site's namespace
         # converter:
@@ -122,15 +124,17 @@ class Page(object):
         """Loads various data from the API in a single query.
 
         Loads self._title, ._exists, ._is_redirect, ._pageid, ._fullurl,
-        ._protection, ._namespace, ._is_talkpage, and ._lastrevid using the
-        API. It will do a query of its own unless `result` is provided, in
-        which case we'll pretend `result` is what the query returned.
+        ._protection, ._namespace, ._is_talkpage, ._creator, and ._lastrevid
+        using the API. It will do a query of its own unless `result` is
+        provided, in which case we'll pretend `result` is what the query
+        returned.
 
         Assuming the API is sound, this should not raise any exceptions.
         """
         if result is None:
-            params = {"action": "query", "prop": "info", "titles": self._title,
-                      "inprop": "protection|url"}
+            params = {"action": "query", "rvprop": "user", "rvdir": "newer",
+                      "prop": "info|revisions", "rvlimit": 1,
+                      "titles": self._title, "inprop": "protection|url"}
             result = self._site._api_query(params)
 
         res = result["query"]["pages"].values()[0]
@@ -169,9 +173,10 @@ class Page(object):
         self._namespace = res["ns"]
         self._is_talkpage = self._namespace % 2 == 1  # talkpages have odd IDs
 
-        # This last field will only be specified if the page exists:
+        # These last two fields will only be specified if the page exists:
+        self._lastrevid = res.get("lastrevid")
         try:
-            self._lastrevid = res["lastrevid"]
+            self._creator = res['revisions'][0]['user']
         except KeyError:
             pass
 
@@ -287,6 +292,27 @@ class Page(object):
         self._force_validity()  # invalid pages cannot be protected
         return self._protection
 
+    def creator(self, force=False):
+        """Returns the page's creator (i.e., the first user to edit the page).
+
+        Makes an API query if force is True or if we haven't already made one.
+        Normally, we can get the creator along with everything else (except
+        content) in self._load_attributes(). However, due to a limitation in
+        the API (can't get the editor of one revision and the content of
+        another at both ends of the history), if our other attributes were only
+        loaded from get(), we'll have to do another API query. This is done
+        by calling ourselves again with force=True.
+
+        Raises InvalidPageError or PageNotFoundError if the page name is
+        invalid or the page does not exist, respectively.
+        """
+        if self._exists == 0 or force:
+            self._load_wrapper()
+        self._force_existence()
+        if not self._creator and not force:
+            self.creator(force=True)
+        return self._creator
+
     def is_talkpage(self, force=False):
         """Returns True if the page is a talkpage, else False.