From 2c7e8bfef99fb8b44a520d97766629958f76ef9a Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 6 Nov 2011 12:53:49 -0500 Subject: [PATCH] A few small changes --- bot/tasks/afc_statistics.py | 40 ++++++++++++++++++++-------------------- bot/wiki/site.py | 2 +- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/bot/tasks/afc_statistics.py b/bot/tasks/afc_statistics.py index 63f917b..934d1b0 100644 --- a/bot/tasks/afc_statistics.py +++ b/bot/tasks/afc_statistics.py @@ -24,8 +24,8 @@ class Task(BaseTask): """A task to generate statistics for WikiProject Articles for Creation. Statistics are stored in a MySQL database ("u_earwig_afc_statistics") - accessed with oursql. Statistics are updated live while watching the recent - changes IRC feed and saved once an hour, on the hour, to self.pagename. + accessed with oursql. Statistics are synchronized with the live database + every four minutes and saved once an hour, on the hour, to self.pagename. In the live bot, this is "Template:AFC statistics". """ name = "afc_statistics" @@ -201,16 +201,18 @@ class Task(BaseTask): query2 = """SELECT page_latest, page_title, page_namespace FROM page WHERE page_id = ?""" cursor.execute(query1) + for pageid, title, oldid in cursor: - msg = "Updating tracked page: [[{0}]] (id: {1}) @ {2}" + msg = "Updating page [[{0}]] (id: {1}) @ {2}" self.logger.debug(msg.format(pageid, title, oldid)) result = list(self.site.sql_query(query2, (pageid,))) - try: - real_oldid = result[0][0] - except IndexError: # Page doesn't exist! 
+ if not result: self.untrack_page(cursor, pageid) continue - if real_oldid != oldid: + + real_oldid = result[0][0] + if oldid != real_oldid: + self.logger.debug(" {0} -> {1}".format(oldid, real_oldid)) body = result[0][1].replace("_", " ") ns = self.site.namespace_id_to_name(result[0][2]) real_title = ":".join(ns, body) @@ -235,6 +237,8 @@ class Task(BaseTask): if title in self.ignore_list: continue if pageid not in tracked: + msg = "Tracking page [[{0}]] (id: {1})".format(title, pageid) + self.logger.debug(msg) self.track_page(cursor, pageid, title) def delete_old(self, cursor): @@ -263,9 +267,6 @@ class Task(BaseTask): A variety of SQL queries are used to gather information about the page, which are then saved to our database. """ - msg = "Tracking page [[{0}]] (id: {1})".format(title, pageid) - self.logger.debug(msg) - content = self.get_content(title) status, chart = self.get_status_and_chart(content) if not status: @@ -302,9 +303,6 @@ class Task(BaseTask): space). If it was moved to another namespace, something unusual has happened, and we'll untrack the submission. """ - msg = "Updating page [[{0}]] (id: {1})".format(title, pageid) - self.logger.debug(msg) - content = self.get_content(title) try: redirect_regex = wiki.Page.re_redirect @@ -326,7 +324,7 @@ class Task(BaseTask): self.untrack_page(cursor, pageid) return else: - msg = "Page has moved to namespace {0}".format(target_ns) + msg = " Page has moved to namespace {0}".format(target_ns) self.logger.debug(msg) self.untrack_page(cursor, pageid) return @@ -357,7 +355,7 @@ class Task(BaseTask): query = "UPDATE page SET page_title = ?, page_short = ? WHERE page_id = ?" 
short = self.get_short_title(title) cursor.execute(query, (title, short, pageid)) - msg = "{0}: title: {1} -> {2}" + msg = " {0}: title: {1} -> {2}" self.logger.debug(msg.format(pageid, result["page_title"], title)) def update_page_modify(self, cursor, result, pageid, size, m_user, m_time, m_id): @@ -367,7 +365,7 @@ class Task(BaseTask): WHERE page_id = ?""" cursor.execute(query, (size, m_user, m_time, m_id, pageid)) - msg = "{0}: modify: {1} / {2} / {3} -> {4} / {5} / {6}" + msg = " {0}: modify: {1} / {2} / {3} -> {4} / {5} / {6}" msg = msg.format(pageid, result["page_modify_user"], result["page_modify_time"], result["page_modify_oldid"], m_user, m_time, m_id) @@ -382,7 +380,7 @@ class Task(BaseTask): WHERE page_id = ?""" cursor.execute(query1, (status, chart, pageid)) - msg = "{0}: status: {1} ({2}) -> {3} ({4})" + msg = " {0}: status: {1} ({2}) -> {3} ({4})" self.logger.debug(msg.format(pageid, result["page_status"], result["row_chart"], status, chart)) @@ -400,7 +398,7 @@ class Task(BaseTask): """Update the notes (or warnings) of a page in our database.""" query = "UPDATE page SET page_notes = ? WHERE page_id = ?" cursor.execute(query, (notes, pageid)) - msg = "{0}: notes: {1} -> {2}" + msg = " {0}: notes: {1} -> {2}" self.logger.debug(msg.format(pageid, result["page_notes"], notes)) def get_content(self, title): @@ -499,7 +497,7 @@ class Task(BaseTask): its revision ID. If the page's status is not something that involves "special"-ing, we will return None for all three. The same will be returned if we cannot determine when the page was "special"-ed, or if - it was "special"-ed more than 100 edits ago. + it was "special"-ed more than 250 edits ago. 
""" if chart in [CHART_NONE, CHART_PEND]: return None, None, None @@ -519,7 +517,9 @@ class Task(BaseTask): counter = 0 for user, ts, revid in result: counter += 1 - if counter > 100: + if counter > 250: + msg = "Exceeded 250 content lookups while determining special for page (id: {0}, chart: {1})" + self.logger.warn(msg.format(pageid, chart)) break content = self.site.get_revid_content(revid) if re.search(search, content, re.I): diff --git a/bot/wiki/site.py b/bot/wiki/site.py index bbff658..bdc9426 100644 --- a/bot/wiki/site.py +++ b/bot/wiki/site.py @@ -548,7 +548,7 @@ class Site(object): entire list, which includes the canonical name. For example, returns u"Wikipedia" if ns_id=4 and all=False on enwiki; - returns [u"Wikipedia", u"Project"] if ns_id=4 and all=True. + returns [u"Wikipedia", u"Project", u"WP"] if ns_id=4 and all=True. Raises NamespaceNotFoundError if the ID is not found. """