Browse Source

A few small changes

tags/v0.1^2
Ben Kurtovic 12 years ago
parent
commit
2c7e8bfef9
2 changed files with 21 additions and 21 deletions
  1. +20
    -20
      bot/tasks/afc_statistics.py
  2. +1
    -1
      bot/wiki/site.py

+ 20
- 20
bot/tasks/afc_statistics.py View File

@@ -24,8 +24,8 @@ class Task(BaseTask):
"""A task to generate statistics for WikiProject Articles for Creation. """A task to generate statistics for WikiProject Articles for Creation.


Statistics are stored in a MySQL database ("u_earwig_afc_statistics") Statistics are stored in a MySQL database ("u_earwig_afc_statistics")
accessed with oursql. Statistics are updated live while watching the recent
changes IRC feed and saved once an hour, on the hour, to self.pagename.
accessed with oursql. Statistics are synchronized with the live database
every four minutes and saved once an hour, on the hour, to self.pagename.
In the live bot, this is "Template:AFC statistics". In the live bot, this is "Template:AFC statistics".
""" """
name = "afc_statistics" name = "afc_statistics"
@@ -201,16 +201,18 @@ class Task(BaseTask):
query2 = """SELECT page_latest, page_title, page_namespace FROM page query2 = """SELECT page_latest, page_title, page_namespace FROM page
WHERE page_id = ?""" WHERE page_id = ?"""
cursor.execute(query1) cursor.execute(query1)

for pageid, title, oldid in cursor: for pageid, title, oldid in cursor:
msg = "Updating tracked page: [[{0}]] (id: {1}) @ {2}"
msg = "Updating page [[{0}]] (id: {1}) @ {2}"
self.logger.debug(msg.format(pageid, title, oldid)) self.logger.debug(msg.format(pageid, title, oldid))
result = list(self.site.sql_query(query2, (pageid,))) result = list(self.site.sql_query(query2, (pageid,)))
try:
real_oldid = result[0][0]
except IndexError: # Page doesn't exist!
if not result:
self.untrack_page(cursor, pageid) self.untrack_page(cursor, pageid)
continue continue
if real_oldid != oldid:

real_oldid = result[0][0]
if oldid != real_oldid:
self.logger.debug(" {0} -> {1}".format(oldid, real_oldid))
body = result[0][1].replace("_", " ") body = result[0][1].replace("_", " ")
ns = self.site.namespace_id_to_name(result[0][2]) ns = self.site.namespace_id_to_name(result[0][2])
real_title = ":".join(ns, body) real_title = ":".join(ns, body)
@@ -235,6 +237,8 @@ class Task(BaseTask):
if title in self.ignore_list: if title in self.ignore_list:
continue continue
if pageid not in tracked: if pageid not in tracked:
msg = "Tracking page [[{0}]] (id: {1})".format(title, pageid)
self.logger.debug(msg)
self.track_page(cursor, pageid, title) self.track_page(cursor, pageid, title)


def delete_old(self, cursor): def delete_old(self, cursor):
@@ -263,9 +267,6 @@ class Task(BaseTask):
A variety of SQL queries are used to gather information about the page, A variety of SQL queries are used to gather information about the page,
which are then saved to our database. which are then saved to our database.
""" """
msg = "Tracking page [[{0}]] (id: {1})".format(title, pageid)
self.logger.debug(msg)

content = self.get_content(title) content = self.get_content(title)
status, chart = self.get_status_and_chart(content) status, chart = self.get_status_and_chart(content)
if not status: if not status:
@@ -302,9 +303,6 @@ class Task(BaseTask):
space). If it was moved to another namespace, something unusual has space). If it was moved to another namespace, something unusual has
happened, and we'll untrack the submission. happened, and we'll untrack the submission.
""" """
msg = "Updating page [[{0}]] (id: {1})".format(title, pageid)
self.logger.debug(msg)

content = self.get_content(title) content = self.get_content(title)
try: try:
redirect_regex = wiki.Page.re_redirect redirect_regex = wiki.Page.re_redirect
@@ -326,7 +324,7 @@ class Task(BaseTask):
self.untrack_page(cursor, pageid) self.untrack_page(cursor, pageid)
return return
else: else:
msg = "Page has moved to namespace {0}".format(target_ns)
msg = " Page has moved to namespace {0}".format(target_ns)
self.logger.debug(msg) self.logger.debug(msg)
self.untrack_page(cursor, pageid) self.untrack_page(cursor, pageid)
return return
@@ -357,7 +355,7 @@ class Task(BaseTask):
query = "UPDATE page SET page_title = ?, page_short = ? WHERE page_id = ?" query = "UPDATE page SET page_title = ?, page_short = ? WHERE page_id = ?"
short = self.get_short_title(title) short = self.get_short_title(title)
cursor.execute(query, (title, short, pageid)) cursor.execute(query, (title, short, pageid))
msg = "{0}: title: {1} -> {2}"
msg = " {0}: title: {1} -> {2}"
self.logger.debug(msg.format(pageid, result["page_title"], title)) self.logger.debug(msg.format(pageid, result["page_title"], title))


def update_page_modify(self, cursor, result, pageid, size, m_user, m_time, m_id): def update_page_modify(self, cursor, result, pageid, size, m_user, m_time, m_id):
@@ -367,7 +365,7 @@ class Task(BaseTask):
WHERE page_id = ?""" WHERE page_id = ?"""
cursor.execute(query, (size, m_user, m_time, m_id, pageid)) cursor.execute(query, (size, m_user, m_time, m_id, pageid))


msg = "{0}: modify: {1} / {2} / {3} -> {4} / {5} / {6}"
msg = " {0}: modify: {1} / {2} / {3} -> {4} / {5} / {6}"
msg = msg.format(pageid, result["page_modify_user"], msg = msg.format(pageid, result["page_modify_user"],
result["page_modify_time"], result["page_modify_time"],
result["page_modify_oldid"], m_user, m_time, m_id) result["page_modify_oldid"], m_user, m_time, m_id)
@@ -382,7 +380,7 @@ class Task(BaseTask):
WHERE page_id = ?""" WHERE page_id = ?"""
cursor.execute(query1, (status, chart, pageid)) cursor.execute(query1, (status, chart, pageid))


msg = "{0}: status: {1} ({2}) -> {3} ({4})"
msg = " {0}: status: {1} ({2}) -> {3} ({4})"
self.logger.debug(msg.format(pageid, result["page_status"], self.logger.debug(msg.format(pageid, result["page_status"],
result["row_chart"], status, chart)) result["row_chart"], status, chart))


@@ -400,7 +398,7 @@ class Task(BaseTask):
"""Update the notes (or warnings) of a page in our database.""" """Update the notes (or warnings) of a page in our database."""
query = "UPDATE page SET page_notes = ? WHERE page_id = ?" query = "UPDATE page SET page_notes = ? WHERE page_id = ?"
cursor.execute(query, (notes, pageid)) cursor.execute(query, (notes, pageid))
msg = "{0}: notes: {1} -> {2}"
msg = " {0}: notes: {1} -> {2}"
self.logger.debug(msg.format(pageid, result["page_notes"], notes)) self.logger.debug(msg.format(pageid, result["page_notes"], notes))


def get_content(self, title): def get_content(self, title):
@@ -499,7 +497,7 @@ class Task(BaseTask):
its revision ID. If the page's status is not something that involves its revision ID. If the page's status is not something that involves
"special"-ing, we will return None for all three. The same will be "special"-ing, we will return None for all three. The same will be
returned if we cannot determine when the page was "special"-ed, or if returned if we cannot determine when the page was "special"-ed, or if
it was "special"-ed more than 100 edits ago.
it was "special"-ed more than 250 edits ago.
""" """
if chart in [CHART_NONE, CHART_PEND]: if chart in [CHART_NONE, CHART_PEND]:
return None, None, None return None, None, None
@@ -519,7 +517,9 @@ class Task(BaseTask):
counter = 0 counter = 0
for user, ts, revid in result: for user, ts, revid in result:
counter += 1 counter += 1
if counter > 100:
if counter > 250:
msg = "Exceeded 250 content lookups while determining special for page (id: {0}, chart: {1})"
self.logger.warn(msg.format(pageid, chart))
break break
content = self.site.get_revid_content(revid) content = self.site.get_revid_content(revid)
if re.search(search, content, re.I): if re.search(search, content, re.I):


+ 1
- 1
bot/wiki/site.py View File

@@ -548,7 +548,7 @@ class Site(object):
entire list, which includes the canonical name. entire list, which includes the canonical name.


For example, returns u"Wikipedia" if ns_id=4 and all=False on enwiki; For example, returns u"Wikipedia" if ns_id=4 and all=False on enwiki;
returns [u"Wikipedia", u"Project"] if ns_id=4 and all=True.
returns [u"Wikipedia", u"Project", u"WP"] if ns_id=4 and all=True.


Raises NamespaceNotFoundError if the ID is not found. Raises NamespaceNotFoundError if the ID is not found.
""" """


Loading…
Cancel
Save