Browse Source

Force a periodic update of the statistics database (closes #7).

pull/15/head
Ben Kurtovic 10 years ago
parent
commit
c6550df163
2 changed files with 50 additions and 22 deletions
  1. +38
    -22
      tasks/afc_statistics.py
  2. +12
    -0
      tasks/schema/afc_statistics.sql

+ 38
- 22
tasks/afc_statistics.py View File

@@ -300,42 +300,53 @@ class AFCStatistics(Task):
more than 36 hours ago. Pending submissions cannot be "old".
"""
self.logger.debug("Removing old submissions from chart")
query = """DELETE FROM page, row USING page JOIN row
ON page_id = row_id WHERE row_chart IN (?, ?)
query = """DELETE FROM page, row, updatelog USING page JOIN row
ON page_id = row_id JOIN updatelog ON page_id = update_id
WHERE row_chart IN (?, ?)
AND ADDTIME(page_special_time, '36:00:00') < NOW()"""
cursor.execute(query, (self.CHART_ACCEPT, self.CHART_DECLINE))

def update(self, kwargs):
    """Update old submissions, regardless of whether they've been edited.

    This is intended to be run hourly, updating notes that change without
    being triggered by a typical update (like a blocked submitter). It
    also resolves conflicts when pages are tracked during high replag,
    potentially causing data to be inaccurate (like a missed decline).

    Recognized keys in *kwargs*:

    - "limit": how many of the least recently updated pages to refresh
      (default 10). Coerced with int(), so a numeric string is accepted.
    - "ignore_replag": if true, run even when server replag exceeds ten
      minutes (600 seconds).

    Errors raised while updating an individual page are logged and do not
    stop the remaining pages from being processed.
    """
    self.logger.info("Starting update")

    replag = self.site.get_replag()
    self.logger.debug("Server replag is {0}".format(replag))
    if replag > 600 and not kwargs.get("ignore_replag"):
        msg = "Update canceled as replag ({0} secs) is greater than ten minutes"
        self.logger.warning(msg.format(replag))
        return

    limit = int(kwargs.get("limit", 10))
    # The ON clause is required: a bare JOIN is a cross join, which pairs
    # every page with every updatelog row and makes the ordering useless.
    query = """SELECT page_id, page_title, page_modify_oldid
               FROM page JOIN updatelog ON page_id = update_id
               ORDER BY update_time ASC LIMIT ?"""
    with self.conn.cursor() as cursor:
        cursor.execute(query, (limit,))
        # Materialize the rows first: _update_page() executes further
        # statements on this same cursor, which would otherwise clobber
        # the result set while we are still iterating over it.
        for pageid, title, oldid in cursor.fetchall():
            msg = u"Updating page [[{0}]] (id: {1}) @ {2}"
            self.logger.debug(msg.format(title, pageid, oldid))
            try:
                self._update_page(cursor, pageid, title)
            except Exception:
                # Best effort: one bad page must not abort the whole run.
                e = u"Error updating page [[{0}]] (id: {1})"
                self.logger.exception(e.format(title, pageid))
    self.logger.info("Update completed")

######################## PRIMARY PAGE ENTRY POINTS ########################

def _untrack_page(self, cursor, pageid):
"""Remove a page, given by ID, from our database."""
self.logger.debug("Untracking page (id: {0})".format(pageid))
query = """DELETE FROM page, row USING page JOIN row
ON page_id = row_id WHERE page_id = ?"""
query = """DELETE FROM page, row, updatelog USING page JOIN row
ON page_id = row_id JOIN updatelog ON page_id = update_id
WHERE page_id = ?"""
cursor.execute(query, (pageid,))

def _track_page(self, cursor, pageid, title):
@@ -363,9 +374,11 @@ class AFCStatistics(Task):

query1 = "INSERT INTO row VALUES (?, ?)"
query2 = "INSERT INTO page VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
query3 = "INSERT INTO updatelog VALUES (?, ?)"
cursor.execute(query1, (pageid, chart))
cursor.execute(query2, (pageid, status, title, len(content), notes,
m_user, m_time, m_id, s_user, s_time, s_id))
cursor.execute(query3, (pageid, datetime.utcnow()))

def _update_page(self, cursor, pageid, title):
"""Update hook for when page is already in our database.
@@ -411,6 +424,9 @@ class AFCStatistics(Task):
if notes != result["page_notes"]:
self._update_page_notes(cursor, result, pageid, notes)

query = "UPDATE updatelog SET update_time = ? WHERE update_id = ?"
cursor.execute(query, (datetime.utcnow(), pageid))

###################### PAGE ATTRIBUTE UPDATE METHODS ######################

def _update_page_title(self, cursor, result, pageid, title):


+ 12
- 0
tasks/schema/afc_statistics.sql View File

@@ -64,4 +64,15 @@ CREATE TABLE `page` (
PRIMARY KEY (`page_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

--
-- Table structure for table `updatelog`
--

DROP TABLE IF EXISTS `updatelog`;
CREATE TABLE `updatelog` (
  -- ID of the tracked page; the statistics task joins this against `page_id`.
  `update_id` int(10) unsigned NOT NULL,
  -- Time of the last refresh; the task sets it when it tracks or updates a page.
  `update_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  -- The key must name a column of this table (`check_id` does not exist here).
  PRIMARY KEY (`update_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

-- Dump completed on 2014-01-10 11:00:00

Loading…
Cancel
Save