Parcourir la source

Force a periodic update of the statistics database (closes #7).

pull/15/head
Ben Kurtovic il y a 11 ans
Parent
révision
c6550df163
2 fichiers modifiés avec 50 ajouts et 22 suppressions
  1. +38
    -22
      tasks/afc_statistics.py
  2. +12
    -0
      tasks/schema/afc_statistics.sql

+ 38
- 22
tasks/afc_statistics.py Voir le fichier

@@ -300,42 +300,53 @@ class AFCStatistics(Task):
more than 36 hours ago. Pending submissions cannot be "old". more than 36 hours ago. Pending submissions cannot be "old".
""" """
self.logger.debug("Removing old submissions from chart") self.logger.debug("Removing old submissions from chart")
query = """DELETE FROM page, row USING page JOIN row
ON page_id = row_id WHERE row_chart IN (?, ?)
query = """DELETE FROM page, row, updatelog USING page JOIN row
ON page_id = row_id JOIN updatelog ON page_id = update_id
WHERE row_chart IN (?, ?)
AND ADDTIME(page_special_time, '36:00:00') < NOW()""" AND ADDTIME(page_special_time, '36:00:00') < NOW()"""
cursor.execute(query, (self.CHART_ACCEPT, self.CHART_DECLINE)) cursor.execute(query, (self.CHART_ACCEPT, self.CHART_DECLINE))


def update(self, kwargs): def update(self, kwargs):
"""Update a page by name, regardless of whether anything has changed.

Mainly intended as a command to be used via IRC, e.g.:
!tasks start afc_statistics action=update page=Foobar
"""Update old submissions, regardless of whether they've been edited.

This is intended to be run hourly, updating notes that change without
being triggered by a typical update (like a blocked submitter). It
also resolves conflicts when pages are tracked during high replag,
potentially causing data to be inaccurate (like a missed decline). By
default it updates the oldest ten pages in the database; this can be
changed by passing "limit" in kwargs with an integer.
""" """
title = kwargs.get("page")
if not title:
self.logger.info("Starting update")

replag = self.site.get_replag()
self.logger.debug("Server replag is {0}".format(replag))
if replag > 600 and not kwargs.get("ignore_replag"):
msg = "Update canceled as replag ({0} secs) is greater than ten minutes"
self.logger.warn(msg.format(replag))
return return


title = title.replace("_", " ").decode("utf8")
query = "SELECT page_id, page_modify_oldid FROM page WHERE page_title = ?"
query = """SELECT page_id, page_title, page_modify_oldid
FROM page JOIN updatelog ORDER BY update_time ASC LIMIT ?"""
with self.conn.cursor() as cursor: with self.conn.cursor() as cursor:
cursor.execute(query, (title,))
try:
pageid, oldid = cursor.fetchall()[0]
except IndexError:
msg = u"Page [[{0}]] not found in database".format(title)
self.logger.error(msg)
msg = u"Updating page [[{0}]] (id: {1}) @ {2}"
self.logger.info(msg.format(title, pageid, oldid))
self._update_page(cursor, pageid, title)
cursor.execute(query, (kwargs.get("limit", 10),))
for pageid, title, oldid in cursor:
msg = u"Updating page [[{0}]] (id: {1}) @ {2}"
self.logger.debug(msg.format(title, pageid, oldid))
try:
self._update_page(cursor, pageid, title)
except Exception:
e = u"Error updating page [[{0}]] (id: {1})"
self.logger.exception(e.format(title, pageid))
self.logger.info("Update completed")


######################## PRIMARY PAGE ENTRY POINTS ######################## ######################## PRIMARY PAGE ENTRY POINTS ########################


def _untrack_page(self, cursor, pageid): def _untrack_page(self, cursor, pageid):
"""Remove a page, given by ID, from our database.""" """Remove a page, given by ID, from our database."""
self.logger.debug("Untracking page (id: {0})".format(pageid)) self.logger.debug("Untracking page (id: {0})".format(pageid))
query = """DELETE FROM page, row USING page JOIN row
ON page_id = row_id WHERE page_id = ?"""
query = """DELETE FROM page, row, updatelog USING page JOIN row
ON page_id = row_id JOIN updatelog ON page_id = update_id
WHERE page_id = ?"""
cursor.execute(query, (pageid,)) cursor.execute(query, (pageid,))


def _track_page(self, cursor, pageid, title): def _track_page(self, cursor, pageid, title):
@@ -363,9 +374,11 @@ class AFCStatistics(Task):


query1 = "INSERT INTO row VALUES (?, ?)" query1 = "INSERT INTO row VALUES (?, ?)"
query2 = "INSERT INTO page VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" query2 = "INSERT INTO page VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
query3 = "INSERT INTO updatelog VALUES (?, ?)"
cursor.execute(query1, (pageid, chart)) cursor.execute(query1, (pageid, chart))
cursor.execute(query2, (pageid, status, title, len(content), notes, cursor.execute(query2, (pageid, status, title, len(content), notes,
m_user, m_time, m_id, s_user, s_time, s_id)) m_user, m_time, m_id, s_user, s_time, s_id))
cursor.execute(query3, (pageid, datetime.utcnow()))


def _update_page(self, cursor, pageid, title): def _update_page(self, cursor, pageid, title):
"""Update hook for when page is already in our database. """Update hook for when page is already in our database.
@@ -411,6 +424,9 @@ class AFCStatistics(Task):
if notes != result["page_notes"]: if notes != result["page_notes"]:
self._update_page_notes(cursor, result, pageid, notes) self._update_page_notes(cursor, result, pageid, notes)


query = "UPDATE updatelog SET update_time = ? WHERE update_id = ?"
cursor.execute(query, (datetime.utcnow(), pageid))

###################### PAGE ATTRIBUTE UPDATE METHODS ###################### ###################### PAGE ATTRIBUTE UPDATE METHODS ######################


def _update_page_title(self, cursor, result, pageid, title): def _update_page_title(self, cursor, result, pageid, title):


+ 12
- 0
tasks/schema/afc_statistics.sql Voir le fichier

@@ -64,4 +64,15 @@ CREATE TABLE `page` (
PRIMARY KEY (`page_id`) PRIMARY KEY (`page_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;


--
-- Table structure for table `updatelog`
--
-- One row per tracked page: `update_id` holds the page's ID (the task joins
-- it against `page`.`page_id`), and `update_time` records when the page was
-- last refreshed, so the least recently updated pages can be selected for a
-- forced update.
--

DROP TABLE IF EXISTS `updatelog`;
CREATE TABLE `updatelog` (
  `update_id` int(10) unsigned NOT NULL,
  `update_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  -- The key must name a column of this table; `update_id` is the page ID.
  PRIMARY KEY (`update_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

-- Dump completed on 2014-01-10 11:00:00

Chargement…
Annuler
Enregistrer