Quellcode durchsuchen

Implement pageviews API requests.

master
Ben Kurtovic vor 8 Jahren
Ursprung
Commit
bfcd4cbaab
4 geänderte Dateien mit 83 neuen und 14 gelöschten Zeilen
  1. +0
    -0
     
  2. +2
    -2
      schema.sql
  3. +1
    -1
      templates/index.mako
  4. +80
    -11
      tif/calc.py

+ 0
- 0
Datei anzeigen


+ 2
- 2
schema.sql Datei anzeigen

@@ -3,7 +3,7 @@ CREATE DATABASE `sXXXXX__tif`;
DROP TABLE IF EXISTS `cache`;
CREATE TABLE `cache` (
`cache_id` INT(10) UNSIGNED NOT NULL,
`cache_views` INT UNSIGNED NOT NULL,
`cache_time` TIMESTAMP NOT NULL,
`cache_views` FLOAT NOT NULL,
`cache_time` TIMESTAMP NOT NULL DEFAULT '0000-00-00 00:00:00',
PRIMARY KEY (`cache_id`)
) ENGINE=InnoDB;

+ 1
- 1
templates/index.mako Datei anzeigen

@@ -24,7 +24,7 @@
<table>
<tr>
<td>TIF</td>
<td>${result["tif"]}</td>
<td>${"{0:.2f}".format(result["tif"])}</td>
</tr>
<tr>
<td>Transclusions</td>


+ 80
- 11
tif/calc.py Datei anzeigen

@@ -1,7 +1,12 @@
# -*- coding: utf-8 -*-

from datetime import datetime
from datetime import datetime, timedelta
from gzip import GzipFile
from json import loads
from os.path import expanduser
from StringIO import StringIO
from urllib import quote
from urllib2 import URLError

from earwigbot.bot import Bot
from oursql import connect
@@ -15,16 +20,80 @@ def _get_db(bot):
args["autoreconnect"] = True
return connect(**args)

def _compute_stats(page, db):
with db.cursor() as cursor:
query = """SELECT COUNT(*) FROM templatelinks WHERE tl_title = ?
AND tl_namespace = 10 AND tl_from_namespace = 0"""
cursor.execute(query, (page.title.replace(" ", "_"),))
transclusions = cursor.fetchall()[0][0]
def _count_transclusions(cursor, title):
query = """SELECT COUNT(*)
FROM templatelinks
WHERE tl_title = ? AND tl_namespace = 10 AND tl_from_namespace = 0"""
cursor.execute(query, (title,))
return cursor.fetchall()[0][0]

def _count_views(cursor, title, dbname):
query = """SELECT SUM(cache_views), MIN(cache_time)
FROM templatelinks
INNER JOIN {0}.cache ON tl_from = cache_id
WHERE tl_title = ? AND tl_namespace = 10 AND tl_from_namespace = 0"""
cursor.execute(query.format(dbname), (title,))
return cursor.fetchall()[0]

def _get_avg_views(site, article):
url = ("https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/"
"{0}.{1}/all-access/user/{2}/daily/{3}/{4}")
days = 30
slug = quote(article.replace(" ", "_"), safe="")
start = datetime.utcnow().strftime("%Y%M%D")
end = (datetime.utcnow() - timedelta(days=days)).strftime("%Y%M%D")
query = url.format(site.lang, site.project, slug, start, end)

try:
response = site._opener.open(query) # We're terrible
except URLError:
return None

result = response.read()
if response.headers.get("Content-Encoding") == "gzip":
stream = StringIO(result)
gzipper = GzipFile(fileobj=stream)
result = gzipper.read()

try:
res = loads(result)
except ValueError:
return None

# TODO
tif = 0.0
cache_time = None
if "items" not in res:
return None
return sum(item["views"] for item in res["items"]) / float(days)

def _update_views(cursor, site, title, dbname):
    """Refresh cached pageview averages for the template's transcluders.

    Selects mainspace pages transcluding *title* whose cache row (in the
    *dbname* database) is missing or older than the cache lifetime,
    fetches fresh average view counts for each, and upserts the cache
    rows in batches.
    """
    cache_life = "7 DAY"
    # The staleness test must be parenthesized: without the parentheses
    # the trailing OR escaped the title/namespace filter, matching every
    # stale cache row in the table.
    query1 = """SELECT tl_from
                FROM templatelinks
                LEFT JOIN {0}.cache ON tl_from = cache_id
                WHERE tl_title = ? AND tl_namespace = 10 AND tl_from_namespace = 0
                AND (cache_id IS NULL
                     OR DATE_SUB(NOW(), INTERVAL {1}) > cache_time)"""
    query2 = """INSERT INTO {0}.cache (cache_id, cache_views, cache_time)
                VALUES (?, ?, NOW()) ON DUPLICATE KEY
                UPDATE cache_views = ?, cache_time = NOW()""".format(dbname)

    cursor.execute(query1.format(dbname, cache_life), (title,))
    while True:
        rows = cursor.fetchmany(1024)
        if not rows:
            break

        # fetchmany yields 1-tuples; unpack the page ID before use (the
        # old code passed the raw tuple into _get_avg_views and the
        # INSERT parameters).
        # NOTE(review): tl_from is a page ID, but _get_avg_views expects
        # an article title -- this probably needs a join against the
        # `page` table to resolve titles; confirm against callers.
        viewcounts = [(pageid, _get_avg_views(site, pageid))
                      for (pageid,) in rows]
        parambatch = [(p, v, v) for (p, v) in viewcounts if v is not None]
        cursor.executemany(query2, parambatch)

def _compute_stats(bot, db, page):
    """Gather TIF statistics for *page*.

    Returns a (total views, transclusion count, oldest cache timestamp)
    tuple, refreshing stale cache entries along the way.  The name of
    our cache database comes from the bot's wiki config.
    """
    template = page.title.replace(" ", "_")
    cache_db = bot.config.wiki["_tifSQL"]["db"]

    with db.cursor() as cursor:
        count = _count_transclusions(cursor, template)
        _update_views(cursor, page.site, template, cache_db)
        views, oldest = _count_views(cursor, template, cache_db)

    return views, count, oldest

@@ -48,7 +117,7 @@ def calculate_tif(title):
result["error"] = "no page"
return result

tif, transclusions, cache_time = _compute_stats(page, db)
tif, transclusions, cache_time = _compute_stats(bot, db, page)

result["tif"] = tif
result["transclusions"] = transclusions


Laden…
Abbrechen
Speichern