Ver código fonte

Updates

tags/v0.1^2
Ben Kurtovic 12 anos atrás
pai
commit
34d7bf3104
4 arquivos alterados com 144 adições e 154 exclusões
  1. +2
    -0
      bot/classes/base_task.py
  2. +0
    -4
      bot/rules.py
  3. +139
    -147
      bot/tasks/afc_statistics.py
  4. +3
    -3
      bot/wiki/category.py

+ 2
- 0
bot/classes/base_task.py Ver arquivo

@@ -89,4 +89,6 @@ class BaseTask(object):
return False
if content == cfg.get("disabled", "run"):
return False

self.logger.warn("Emergency task shutoff has been enabled!")
return True

+ 0
- 4
bot/rules.py Ver arquivo

@@ -36,7 +36,6 @@ def process(rc):
chans.update(("##earwigbot", "#wikipedia-en-afc"))
if r_page.search(page_name):
tasks.start("afc_statistics", action="edit", page=rc.page)
tasks.start("afc_copyvios", action="edit", page=rc.page)
chans.add("#wikipedia-en-afc")
@@ -49,19 +48,16 @@ def process(rc):
elif rc.flags == "move" and (r_move1.match(comment) or
r_move2.match(comment)):
p = r_moved_pages.findall(rc.comment)[0]
tasks.start("afc_statistics", action="move", page=p)
tasks.start("afc_copyvios", action="move", page=p)
chans.add("#wikipedia-en-afc")
elif rc.flags == "delete" and r_delete.match(comment):
p = r_deleted_page.findall(rc.comment)[0]
tasks.start("afc_statistics", action="delete", page=p)
tasks.start("afc_copyvios", action="delete", page=p)
chans.add("#wikipedia-en-afc")
elif rc.flags == "restore" and r_restore.match(comment):
p = r_restored_page.findall(rc.comment)[0]
tasks.start("afc_statistics", action="restore", page=p)
tasks.start("afc_copyvios", action="restore", page=p)
chans.add("#wikipedia-en-afc")


+ 139
- 147
bot/tasks/afc_statistics.py Ver arquivo

@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-

from datetime import datetime
import logging
import re
from os.path import expanduser
from threading import Lock
@@ -27,7 +28,7 @@ class Task(BaseTask):

# Set some wiki-related attributes:
self.pagename = cfg.get("page", "Template:AFC statistics")
self.pending_cat = cfg.get("pending", "Pending_AfC_submissions")
self.pending_cat = cfg.get("pending", "Pending AfC submissions")
self.ignore_list = cfg.get("ignore_list", [])
default_summary = "Updating statistics for [[WP:WPAFC|WikiProject Articles for creation]]."
self.summary = self.make_summary(cfg.get("summary", default_summary))
@@ -49,26 +50,16 @@ class Task(BaseTask):
self.conn = oursql.connect(**self.conn_data)

action = kwargs.get("action")
if not action:
return

methods = {
"save": self.save,
"sync": self.sync,
"edit": self.process_edit,
"move": self.process_move,
"delete": self.process_delete,
"restore": self.process_edit,
}

method = methods.get(action)
if method:
try:
method(**kwargs)
finally:
self.conn.close()
try:
if action == "save":
self.save()
elif action == "sync":
self.sync()
finally:
self.conn.close()

def save(self, **kwargs):
self.logger.info("Saving chart")
if kwargs.get("fromIRC"):
summary = " ".join((self.summary, "(!earwigbot)"))
else:
@@ -84,11 +75,13 @@ class Task(BaseTask):
statistics.join(("\\1\n", "\n\\3")), text,
flags=re.DOTALL)
if newtext == text:
self.logger.info("Chart unchanged; not saving")
return # Don't edit the page if we're not adding anything

newtext = re.sub("(<!-- sig begin -->)(.*?)(<!-- sig end -->)",
"\\1~~~ at ~~~~~\\3", newtext)
page.edit(newtext, summary, minor=True, bot=True)
self.logger.info("Chart saved to [[{0}]]".format(page.title()))

def compile_charts(self):
stats = ""
@@ -136,118 +129,81 @@ class Task(BaseTask):
return timestamp.strftime("%H:%M, %d %B %Y")

def sync(self, **kwargs):
self.logger.info("Starting sync")
self.report_replag()
with self.conn.cursor() as cursor, self.db_access_lock:
self.sync_deleted(cursor) # Remove deleted subs
self.sync_oldids(cursor) # Make sure all subs are up to date
self.sync_pending(cursor) # Add missing pending subs
self.sync_old(cursor) # Remove old declined and accepted subs

def sync_deleted(self, cursor):
query1 = "SELECT page_id FROM page"
query2 = "SELECT page_id FROM page WHERE page_id = ?"
cursor.execute(query1)
for page in cursor:
result = self.site.sql_query(query2, (page[0],))
if not list(result):
self.untrack_page(cursor, pageid=page[0])
self.update_tracked(cursor)
self.add_untracked(cursor)
self.delete_old(cursor)
self.logger.info("Sync completed")

def report_replag(self):
    """Log the server's replag at a level chosen from its magnitude.

    Values below 60 are logged at DEBUG, values below 720 at INFO, and
    anything else at WARNING.
    """
    lag = self.site.get_replag()
    # Ordered from the strictest bound upwards; the first match wins.
    bounds = ((60, logging.DEBUG), (720, logging.INFO))
    level = logging.WARNING
    for upper, candidate in bounds:
        if lag < upper:
            level = candidate
            break
    self.logger.log(level, "Server replag is {0}".format(lag))

def sync_oldids(self, cursor):
def update_tracked(self, cursor):
self.logger.debug("Updating tracked submissions")
query1 = "SELECT page_id, page_title, page_modify_oldid FROM page"
query2 = "SELECT page_latest, page_title FROM page WHERE page_id = ?"
cursor.execute(query1)
for page_id, title, oldid in cursor:
result = list(self.site.sql_query(query2, (page_id,)))
for pageid, title, oldid in cursor:
msg = "Updating tracked page: [[{0}]] (id: {1}) @ {2}"
self.logger.debug(msg.format(pageid, title, oldid))
result = list(self.site.sql_query(query2, (pageid,)))
try:
real_oldid = result[0][0]
real_title = result[0][1]
except IndexError: # Page doesn't exist!
self.untrack_page(cursor, pageid=page_id)
self.untrack_page(cursor, pageid)
continue
if real_oldid != oldid:
self.update_page(cursor, real_title)

def sync_pending(self, cursor):
query1 = """SELECT page_id FROM page JOIN row ON page_id = row_id
WHERE row_chart IN (1, 2, 3)"""
query2 = """SELECT cl_from, page_title, page_namespace
FROM categorylinks JOIN page ON cl_from = page_id
WHERE cl_to = ?"""
cursor.execute(query1)
self.update_page(cursor, pageid, real_title)

def add_untracked(self, cursor):
self.logger.debug("Adding untracked pending submissions")
cursor.execute("SELECT page_id FROM page")
tracked = [i[0] for i in cursor.fetchall()]
result = self.site.sql_query(query2, (self.pending_cat,))

for pageid, title, ns in result:
title = ":".join((self.site.namespace_id_to_name(ns), title))
if title.replace("_", " ") in self.ignore_list:
category = self.site.get_category(self.pending_cat)
pending = category.members(limit=500)

for title, pageid in pending:
if title in self.ignore_list:
continue
if pageid not in tracked:
self.track_page(cursor, title)
self.track_page(cursor, pageid, title)

def sync_old(self, cursor):
def delete_old(self, cursor):
self.logger.debug("Removing old submissions from chart")
query = """DELETE FROM page, row USING page JOIN row
ON page_id = row_id WHERE row_chart IN (4, 5)
AND ADDTIME(page_special_time, '36:00:00') < NOW()"""
cursor.execute(query)

def process_edit(self, page, **kwargs):
    """Sync one edited page into the database, skipping ignored pages."""
    if page not in self.ignore_list:
        # The cursor is opened first, then the shared DB lock is taken.
        with self.conn.cursor() as cursor, self.db_access_lock:
            self.sync_page(cursor, page)
def untrack_page(self, cursor, pageid):
    """Delete the page and row records joined on `pageid`, logging first."""
    self.logger.debug("Untracking page (id: {0})".format(pageid))
    statement = ("DELETE FROM page, row USING page JOIN row\n"
                 "ON page_id = row_id WHERE page_id = ?")
    cursor.execute(statement, (pageid,))

def process_move(self, page, **kwargs):
# Handle a page move. `page` is unpacked into a (source, dest) pair; the
# record stored under the source title is renamed to dest, and if no such
# record exists dest is handed to track_page instead.
# NOTE(review): indentation is not visible in this view, so the nesting
# described in these comments is presumed from statement order -- confirm
# against the real file.
# query1: look up our own record by the old title (run on `cursor`).
query1 = "SELECT * FROM page WHERE page_title = ?"
# query2: latest revision id of the destination (run via self.site.sql_query).
query2 = "SELECT page_latest FROM page WHERE page_namespace = ? AND page_title = ?"
# query3: rename our record and store the revision id chosen below.
query3 = "UPDATE page SET page_title = ?, page_modify_oldid = ? WHERE page_title = ?"
source, dest = page
with self.conn.cursor() as cursor, self.db_access_lock:
cursor.execute(query1, (source,))
result = cursor.fetchall()
if result:
# split_title(dest) supplies the two parameters that query2 binds.
res = self.site.sql_query(query2, self.split_title(dest))
try:
new_oldid = list(res)[0][0]
except IndexError:
# The lookup returned no rows: fall back to column 11 of our existing row.
# NOTE(review): presumably page_modify_oldid -- confirm against the schema.
new_oldid = result[0][11]
cursor.execute(query3, (dest, new_oldid, source))
# NOTE(review): presumably pairs with `if result:` (nothing tracked under
# the old title) rather than with the try block -- confirm.
else:
self.track_page(cursor, dest)

def process_delete(self, page, **kwargs):
    """Re-sync `page` if the site query still finds it, else untrack it."""
    lookup = "SELECT page_id FROM page WHERE page_namespace = ? AND page_title = ?"
    with self.conn.cursor() as cursor, self.db_access_lock:
        found = list(self.site.sql_query(lookup, self.split_title(page)))
        if not found:
            self.untrack_page(cursor, title=page)
        else:
            self.sync_page(cursor, page)

def sync_page(self, cursor, page):
    """Update `page` if a record with that title exists, else track it."""
    cursor.execute("SELECT * FROM page WHERE page_title = ?", (page,))
    if not cursor.fetchall():
        self.track_page(cursor, page)
        return
    self.update_page(cursor, page)

def untrack_page(self, cursor, pageid=None, title=None):
    """Delete the joined page/row records matching `pageid` or `title`.

    A truthy `pageid` takes precedence over `title`; if neither is truthy
    nothing is executed.
    """
    prefix = "DELETE FROM page, row USING page JOIN row ON page_id = row_id WHERE "
    if pageid:
        column, value = "page_id", pageid
    elif title:
        column, value = "page_title", title
    else:
        return
    cursor.execute(prefix + column + " = ?", (value,))

def track_page(self, cursor, title):
def track_page(self, cursor, pageid, title):
"""Update hook for when page is not in our database."""
msg = "Tracking page [[{0}]] (id: {1})".format(title, pageid)
self.logger.debug(msg)

page = self.site.get_page(title)
status, chart = self.get_status_and_chart(page)
if not status or status in ("accept", "decline"):
if not status:
msg = "Could not find a status for [[{0}]]".format(title)
self.logger.warn(msg)
return

pageid = page.pageid()
title = page.title()
short = self.get_short_title(title)
size = len(page.get())
@@ -256,21 +212,29 @@ class Task(BaseTask):
m_user, m_time, m_id = self.get_modify(pageid)
s_user, s_time, s_id = self.get_special(page, status)

query1 = "INSERT INTO row VALUES (?, ?)"
query2 = "INSERT INTO page VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
cursor.execute(query1, (pageid, chart))
cursor.execute(query2, (pageid, status, title, short, size, notes,
query1 = "INSERT INTO row VALUES ?"
query2 = "INSERT INTO page VALUES ?"
cursor.execute(query1, ((pageid, chart),))
cursor.execute(query2, ((pageid, status, title, short, size, notes,
c_user, c_time, c_id, m_user, m_time, m_id,
s_user, s_time, s_id))
s_user, s_time, s_id),))

def update_page(self, cursor, title):
def update_page(self, cursor, pageid, title):
"""Update hook for when page is in our database."""
msg = "Updating page [[{0}]] (id: {1})".format(title, pageid)
self.logger.debug(msg)

page = self.site.get_page(title)
status, chart = self.get_status_and_chart(page)
if not status:
self.untrack_page(cursor, title=title)
self.untrack_page(cursor, pageid)

if pageid != page.pageid():
msg = "Page [[{0}]] is not what it should be! (id: {0} != {1})"
self.logger.warn(msg.format(pageid, page.pageid()))
self.report_replag()
self.untrack_page(cursor, pageid)

pageid = page.pageid()
title = page.title()
size = len(page.get())
notes = self.get_notes(page)
@@ -282,43 +246,72 @@ class Task(BaseTask):
result = dict_cursor.fetchall()[0]

if title != result["page_title"]:
query = "UPDATE page SET page_title = ?, page_short = ? WHERE page_id = ?"
short = self.get_short_title(title)
cursor.execute(query, (title, short, pageid))
self.update_page_title(cursor, result, pageid, title)

if m_id != result["page_modify_oldid"]:
query = """UPDATE page SET page_size = ?, page_modify_user = ?,
page_modify_time = ?, page_modify_oldid = ?
WHERE page_id = ?"""
cursor.execute(query, (size, m_user, m_time, m_id, pageid))
self.update_page_modify(cursor, result, pageid, size, m_user, m_time, m_id)

if status != result["page_status"]:
query1 = """UPDATE page JOIN row ON page_id = row_id
SET page_status = ?, row_chart = ? WHERE page_id = ?"""
query2 = """UPDATE page SET page_special_user = ?,
page_special_time = ?, page_special_oldid = ?
WHERE page_id = ?"""
cursor.execute(query1, (status, chart, pageid))
s_user, s_time, s_id = self.get_special(page, status)
if s_id != result["page_special_oldid"]:
cursor.execute(query2, (s_user, s_time, s_id, pageid))
self.update_page_special(cursor, result, pageid, status, chart, page)

if notes != result["page_notes"]:
query = "UPDATE page SET page_notes = ? WHERE page_id = ?"
cursor.execute(query, (notes, pageid))
self.update_page_notes(cursor, result, pageid, notes)

def split_title(self, title):
    """Split a full page title into a (namespace id, title body) pair.

    The title is split on its first colon. If there is no colon, nothing
    follows it, or the prefix is not a known namespace, the whole title is
    returned unchanged with namespace id 0.
    """
    # The original unpacked `title.split(":", 1)[0]`, i.e. the characters of
    # the first segment, which raised ValueError for most titles.
    namespace, _, body = title.partition(":")
    if not body:
        return 0, title
    try:
        ns = self.site.namespace_name_to_id(namespace)
    except wiki.NamespaceNotFoundError:
        # The prefix before the colon is part of the title, not a namespace.
        return 0, title
    return ns, body
def update_page_title(self, cursor, result, pageid, title):
    """Store the page's new title and short title, then log the change."""
    cursor.execute(
        "UPDATE page SET page_title = ?, page_short = ? WHERE page_id = ?",
        (title, self.get_short_title(title), pageid))
    previous = result["page_title"]
    self.logger.debug("{0}: title: {1} -> {2}".format(pageid, previous, title))

def update_page_modify(self, cursor, result, pageid, size, m_user, m_time, m_id):
    """Store the page's size and latest-revision details, then log them."""
    statement = ("UPDATE page SET page_size = ?, page_modify_user = ?,\n"
                 "page_modify_time = ?, page_modify_oldid = ?\n"
                 "WHERE page_id = ?")
    cursor.execute(statement, (size, m_user, m_time, m_id, pageid))

    # Previous values come from the row fetched before this update ran.
    columns = ("page_modify_user", "page_modify_time", "page_modify_oldid")
    fields = [result[column] for column in columns] + [m_user, m_time, m_id]
    template = "{0}: modify: {1} / {2} / {3} -> {4} / {5} / {6}"
    self.logger.debug(template.format(pageid, *fields))

def update_page_special(self, cursor, result, pageid, status, chart, page):
    """Store a page's new status and chart, and its special revision if changed.

    The status and chart are always written and logged. The special
    user/time/oldid returned by get_special are written and logged only
    when the oldid differs from the one in `result`.
    """
    query1 = ("UPDATE page JOIN row ON page_id = row_id\n"
              "SET page_status = ?, row_chart = ? WHERE page_id = ?")
    query2 = ("UPDATE page SET page_special_user = ?,\n"
              "page_special_time = ?, page_special_oldid = ?\n"
              "WHERE page_id = ?")
    cursor.execute(query1, (status, chart, pageid))

    msg = "{0}: status: {1} ({2}) -> {3} ({4})"
    self.logger.debug(msg.format(pageid, result["page_status"],
                                 result["row_chart"], status, chart))

    s_user, s_time, s_id = self.get_special(page, status)

    if s_id != result["page_special_oldid"]:
        cursor.execute(query2, (s_user, s_time, s_id, pageid))
        # Log the new *special* values; the original formatted m_user,
        # m_time and m_id, which do not exist in this scope (NameError).
        msg = "{0}: special: {1} / {2} / {3} -> {4} / {5} / {6}"
        msg = msg.format(pageid, result["page_special_user"],
                         result["page_special_time"],
                         result["page_special_oldid"], s_user, s_time, s_id)
        self.logger.debug(msg)

def update_page_notes(self, cursor, result, pageid, notes):
    """Store the page's new notes value and log the old and new values."""
    cursor.execute("UPDATE page SET page_notes = ? WHERE page_id = ?",
                   (notes, pageid))
    previous = result["page_notes"]
    self.logger.debug("{0}: notes: {1} -> {2}".format(pageid, previous, notes))

def get_status_and_chart(self, page):
content = page.get()
try:
content = page.get()
except wiki.PageNotFoundError:
msg = "Page [[{0}]] does not exist, but the server said it should!"
self.logger.warn(msg.format(page.title()))
return None, 0

if page.is_redirect():
target = page.get_redirect_target()
if self.site.get_page(target).namespace() == 0:
@@ -343,13 +336,12 @@ class Task(BaseTask):
return short

def get_create(self, pageid):
query1 = "SELECT MIN(rev_id) FROM revision WHERE rev_page = ?"
query2 = "SELECT rev_user_text, rev_timestamp, rev_id FROM revision WHERE rev_id = ?"
result1 = self.site.sql_query(query1, (pageid,))
rev_id = list(result1)[0][0]
result2 = self.site.sql_query(query2, (rev_id,))
m_user, m_time, m_id = list(result2)[0]
return m_user, datetime.strptime(m_time, "%Y%m%d%H%M%S"), m_id
query = """SELECT rev_user_text, rev_timestamp, rev_id
FROM revision WHERE rev_id =
(SELECT MIN(rev_id) FROM revision WHERE rev_page = ?)"""
result = self.site.sql_query(query, (pageid,))
c_user, c_time, c_id = list(result)[0]
return c_user, datetime.strptime(c_time, "%Y%m%d%H%M%S"), c_id

def get_modify(self, pageid):
query = """SELECT rev_user_text, rev_timestamp, rev_id FROM revision


+ 3
- 3
bot/wiki/category.py Ver arquivo

@@ -13,7 +13,7 @@ class Category(Page):
because it accepts category names without the namespace prefix.

Public methods:
members -- returns a list of titles in the category
members -- returns a list of pages in the category as (title, id) tuples
"""

def __repr__(self):
@@ -26,7 +26,7 @@ class Category(Page):
return '<Category "{0}" of {1}>'.format(self.title(), str(self._site))

def members(self, limit=50):
"""Returns a list of titles in the category.
"""Returns a list of pages in the category as (title, pageid) tuples.

If `limit` is provided, we will provide this many titles, or less if
the category is too small. `limit` defaults to 50; normal users can go
@@ -36,4 +36,4 @@ class Category(Page):
"cmlimit": limit, "cmtitle": self._title}
result = self._site._api_query(params)
members = result['query']['categorymembers']
return [member["title"] for member in members]
return [(member["title"], member["pageid"]) for member in members]

Carregando…
Cancelar
Salvar