Переглянути джерело

Update DRN clerkbot for a subpage-based system.

pull/8/merge
Ben Kurtovic 12 роки тому
джерело
коміт
76f9fcc5c4
1 змінених файлів з 76 додано та 63 видалено
  1. +76
    -63
      tasks/drn_clerkbot.py

+ 76
- 63
tasks/drn_clerkbot.py Переглянути файл

@@ -26,6 +26,7 @@ import re
from threading import RLock from threading import RLock
from time import mktime, sleep, time from time import mktime, sleep, time


import mwparserfromhell
import oursql import oursql


from earwigbot import exceptions from earwigbot import exceptions
@@ -69,7 +70,7 @@ class DRNClerkBot(Task):
"Wikipedia:Dispute resolution noticeboard/Volunteering") "Wikipedia:Dispute resolution noticeboard/Volunteering")
self.very_old_title = cfg.get("veryOldTitle", "User talk:Szhang (WMF)") self.very_old_title = cfg.get("veryOldTitle", "User talk:Szhang (WMF)")


clerk_summary = "Updating $3 case$4."
clerk_summary = "Updating case."
notify_summary = "Notifying user regarding [[WP:DRN|dispute resolution noticeboard]] case." notify_summary = "Notifying user regarding [[WP:DRN|dispute resolution noticeboard]] case."
chart_summary = "Updating statistics for the [[WP:DRN|dispute resolution noticeboard]]." chart_summary = "Updating statistics for the [[WP:DRN|dispute resolution noticeboard]]."
self.clerk_summary = self.make_summary(cfg.get("clerkSummary", clerk_summary)) self.clerk_summary = self.make_summary(cfg.get("clerkSummary", clerk_summary))
@@ -112,17 +113,19 @@ class DRNClerkBot(Task):
if action in ["all", "update_volunteers"]: if action in ["all", "update_volunteers"]:
self.update_volunteers(conn, site) self.update_volunteers(conn, site)
if action in ["all", "clerk"]: if action in ["all", "clerk"]:
log = u"Starting update to [[{0}]]".format(self.title)
volunteers = self.get_volunteers(conn)
log = u"Reading cases from [[{0}]]".format(self.title)
self.logger.info(log) self.logger.info(log)
cases = self.read_database(conn)
page = site.get_page(self.title) page = site.get_page(self.title)
text = page.get()
self.read_page(conn, cases, text)
notices = self.clerk(conn, cases)
cases = self.read_database(conn)
self.read_cases(conn, site, cases, page)
if self.shutoff_enabled(): if self.shutoff_enabled():
return return
if not self.save(page, cases, kwargs, start):
return
notices = []
for case in cases:
notices += self.clerk(conn, volunteers, case)
if not self.save(case, kwargs, start):
return
self.send_notices(site, notices) self.send_notices(site, notices)
if action in ["all", "update_chart"]: if action in ["all", "update_chart"]:
if self.shutoff_enabled(): if self.shutoff_enabled():
@@ -171,6 +174,14 @@ class DRNClerkBot(Task):
if additions: if additions:
cursor.executemany(query3, additions) cursor.executemany(query3, additions)


def get_volunteers(self, conn):
    """Fetch every volunteer username stored in the ``volunteers`` table.

    *conn* is the database connection to query; the usernames are returned
    as a plain list, in whatever order the database yields them.
    """
    with conn.cursor() as cursor:
        cursor.execute("SELECT volunteer_username FROM volunteers")
        rows = cursor.fetchall()
    # Each row is a one-element tuple; unpack it to get the bare username.
    return [username for (username,) in rows]

def read_database(self, conn): def read_database(self, conn):
"""Return a list of _Cases from the database.""" """Return a list of _Cases from the database."""
cases = [] cases = []
@@ -184,22 +195,25 @@ class DRNClerkBot(Task):
self.logger.debug(log.format(len(cases))) self.logger.debug(log.format(len(cases)))
return cases return cases


def read_page(self, conn, cases, text):
def read_cases(self, conn, site, cases, page):
"""Read the noticeboard content and update the list of _Cases.""" """Read the noticeboard content and update the list of _Cases."""
text = page.get()
code = mwparserfromhell.parse(text)
nextid = self.select_next_id(conn) nextid = self.select_next_id(conn)
tl_status_esc = re.escape(self.tl_status) tl_status_esc = re.escape(self.tl_status)
split = re.split("(^==\s*[^=]+?\s*==$)", text, flags=re.M|re.U)
for i in xrange(len(split)):
if i + 1 == len(split):
break
if not split[i].startswith("=="):
for template in code.filter_templates(recursive=True):
title = template.name.lower().strip()
if not title.startswith(self.title + "/"):
continue
if title == self.title + "/Header":
continue continue
title = split[i][2:-2].strip()
body = old = split[i + 1]
if not re.search("\s*\{\{" + tl_status_esc, body, re.U):
subpage = site.get_page(title)
body = old = subpage.get()
casename = subpage.title.split("/", 1)[1]
if not re.search(r"\s*\{\{" + tl_status_esc, body, re.U):
continue continue
status = self.read_status(body) status = self.read_status(body)
re_id = "<!-- Bot Case ID \(please don't modify\): (.*?) -->"
re_id = r"<!-- Bot Case ID \(please don't modify\): (.*?) -->"
try: try:
id_ = int(re.search(re_id, body).group(1)) id_ = int(re.search(re_id, body).group(1))
case = [case for case in cases if case.id == id_][0] case = [case for case in cases if case.id == id_][0]
@@ -220,20 +234,20 @@ class DRNClerkBot(Task):
f_time = datetime.strptime(match.group(2), strp) f_time = datetime.strptime(match.group(2), strp)
else: else:
f_user, f_time = None, datetime.utcnow() f_user, f_time = None, datetime.utcnow()
case = _Case(id_, title, status, self.STATUS_UNKNOWN, f_user,
f_time, f_user, f_time, "", self.min_ts,
case = _Case(id_, casename, status, self.STATUS_UNKNOWN,
f_user, f_time, f_user, f_time, "", self.min_ts,
self.min_ts, False, False, False, len(body), self.min_ts, False, False, False, len(body),
new=True)
subpage, new=True)
cases.append(case) cases.append(case)
log = u"Added new case {0} ('{1}', status={2}, by {3})" log = u"Added new case {0} ('{1}', status={2}, by {3})"
self.logger.debug(log.format(id_, title, status, f_user))
self.logger.debug(log.format(id_, casename, status, f_user))
else: else:
case.status = status case.status = status
log = u"Read active case {0} ('{1}')".format(id_, title)
log = u"Read active case {0} ('{1}')".format(id_, casename)
self.logger.debug(log) self.logger.debug(log)
if case.title != title:
self.update_case_title(conn, id_, title)
case.title = title
if case.title != casename:
self.update_case_title(conn, id_, casename)
case.title = casename
case.body, case.old = body, old case.body, case.old = body, old


for case in cases[:]: for case in cases[:]:
@@ -276,21 +290,19 @@ class DRNClerkBot(Task):
log = u"Updated title of case {0} to '{1}'".format(id_, title) log = u"Updated title of case {0} to '{1}'".format(id_, title)
self.logger.debug(log) self.logger.debug(log)


def clerk(self, conn, cases):
"""Actually go through cases and modify those to be updated."""
query = "SELECT volunteer_username FROM volunteers"
with conn.cursor() as cursor:
cursor.execute(query)
volunteers = [name for (name,) in cursor.fetchall()]
def clerk(self, conn, volunteers, case):
"""Actually go through a case and modify it if it is to be updated.

Return a list of any notices to send.
"""
notices = [] notices = []
for case in cases:
log = u"Clerking case {0} ('{1}')".format(case.id, case.title)
self.logger.debug(log)
if case.status == self.STATUS_UNKNOWN:
self.save_existing_case(conn, case)
else:
notices += self.clerk_case(conn, case, volunteers)
self.logger.debug("Done clerking cases")
log = u"Clerking case {0} ('{1}')".format(case.id, case.title)
self.logger.debug(log)
if case.status == self.STATUS_UNKNOWN:
self.save_existing_case(conn, case)
else:
notices += self.clerk_case(conn, case, volunteers)
self.logger.debug("Done clerking case")
return notices return notices


def clerk_case(self, conn, case, volunteers): def clerk_case(self, conn, case, volunteers):
@@ -371,7 +383,8 @@ class DRNClerkBot(Task):
title = case.title.replace("|", "&#124;") title = case.title.replace("|", "&#124;")
template = "{{subst:" + tmpl + "|" + title + "}}" template = "{{subst:" + tmpl + "|" + title + "}}"
miss = "<!-- Template:DRN stale notice | {0} -->".format(title) miss = "<!-- Template:DRN stale notice | {0} -->".format(title)
notice = _Notice(self.very_old_title, template, miss)
too_late = lambda text: miss in text
notice = _Notice(self.very_old_title, template, too_late)
case.very_old_notified = True case.very_old_notified = True
msg = u" {0}: will notify [[{1}]] with '{2}'" msg = u" {0}: will notify [[{1}]] with '{2}'"
log = msg.format(case.id, self.very_old_title, template) log = msg.format(case.id, self.very_old_title, template)
@@ -480,12 +493,19 @@ class DRNClerkBot(Task):
if case.parties_notified: if case.parties_notified:
return [] return []


def too_late(text):
    """Tell whether *text* already contains a link to this case's page.

    Link targets are compared case-insensitively, after treating
    underscores as spaces and expanding a leading "wp:" to "wikipedia:".
    """
    def normalize(link):
        # Reduce the link target to plain, lowercased, trimmed text.
        target = link.title.strip_code().lower().strip()
        return re.sub(r"^wp:", "wikipedia:", target.replace("_", " "))

    links = mwparserfromhell.parse(text).filter_links(recursive=True)
    return any(normalize(link) == case.page.title.lower() for link in links)

notices = [] notices = []
template = "{{subst:" + self.tl_notify_party template = "{{subst:" + self.tl_notify_party
template += "|thread=" + case.title + "}} ~~~~" template += "|thread=" + case.title + "}} ~~~~"
too_late = "<!--Template:DRN-notice-->"

re_parties = "<span.*?>'''Users involved'''</span>(.*?)<span.*?>"
re_parties = r"<span.*?>'''Users involved'''</span>(.*?)<span.*?>"
text = re.search(re_parties, case.body, re.S|re.U) text = re.search(re_parties, case.body, re.S|re.U)
for line in text.group(1).splitlines(): for line in text.group(1).splitlines():
user = re.search("[:*#]{,5} \{\{User\|(.*?)\}\}", line) user = re.search("[:*#]{,5} \{\{User\|(.*?)\}\}", line)
@@ -601,18 +621,12 @@ class DRNClerkBot(Task):
log = u" {0}: no changes to commit".format(case.id) log = u" {0}: no changes to commit".format(case.id)
self.logger.debug(log) self.logger.debug(log)


def save(self, page, cases, kwargs, start):
"""Save any changes to the noticeboard."""
newtext = text = page.get()
counter = 0
for case in cases:
if case.old != case.body:
newtext = newtext.replace(case.old, case.body)
counter += 1
if newtext == text:
self.logger.info(u"Nothing to edit on [[{0}]]".format(page.title))
def save(self, case, kwargs, start):
"""Save any changes to a specific case subpage."""
page, text, newtext = case.page, case.old, case.body
if text == newtext:
self.logger.debug(u"Nothing to edit on [[{0}]]".format(page.title))
return True return True

worktime = time() - start worktime = time() - start
if worktime < 60: if worktime < 60:
log = "Waiting {0} seconds to avoid edit conflicts" log = "Waiting {0} seconds to avoid edit conflicts"
@@ -624,11 +638,9 @@ class DRNClerkBot(Task):
self.logger.warn(log) self.logger.warn(log)
self.run(**kwargs) self.run(**kwargs)
return False return False
summary = self.clerk_summary.replace("$3", str(counter))
summary = summary.replace("$4", "" if counter == 1 else "s")
page.edit(newtext, summary, minor=True, bot=True)
log = u"Saved page [[{0}]] ({1} updates)"
self.logger.info(log.format(page.title, counter))
page.edit(newtext, self.clerk_summary, minor=True, bot=True)
log = u"Saved page [[{0}]]"
self.logger.info(log.format(page.title))
return True return True


def send_notices(self, site, notices): def send_notices(self, site, notices):
@@ -651,7 +663,7 @@ class DRNClerkBot(Task):
text = page.get() text = page.get()
except exceptions.PageNotFoundError: except exceptions.PageNotFoundError:
text = "" text = ""
if notice.too_late and notice.too_late in text:
if notice.too_late(text):
log = u"Skipping [[{0}]]; was already notified with '{1}'" log = u"Skipping [[{0}]]; was already notified with '{1}'"
self.logger.info(log.format(page.title, template)) self.logger.info(log.format(page.title, template))
continue continue
@@ -708,7 +720,7 @@ class DRNClerkBot(Task):
case["volunteer_sortkey"] = int(mktime(case["case_volunteer_time"].timetuple())) case["volunteer_sortkey"] = int(mktime(case["case_volunteer_time"].timetuple()))
data += "|mu={case_modify_user}|ms={modify_sortkey}|mt={modify_time}" data += "|mu={case_modify_user}|ms={modify_sortkey}|mt={modify_time}"


title = case["case_title"].replace("_", " ").replace("|", "&#124;")
title = case["case_title"].replace("_", " ")
case["title"] = title[:47] + "..." if len(title) > 50 else title case["title"] = title[:47] + "..." if len(title) > 50 else title
case["file_time"] = self.format_time(case["case_file_time"]) case["file_time"] = self.format_time(case["case_file_time"])
case["file_sortkey"] = int(mktime(case["case_file_time"].timetuple())) case["file_sortkey"] = int(mktime(case["case_file_time"].timetuple()))
@@ -749,7 +761,7 @@ class _Case(object):
def __init__(self, id_, title, status, last_action, file_user, file_time, def __init__(self, id_, title, status, last_action, file_user, file_time,
modify_user, modify_time, volunteer_user, volunteer_time, modify_user, modify_time, volunteer_user, volunteer_time,
close_time, parties_notified, archived, very_old_notified, close_time, parties_notified, archived, very_old_notified,
last_volunteer_size, new=False):
last_volunteer_size, page=None, new=False):
self.id = id_ self.id = id_
self.title = title self.title = title
self.status = status self.status = status
@@ -765,6 +777,7 @@ class _Case(object):
self.very_old_notified = very_old_notified self.very_old_notified = very_old_notified
self.archived = archived self.archived = archived
self.last_volunteer_size = last_volunteer_size self.last_volunteer_size = last_volunteer_size
self.page = page
self.new = new self.new = new


self.original_status = status self.original_status = status


Завантаження…
Відмінити
Зберегти