浏览代码

Adding plugins from earwigbot.

tags/v1
Ben Kurtovic 12 年前
父节点
当前提交
2bded83bff
共有 21 个文件被更改,包括 3007 次插入0 次删除
  1. +6
    -0
      .gitignore
  2. +34
    -0
      commands/afc_pending.py
  3. +113
    -0
      commands/afc_report.py
  4. +162
    -0
      commands/afc_status.py
  5. +59
    -0
      commands/afc_submissions.py
  6. +74
    -0
      commands/geolocate.py
  7. +240
    -0
      commands/git_command.py
  8. +48
    -0
      commands/praise.py
  9. +34
    -0
      tasks/afc_catdelink.py
  10. +164
    -0
      tasks/afc_copyvios.py
  11. +34
    -0
      tasks/afc_dailycats.py
  12. +227
    -0
      tasks/afc_history.py
  13. +739
    -0
      tasks/afc_statistics.py
  14. +33
    -0
      tasks/afc_undated.py
  15. +34
    -0
      tasks/blp_tag.py
  16. +787
    -0
      tasks/drn_clerkbot.py
  17. +33
    -0
      tasks/image_display_resize.py
  18. +36
    -0
      tasks/schema/afc_copyvios.sql
  19. +23
    -0
      tasks/schema/afc_history.sql
  20. +68
    -0
      tasks/schema/afc_statistics.sql
  21. +59
    -0
      tasks/schema/drn_clerkbot.sql

+ 6
- 0
.gitignore 查看文件

@@ -0,0 +1,6 @@
*.pyc
*.egg
*.egg-info
.DS_Store
build
docs/_build

+ 34
- 0
commands/afc_pending.py 查看文件

@@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from earwigbot.commands import Command

class AFCPending(Command):
    """Link the user to the pending AFC submissions page and category."""
    name = "pending"
    commands = ["pending", "pend"]

    def process(self, data):
        """Reply to *data* twice: the status page link, then the category link."""
        links = (
            "Pending submissions status page: http://enwp.org/WP:AFC/ST",
            "Pending submissions category: http://enwp.org/CAT:PEND",
        )
        for link in links:
            self.reply(data, link)

+ 113
- 0
commands/afc_report.py 查看文件

@@ -0,0 +1,113 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from earwigbot import wiki
from earwigbot.commands import Command

class AFCReport(Command):
    """Get information about an AFC submission by name."""
    name = "report"

    def process(self, data):
        """Find the submission named in the command arguments and report on it.

        The arguments are joined into a title; known link prefixes are removed
        so that a pasted URL behaves like a bare title. The title is then tried
        as given, under "Wikipedia:Articles for creation/", and under
        "Wikipedia talk:Articles for creation/", in that order. The first page
        that exists is reported.

        Replies with an explanatory message (and returns) when the
        afc_statistics task cannot be fetched, when no arguments were given,
        or when none of the candidate titles exists.
        """
        self.site = self.bot.wiki.get_site()
        self.data = data

        try:
            self.statistics = self.bot.tasks.get("afc_statistics")
        except KeyError:
            e = "Cannot run command: requires afc_statistics task (from earwigbot_plugins)"
            self.logger.error(e)
            msg = "command requires afc_statistics task (from earwigbot_plugins)"
            self.reply(data, msg)
            return

        if not data.args:
            msg = "What submission do you want me to give information about?"
            self.reply(data, msg)
            return

        title = " ".join(data.args)
        # Accept both http and https forms of the same two link styles.
        link_prefixes = (
            "https://en.wikipedia.org/wiki/",
            "http://en.wikipedia.org/wiki/",
            "https://enwp.org/",
            "http://enwp.org/",
        )
        for prefix in link_prefixes:
            title = title.replace(prefix, "")
        title = title.strip()

        # Given '!report Foo', try [[Foo]], then
        # [[Wikipedia:Articles for creation/Foo]], then
        # [[Wikipedia talk:Articles for creation/Foo]]:
        candidates = (
            title,
            "/".join(("Wikipedia:Articles for creation", title)),
            "/".join(("Wikipedia talk:Articles for creation", title)),
        )
        for candidate in candidates:
            page = self.get_page(candidate)
            if page:
                return self.report(page)

        self.reply(data, "Submission \x0302{0}\x0F not found.".format(title))

    def get_page(self, title):
        """Return the page called *title* if it exists, otherwise None.

        Redirects are not followed, so a redirect page is returned as itself.
        """
        page = self.site.get_page(title, follow_redirects=False)
        if page.exists == page.PAGE_EXISTS:
            return page
        return None

    def report(self, page):
        """Send a three-line report about *page* to the requesting channel.

        The lines give the short title and URL, the status from get_status(),
        and the name and talk page URL of the page's creator. The third line
        is labelled "Reviewed by" instead of "Submitted by" when the status is
        "accepted".
        """
        url = page.url.encode("utf8")
        url = url.replace("en.wikipedia.org/wiki", "enwp.org")
        short = self.statistics.get_short_title(page.title)
        status = self.get_status(page)
        user = page.get_creator()
        user_name = user.name
        user_url = user.get_talkpage().url.encode("utf8")

        msg1 = "AfC submission report for \x0302{0}\x0F ({1}):"
        msg2 = "Status: \x0303{0}\x0F"
        msg3 = "Submitted by \x0302{0}\x0F ({1})"
        if status == "accepted":
            msg3 = "Reviewed by \x0302{0}\x0F ({1})"

        self.reply(self.data, msg1.format(short, url))
        self.say(self.data.chan, msg2.format(status))
        self.say(self.data.chan, msg3.format(user_name, user_url))

    def get_status(self, page):
        """Return a human-readable status string for *page*.

        A redirect is "accepted" when its target is in the main namespace and
        "redirect" otherwise. For other pages the status codes returned by the
        afc_statistics task are checked in a fixed priority order; "unknown"
        is returned when none of them is present.
        """
        if page.is_redirect:
            target = page.get_redirect_target()
            if self.site.get_page(target).namespace == wiki.NS_MAIN:
                return "accepted"
            return "redirect"

        statuses = self.statistics.get_statuses(page.get())
        # Ordered by priority: the first code found decides the label.
        labels = (
            ("R", "being reviewed"),
            ("H", "pending draft"),
            ("P", "pending submission"),
            ("T", "unsubmitted draft"),
            ("D", "declined"),
        )
        for code, label in labels:
            if code in statuses:
                return label
        return "unknown"

+ 162
- 0
commands/afc_status.py 查看文件

@@ -0,0 +1,162 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import re

from earwigbot.commands import Command

class AFCStatus(Command):
    """Get the number of pending AfC submissions, open redirect requests, and
    open file upload requests."""
    name = "status"
    commands = ["status", "count", "num", "number"]
    hooks = ["join", "msg"]

    def check(self, data):
        """Return True if this command should handle *data*.

        That is the case for one of our command names, or for a JOIN of
        #wikipedia-en-afc by anyone other than the bot's own frontend nick.
        """
        if data.is_command and data.command in self.commands:
            return True
        try:
            if data.line[1] == "JOIN" and data.chan == "#wikipedia-en-afc":
                if data.nick != self.config.irc["frontend"]["nick"]:
                    return True
        except IndexError:
            # Line too short to carry a JOIN; not ours.
            pass
        return False

    def process(self, data):
        """Handle a channel join or a status command.

        On JOIN the full status is sent to the joining nick as a notice. For a
        command, the first argument selects what to report (submissions,
        redirects, files, aggregate, or an uncoloured summary); with no
        argument the full coloured summary is sent.
        """
        self.site = self.bot.wiki.get_site()

        if data.line[1] == "JOIN":
            status = " ".join(("\x02Current status:\x0F", self.get_status()))
            self.notice(data.nick, status)
            return

        if not data.args:
            self.reply(data, self.get_status())
            return

        action = data.args[0].lower()
        if action.startswith("sub") or action == "s":
            subs = self.count_submissions()
            msg = "There are \x0305{0}\x0F pending AfC submissions (\x0302WP:AFC\x0F)."
            self.reply(data, msg.format(subs))

        elif action.startswith("redir") or action == "r":
            redirs = self.count_redirects()
            msg = "There are \x0305{0}\x0F open redirect requests (\x0302WP:AFC/R\x0F)."
            self.reply(data, msg.format(redirs))

        elif action.startswith("file") or action == "f":
            # Count file upload requests here, not redirect requests.
            files = self.count_files()
            msg = "There are \x0305{0}\x0F open file upload requests (\x0302WP:FFU\x0F)."
            self.reply(data, msg.format(files))

        elif action.startswith("agg") or action == "a":
            try:
                agg_num = int(data.args[1])
            except IndexError:
                # No explicit number given: compute it from the live counts.
                agg_data = (self.count_submissions(),
                            self.count_redirects(), self.count_files())
                agg_num = self.get_aggregate_number(agg_data)
            except ValueError:
                msg = "\x0303{0}\x0F isn't a number!"
                self.reply(data, msg.format(data.args[1]))
                return
            aggregate = self.get_aggregate(agg_num)
            msg = "Aggregate is \x0305{0}\x0F (AfC {1})."
            self.reply(data, msg.format(agg_num, aggregate))

        elif action.startswith("nocolor") or action == "n":
            self.reply(data, self.get_status(color=False))

        else:
            msg = "Unknown argument: \x0303{0}\x0F. Valid args are 'subs', 'redirs', 'files', 'agg', 'nocolor'."
            self.reply(data, msg.format(data.args[0]))

    def get_status(self, color=True):
        """Return the one-line summary of all three counts and the aggregate.

        With color=False the counts are formatted without the colour control
        codes (the aggregate description from get_aggregate() is unchanged).
        """
        subs = self.count_submissions()
        redirs = self.count_redirects()
        files = self.count_files()
        agg_num = self.get_aggregate_number((subs, redirs, files))
        aggregate = self.get_aggregate(agg_num)

        if color:
            msg = "Articles for creation {0} (\x0302AFC\x0F: \x0305{1}\x0F; \x0302AFC/R\x0F: \x0305{2}\x0F; \x0302FFU\x0F: \x0305{3}\x0F)."
        else:
            msg = "Articles for creation {0} (AFC: {1}; AFC/R: {2}; FFU: {3})."
        return msg.format(aggregate, subs, redirs, files)

    def count_submissions(self):
        """Returns the number of open AFC submissions (count of CAT:PEND)."""
        # Subtract two for [[Wikipedia:Articles for creation/Redirects]] and
        # [[Wikipedia:Files for upload]], which aren't real submissions:
        return self.site.get_category("Pending AfC submissions").pages - 2

    def _count_open_requests(self, title, closed_marker):
        """Return the "==...==" heading count on *title* minus closed requests.

        *closed_marker* is a lowercase string; each occurrence in the
        lowercased page text counts as one closed request.
        """
        content = self.site.get_page(title).get()
        total = len(re.findall(r"^\s*==(.*?)==\s*$", content, re.MULTILINE))
        closed = content.lower().count(closed_marker)
        return total - closed

    def count_redirects(self):
        """Returns the number of open redirect submissions. Calculated as the
        total number of submissions minus the closed ones."""
        title = "Wikipedia:Articles for creation/Redirects"
        return self._count_open_requests(title, "{{afc-c|b}}")

    def count_files(self):
        """Returns the number of open WP:FFU (Files For Upload) requests.
        Calculated as the total number of requests minus the closed ones."""
        title = "Wikipedia:Files for upload"
        return self._count_open_requests(title, "{{ifu-c|b}}")

    def get_aggregate(self, num):
        """Returns a human-readable AFC status based on the number of pending
        AFC submissions, open redirect requests, and open FFU requests. This
        does not match {{AFC status}} directly because the algorithm factors in
        WP:AFC/R and WP:FFU while the template only looks at the main
        submissions. The reasoning is that AFC/R and FFU are still part of
        the project, so even if there are no pending submissions, a backlog at
        FFU (for example) indicates that our work is *not* done and the
        project-wide backlog is most certainly *not* clear."""
        if num == 0:
            return "is \x02\x0303clear\x0F"
        elif num <= 200:
            return "is \x0303almost clear\x0F"
        elif num <= 400:
            return "is \x0312normal\x0F"
        elif num <= 600:
            return "is \x0307lightly backlogged\x0F"
        elif num <= 900:
            return "is \x0304backlogged\x0F"
        elif num <= 1200:
            return "is \x02\x0304heavily backlogged\x0F"
        else:
            return "is \x02\x1F\x0304severely backlogged\x0F"

    def get_aggregate_number(self, counts):
        """Returns an 'aggregate number' based on the real number of pending
        submissions in CAT:PEND (subs), open redirect submissions in WP:AFC/R
        (redirs), and open files-for-upload requests in WP:FFU (files).

        *counts* is the 3-tuple (subs, redirs, files). Redirect and file
        requests each count for half, rounded down.
        """
        subs, redirs, files = counts
        return subs + (redirs // 2) + (files // 2)

+ 59
- 0
commands/afc_submissions.py 查看文件

@@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from earwigbot.commands import Command

class AFCSubmissions(Command):
    """Link the user directly to some pending AFC submissions."""
    name = "submissions"
    commands = ["submissions", "subs"]

    def setup(self):
        """Load the list of page titles to leave out of the results.

        The command's own "ignoreList" config entry is preferred; failing
        that, the afc_statistics task's "ignoreList" is used; failing that,
        nothing is ignored.
        """
        try:
            self.ignore_list = self.config.commands[self.name]["ignoreList"]
        except KeyError:
            try:
                ignores = self.config.tasks["afc_statistics"]["ignoreList"]
                self.ignore_list = ignores
            except KeyError:
                self.ignore_list = []

    def process(self, data):
        """Reply with URLs of up to *number* pending submissions.

        *number* is the first argument (between 1 and 5) and defaults to 3.
        A non-numeric or out-of-range argument gets an error reply instead.
        """
        if data.args:
            try:
                number = int(data.args[0])
            except ValueError:
                self.reply(data, "Argument must be a number.")
                return
            if number > 5:
                msg = "Cannot get more than five submissions at a time."
                self.reply(data, msg)
                return
            if number < 1:
                # Zero or negative values would produce a bogus category
                # limit and a from-the-end slice of the results below.
                self.reply(data, "Must request at least one submission.")
                return
        else:
            number = 3

        site = self.bot.wiki.get_site()
        category = site.get_category("Pending AfC submissions")
        # Ask for extra members so that dropping ignored pages still leaves
        # enough results to fill the request.
        members = category.get_members(limit=number + len(self.ignore_list))
        urls = [member.url.encode("utf8") for member in members
                if member.title not in self.ignore_list]
        pages = ", ".join(urls[:number])
        self.reply(data, "{0} pending AfC subs: {1}".format(number, pages))

+ 74
- 0
commands/geolocate.py 查看文件

@@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import json
import urllib2

from earwigbot.commands import Command

class Geolocate(Command):
    """Geolocate an IP address (via http://ipinfodb.com/)."""
    name = "geolocate"
    commands = ["geolocate", "locate", "geo", "ip"]

    def setup(self):
        """Read the ipinfodb API key from config, warning if it is absent.

        self.key is set to None when no key is configured; process() checks
        for that before making any request.
        """
        self.config.decrypt(self.config.commands, self.name, "apiKey")
        try:
            self.key = self.config.commands[self.name]["apiKey"]
        except KeyError:
            self.key = None
            log = 'Cannot use without an API key for http://ipinfodb.com/ stored as config.commands["{0}"]["apiKey"]'
            self.logger.warn(log.format(self.name))

    def process(self, data):
        """Look up the IP given as the first argument and reply with its location.

        Replies with an error when no argument was given or no API key is
        configured, and with a short notice when the service reports the
        address as not found (all fields empty) or reserved (all fields "-").
        """
        if not data.args:
            self.reply(data, "Please specify an IP to lookup.")
            return

        if not self.key:
            msg = 'I need an API key for http://ipinfodb.com/ stored as \x0303config.commands["{0}"]["apiKey"]\x0F.'
            log = 'Need an API key for http://ipinfodb.com/ stored as config.commands["{0}"]["apiKey"]'
            # msg already ends with a period; don't append another one.
            self.reply(data, msg.format(self.name))
            self.logger.error(log.format(self.name))
            return

        address = data.args[0]
        # NOTE(review): address is user-supplied and is placed into the query
        # string unescaped -- consider validating or quoting it.
        url = "http://api.ipinfodb.com/v3/ip-city/?key={0}&ip={1}&format=json"
        query = urllib2.urlopen(url.format(self.key, address)).read()
        res = json.loads(query)

        country = res["countryName"].title()
        region = res["regionName"].title()
        city = res["cityName"].title()
        latitude = res["latitude"]
        longitude = res["longitude"]
        utcoffset = res["timeZone"]
        if not country and not region and not city:
            self.reply(data, "IP \x0302{0}\x0F not found.".format(address))
            return
        if country == "-" and region == "-" and city == "-":
            self.reply(data, "IP \x0302{0}\x0F is reserved.".format(address))
            return

        msg = "{0}, {1}, {2} ({3}, {4}), UTC {5}"
        geo = msg.format(country, region, city, latitude, longitude, utcoffset)
        self.reply(data, geo)

+ 240
- 0
commands/git_command.py 查看文件

@@ -0,0 +1,240 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import time

import git

from earwigbot.commands import Command

class Git(Command):
    """Commands to interface with the bot's git repository; use '!git' for a
    sub-command list."""
    name = "git"

    def setup(self):
        """Load the repo-name -> path mapping from config (None if absent)."""
        try:
            self.repos = self.config.commands[self.name]["repos"]
        except KeyError:
            self.repos = None

    def process(self, data):
        """Validate the request and dispatch to the matching do_* handler.

        Only bot owners may use the command. The first argument is the
        sub-command and the second the repository name, which must be a key
        of the configured repos. Every failure is reported with a reply.
        """
        self.data = data
        if not self.config.irc["permissions"].is_owner(data):
            msg = "You must be a bot owner to use this command."
            self.reply(data, msg)
            return
        if not data.args or data.args[0] == "help":
            self.do_help()
            return
        if not self.repos:
            self.reply(data, "No repos are specified in the config file.")
            return

        command = data.args[0]
        try:
            repo_name = data.args[1]
        except IndexError:
            repos = self.get_repos()
            msg = "Which repo do you want to work with (options are {0})?"
            self.reply(data, msg.format(repos))
            return
        if repo_name not in self.repos:
            repos = self.get_repos()
            msg = "Repository must be one of the following: {0}."
            self.reply(data, msg.format(repos))
            return
        self.repo = git.Repo(self.repos[repo_name])

        handlers = {
            "branch": self.do_branch,
            "branches": self.do_branches,
            "checkout": self.do_checkout,
            "delete": self.do_delete,
            "pull": self.do_pull,
            "status": self.do_status,
        }
        handler = handlers.get(command)
        if handler is None:  # They asked us to do something we don't know
            msg = "Unknown argument: \x0303{0}\x0F.".format(data.args[0])
            self.reply(data, msg)
            return
        handler()

    def get_repos(self):
        """Return the configured repos as a comma-separated "name (path)" list."""
        pairs = self.repos.items()
        repos = ["\x0302{0}\x0F ({1})".format(k, v) for k, v in pairs]
        return ", ".join(repos)

    def get_remote(self):
        """Return the remote named by the third argument (default "origin").

        If the repository has no such remote, a reply is sent and None is
        returned; callers must check for that.
        """
        try:
            remote_name = self.data.args[2]
        except IndexError:
            remote_name = "origin"
        try:
            return getattr(self.repo.remotes, remote_name)
        except AttributeError:
            msg = "Unknown remote: \x0302{0}\x0F.".format(remote_name)
            self.reply(self.data, msg)
            return None

    def get_time_since(self, date):
        """Return the time elapsed since *date* as "N seconds/minutes/hours/days".

        *date* is compared against time.time(), so it must be a timestamp in
        seconds since the epoch. (Building "now" with mktime(gmtime()) would
        skew the result by the local UTC offset.)
        """
        diff = time.time() - date
        if diff < 60:
            return "{0} seconds".format(int(diff))
        if diff < 60 * 60:
            return "{0} minutes".format(int(diff / 60))
        if diff < 60 * 60 * 24:
            return "{0} hours".format(int(diff / 60 / 60))
        return "{0} days".format(int(diff / 60 / 60 / 24))

    def do_help(self):
        """Display all commands."""
        descriptions = {
            "branch": "get current branch",
            "branches": "get all branches",
            "checkout": "switch branches",
            "delete": "delete an old branch",
            "pull": "update everything from the remote server",
            "status": "check if we are up-to-date",
        }
        subcommands = ", ".join(
            "\x0303{0}\x0F ({1})".format(key, descriptions[key])
            for key in sorted(descriptions))
        msg = "Sub-commands are: {0}; repos are: {1}. Syntax: !git \x0303subcommand\x0F \x0302repo\x0F."
        self.reply(self.data, msg.format(subcommands, self.get_repos()))

    def do_branch(self):
        """Get our current branch."""
        branch = self.repo.active_branch.name
        msg = "Currently on branch \x0302{0}\x0F.".format(branch)
        self.reply(self.data, msg)

    def do_branches(self):
        """Get a list of branches."""
        branches = [branch.name for branch in self.repo.branches]
        msg = "Branches: \x0302{0}\x0F.".format(", ".join(branches))
        self.reply(self.data, msg)

    def do_checkout(self):
        """Switch branches."""
        try:
            target = self.data.args[2]
        except IndexError:  # No branch name provided
            self.reply(self.data, "Switch to which branch?")
            return

        current_branch = self.repo.active_branch.name
        if target == current_branch:
            msg = "Already on \x0302{0}\x0F!".format(target)
            self.reply(self.data, msg)
            return

        try:
            ref = getattr(self.repo.branches, target)
        except AttributeError:
            msg = "Branch \x0302{0}\x0F doesn't exist!".format(target)
            self.reply(self.data, msg)
        else:
            ref.checkout()
            ms = "Switched from branch \x0302{0}\x0F to \x0302{1}\x0F."
            msg = ms.format(current_branch, target)
            self.reply(self.data, msg)
            log = "{0} checked out branch {1} of {2}"
            logmsg = log.format(self.data.nick, target, self.repo.working_dir)
            self.logger.info(logmsg)

    def do_delete(self):
        """Delete a branch, while making sure that we are not already on it."""
        try:
            target = self.data.args[2]
        except IndexError:  # No branch name provided
            self.reply(self.data, "Delete which branch?")
            return

        current_branch = self.repo.active_branch.name
        if current_branch == target:
            msg = "You're currently on this branch; please checkout to a different branch before deleting."
            self.reply(self.data, msg)
            return

        try:
            ref = getattr(self.repo.branches, target)
        except AttributeError:
            msg = "Branch \x0302{0}\x0F doesn't exist!".format(target)
            self.reply(self.data, msg)
        else:
            self.repo.git.branch("-d", ref)
            msg = "Branch \x0302{0}\x0F has been deleted locally."
            self.reply(self.data, msg.format(target))
            log = "{0} deleted branch {1} of {2}"
            logmsg = log.format(self.data.nick, target, self.repo.working_dir)
            self.logger.info(logmsg)

    def do_pull(self):
        """Pull from our remote repository."""
        branch = self.repo.active_branch.name
        msg = "Pulling from remote (currently on \x0302{0}\x0F)..."
        self.reply(self.data, msg.format(branch))

        remote = self.get_remote()
        if not remote:
            return
        result = remote.pull()
        updated = [info for info in result if info.flags != info.HEAD_UPTODATE]

        if updated:
            branches = ", ".join([info.ref.remote_head for info in updated])
            msg = "Done; updates to \x0302{0}\x0F (from {1})."
            self.reply(self.data, msg.format(branches, remote.url))
            log = "{0} pulled {1} of {2} (updates to {3})"
            self.logger.info(log.format(self.data.nick, remote.name,
                                        self.repo.working_dir, branches))
        else:
            self.reply(self.data, "Done; no new changes.")
            log = "{0} pulled {1} of {2} (no updates)"
            self.logger.info(log.format(self.data.nick, remote.name,
                                        self.repo.working_dir))

    def do_status(self):
        """Check if we have anything to pull."""
        remote = self.get_remote()
        if not remote:
            return
        since = self.get_time_since(self.repo.head.object.committed_date)
        result = remote.fetch(dry_run=True)
        updated = [info for info in result if info.flags != info.HEAD_UPTODATE]

        if updated:
            branches = ", ".join([info.ref.remote_head for info in updated])
            msg = "Last local commit was \x02{0}\x0F ago; updates to \x0302{1}\x0F."
            self.reply(self.data, msg.format(since, branches))
            log = "{0} got status of {1} of {2} (updates to {3})"
            self.logger.info(log.format(self.data.nick, remote.name,
                                        self.repo.working_dir, branches))
        else:
            msg = "Last commit was \x02{0}\x0F ago. Local copy is up-to-date with remote."
            self.reply(self.data, msg.format(since))
            # This is a status check, not a pull; log it as such.
            log = "{0} got status of {1} of {2} (no updates)"
            self.logger.info(log.format(self.data.nick, remote.name,
                                        self.repo.working_dir))

+ 48
- 0
commands/praise.py 查看文件

@@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from earwigbot.commands import Command

class Praise(Command):
    """Praise people!"""
    name = "praise"

    def setup(self):
        """Load the configured praises, falling back to an empty list."""
        try:
            praises = self.config.commands[self.name]["praises"]
        except KeyError:
            praises = []
        self.praises = praises

    def check(self, data):
        """Accept "praise" itself or any command name found in the praises."""
        wanted = data.command == "praise" or data.command in self.praises
        return data.is_command and wanted

    def process(self, data):
        """Say the configured praise, or explain how the command is used."""
        if data.command in self.praises:
            self.say(data.chan, self.praises[data.command])
            return

        if data.args:
            msg = "You're doing it wrong."
        else:
            msg = "You use this command to praise certain people. Who they are is a secret."
        self.reply(data, msg)

+ 34
- 0
tasks/afc_catdelink.py 查看文件

@@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from earwigbot.tasks import Task

class AFCCatDelink(Task):
    """A task to delink mainspace categories in declined [[WP:AFC]]
    submissions."""
    name = "afc_catdelink"

    def setup(self):
        """Placeholder: there is nothing to set up yet."""
        return None

    def run(self, **kwargs):
        """Placeholder: the task body is not implemented and does nothing."""
        return None

+ 164
- 0
tasks/afc_copyvios.py 查看文件

@@ -0,0 +1,164 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from hashlib import sha256
from os.path import expanduser
from threading import Lock
from urllib import quote

import oursql

from earwigbot.tasks import Task

class AFCCopyvios(Task):
"""A task to check newly-edited [[WP:AFC]] submissions for copyright
violations."""
name = "afc_copyvios"
number = 1

def setup(self):
    """Read this task's config section and prepare the SQL connection data.

    Every setting has a default, so a missing config section is fine. The
    connection keyword arguments come from the "sql" entry, with
    read_default_file pointed at ~/.my.cnf.
    """
    cfg = self.config.tasks.get(self.name, {})
    self.template = cfg.get("template", "AfC suspected copyvio")
    self.ignore_list = cfg.get("ignoreList", [])
    self.min_confidence = cfg.get("minConfidence", 0.5)
    self.max_queries = cfg.get("maxQueries", 10)
    self.cache_results = cfg.get("cacheResults", False)
    default_summary = "Tagging suspected [[WP:COPYVIO|copyright violation]] of {url}."
    self.summary = self.make_summary(cfg.get("summary", default_summary))

    # Connection data for our SQL database. Work on a copy so that adding
    # read_default_file does not modify the config's own "sql" mapping:
    kwargs = dict(cfg.get("sql", {}))
    kwargs["read_default_file"] = expanduser("~/.my.cnf")
    self.conn_data = kwargs
    self.db_access_lock = Lock()

def run(self, **kwargs):
    """Entry point for the bot task.

    Takes a page title in kwargs and checks it for copyvios, adding
    {{self.template}} at the top if a copyvio has been detected. A page is
    only checked once (processed pages are stored by page_id in an SQL
    database).

    Does nothing when shutoff is enabled. Raises KeyError if kwargs has no
    "page" entry.
    """
    if self.shutoff_enabled():
        return
    title = kwargs["page"]
    page = self.bot.wiki.get_site().get_page(title)
    with self.db_access_lock:
        self.conn = oursql.connect(**self.conn_data)
        try:
            self.process(page)
        finally:
            # A new connection is opened on every run; close it even if
            # processing raised so connections do not pile up.
            self.conn.close()

def process(self, page):
    """Detect copyvios in 'page' and add a note if any are found.

    Pages in the ignore list and pages already recorded as processed are
    skipped. Otherwise the page is checked; if a violation is found it is
    re-compared against the matching URL after reloading the page, and the
    template is only prepended when that second comparison still reports a
    violation. The page is then recorded as processed and, if enabled, the
    result is cached.
    """
    title = page.title
    if title in self.ignore_list:
        msg = u"Skipping page in ignore list: [[{0}]]"
        self.logger.info(msg.format(title))
        return

    pageid = page.pageid
    if self.has_been_processed(pageid):
        msg = u"Skipping check on already processed page [[{0}]]"
        self.logger.info(msg.format(title))
        return

    self.logger.info(u"Checking [[{0}]]".format(title))
    result = page.copyvio_check(self.min_confidence, self.max_queries)
    url = result.url
    orig_conf = "{0}%".format(round(result.confidence * 100, 2))

    if result.violation:
        # Things can change in the minute that it takes to do a check.
        # Confirm that a violation still holds true:
        page.load()
        confirm = page.copyvio_compare(url, self.min_confidence)
        new_conf = "{0}%".format(round(confirm.confidence * 100, 2))
        if confirm.violation:
            safeurl = quote(url.encode("utf8"), safe="/:").decode("utf8")
            content = page.get()
            # Literal braces in a format string are written doubled, so
            # this renders as "{{<template>|url=...|confidence=...}}".
            template = u"{{{{{0}|url={1}|confidence={2}}}}}\n"
            template = template.format(self.template, safeurl, new_conf)
            newtext = template + content
            if "{url}" in self.summary:
                page.edit(newtext, self.summary.format(url=url))
            else:
                page.edit(newtext, self.summary)
            msg = u"Found violation: [[{0}]] -> {1} ({2} confidence)"
            self.logger.info(msg.format(title, url, new_conf))
        else:
            # Unconfirmed: log it, but do not tag the page.
            msg = u"A violation was detected in [[{0}]], but couldn't be confirmed."
            msg += u" It may have just been edited (best: {1} at {2} -> {3} confidence)"
            self.logger.info(msg.format(title, url, orig_conf, new_conf))
    else:
        msg = u"No violations detected in [[{0}]] (best: {1} at {2} confidence)"
        self.logger.info(msg.format(title, url, orig_conf))

    self.log_processed(pageid)
    if self.cache_results:
        self.cache_result(page, result)

def has_been_processed(self, pageid):
    """Tell whether pageid is already present in the 'processed' table.

    Returns True if the lookup yields at least one row, otherwise False.
    """
    lookup = "SELECT 1 FROM processed WHERE page_id = ?"
    with self.conn.cursor() as cur:
        cur.execute(lookup, (pageid,))
        rows = cur.fetchall()
    return bool(rows)

def log_processed(self, pageid):
    """Record pageid in the 'processed' table.

    The insert is unconditional; an exception is expected if the page has
    already been recorded (presumably through a uniqueness constraint on
    the table - the schema is not visible from here).
    """
    insert = "INSERT INTO processed VALUES (?)"
    params = (pageid,)
    with self.conn.cursor() as cur:
        cur.execute(insert, params)

def cache_result(self, page, result):
    """Store the check's result in a cache table temporarily.

    The cache contains the page's ID, a hash of its content, the URL of the
    best match, the time of caching, and the number of queries used. It
    will replace any existing cache entries for that page.

    The cache is intended for EarwigBot's complementary Toolserver web
    interface, in which copyvio checks can be done separately from the bot.
    The cache saves time and money by saving the result of the web search
    but neither the result of the comparison nor any actual text (which
    could violate data retention policy). Cache entries are (intended to
    be) retained for three days; this task does not remove old entries
    (that is handled by the Toolserver component).

    This will only be called if ``cache_results == True`` in the task's
    config, which is ``False`` by default.
    """
    pageid = page.pageid
    content = page.get()
    if not isinstance(content, bytes):
        # hashlib works on bytes; encode text explicitly so that pages
        # with non-ASCII characters can be hashed as well.
        content = content.encode("utf8")
    digest = sha256(content).hexdigest()
    query1 = "SELECT 1 FROM cache WHERE cache_id = ?"
    query2 = "DELETE FROM cache WHERE cache_id = ?"
    query3 = "INSERT INTO cache VALUES (?, ?, ?, CURRENT_TIMESTAMP, ?, ?)"
    with self.conn.cursor() as cursor:
        cursor.execute(query1, (pageid,))
        if cursor.fetchall():
            cursor.execute(query2, (pageid,))
        # NOTE(review): the meaning of the trailing 0 depends on the cache
        # table's last column, which is not visible here.
        args = (pageid, digest, result.url, result.queries, 0)
        cursor.execute(query3, args)

+ 34
- 0
tasks/afc_dailycats.py 查看文件

@@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from earwigbot.tasks import Task

class AFCDailyCats(Task):
    """A task to create daily categories for [[WP:AFC]]."""
    name = "afc_dailycats"
    number = 3

    def setup(self):
        """Currently a no-op: nothing is configured for this task."""

    def run(self, **kwargs):
        """Currently a no-op: the keyword arguments are ignored."""

+ 227
- 0
tasks/afc_history.py 查看文件

@@ -0,0 +1,227 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from collections import OrderedDict
from datetime import datetime, timedelta
from itertools import count
from os.path import expanduser
from threading import Lock
from time import sleep

from matplotlib import pyplot as plt
from numpy import arange
import oursql

from earwigbot import wiki
from earwigbot.tasks import Task

class AFCHistory(Task):
    """A task to generate charts about AfC submissions over time.

    The main function of the task is to work through the "AfC submissions by
    date" categories (e.g. [[Category:AfC submissions by date/12 July 2011]])
    and determine the number of declined, accepted, and currently pending
    submissions every day.

    This information is saved to a MySQL database ("u_earwig_afc_history") and
    used to generate a graph showing the number of AfC submissions by date
    with matplotlib and numpy. The chart is saved as a PNG to
    config.tasks["afc_history"]["graph"]["dest"], which defaults to
    "afc_history.png".
    """
    name = "afc_history"

    # Valid submission statuses:
    STATUS_NONE = 0
    STATUS_PEND = 1
    STATUS_DECLINE = 2
    STATUS_ACCEPT = 3

    def setup(self):
        """Read the task's settings from config.tasks[self.name]."""
        cfg = self.config.tasks.get(self.name, {})
        self.num_days = cfg.get("days", 90)
        self.categories = cfg.get("categories", {})

        # Graph stuff:
        self.graph = cfg.get("graph", {})
        self.destination = self.graph.get("dest", "afc_history.png")

        # Connection data for our SQL database:
        kwargs = cfg.get("sql", {})
        kwargs["read_default_file"] = expanduser("~/.my.cnf")
        self.conn_data = kwargs
        self.db_access_lock = Lock()

    def run(self, **kwargs):
        """Entry point for the task.

        kwargs["action"] selects what to do: "update" refreshes the
        database and "generate" draws the chart; any other value does
        nothing. kwargs["days"] overrides the configured number of days.
        The SQL connection is opened for the duration of the run and closed
        afterwards, all while holding self.db_access_lock.
        """
        self.site = self.bot.wiki.get_site()
        with self.db_access_lock:
            self.conn = oursql.connect(**self.conn_data)

            action = kwargs.get("action")
            try:
                num_days = int(kwargs.get("days", self.num_days))
                if action == "update":
                    self.update(num_days)
                elif action == "generate":
                    self.generate(num_days)
            finally:
                self.conn.close()

    def update(self, num_days):
        """Update the stored data for the most recent num_days categories.

        Sleeps for ten seconds after each category.
        """
        self.logger.info("Updating past {0} days".format(num_days))
        generator = self.backwards_cat_iterator()
        for _ in xrange(num_days):
            category = next(generator)
            date = category.title.split("/")[-1]
            self.update_date(date, category)
            sleep(10)
        self.logger.info("Update complete")

    def generate(self, num_days):
        """Draw the chart for the past num_days days and save it to disk.

        The chart is written to self.destination (with "~" expanded).
        """
        self.logger.info("Generating chart for past {0} days".format(num_days))
        data = OrderedDict()
        generator = self.backwards_cat_iterator()
        for _ in xrange(num_days):
            category = next(generator)
            date = category.title.split("/")[-1]
            data[date] = self.get_date_counts(date)

        data = OrderedDict(reversed(data.items()))  # Oldest to most recent
        dest = expanduser(self.destination)
        try:
            self.generate_chart(data)
            plt.savefig(dest)
        finally:
            # pyplot keeps the current figure alive between calls; close it
            # so that the next run starts from an empty figure instead of
            # drawing on top of this one.
            plt.close()
        self.logger.info("Chart saved to {0}".format(dest))

    def backwards_cat_iterator(self):
        """Yield the by-date categories endlessly, starting from today (UTC).

        Each category title is self.categories["dateBase"] followed by "/"
        and the date formatted as "%d %B %Y"; every step goes back one day.
        """
        date_base = self.categories["dateBase"]
        current = datetime.utcnow()
        while 1:
            subcat = current.strftime("%d %B %Y")
            title = "/".join((date_base, subcat))
            yield self.site.get_category(title)
            current -= timedelta(1)  # Subtract one day from date

    def update_date(self, date, category):
        """Synchronize the database rows for the members of one category.

        Members whose status is STATUS_NONE are removed from the table if
        present; other members are inserted, or updated when their stored
        date or status differs.
        """
        msg = "Updating {0} ([[{1}]])".format(date, category.title)
        self.logger.debug(msg)

        q_select = "SELECT page_date, page_status FROM page WHERE page_id = ?"
        q_delete = "DELETE FROM page WHERE page_id = ?"
        q_update = "UPDATE page SET page_date = ?, page_status = ? WHERE page_id = ?"
        q_insert = "INSERT INTO page VALUES (?, ?, ?)"
        # NOTE(review): members are unpacked as (title, pageid) pairs below;
        # confirm that this matches what Category.get_members() yields.
        members = category.get_members()

        with self.conn.cursor() as cursor:
            for title, pageid in members:
                cursor.execute(q_select, (pageid,))
                stored = cursor.fetchall()
                status = self.get_status(title, pageid)

                if status == self.STATUS_NONE:
                    if stored:
                        cursor.execute(q_delete, (pageid,))
                    continue

                if stored:
                    stored_date, stored_status = stored[0]
                    if date != stored_date or status != stored_status:
                        cursor.execute(q_update, (date, status, pageid))

                else:
                    cursor.execute(q_insert, (pageid, date, status))

    def get_status(self, title, pageid):
        """Return the STATUS_* constant describing a category member.

        File talk pages map to STATUS_NONE. Talk pages map to STATUS_ACCEPT
        unless the page on the other side of the talk toggle is a redirect.
        All other pages are classified by category membership, looked up
        with an SQL query against the site's database.
        """
        page = self.site.get_page(title)
        ns = page.namespace

        if ns == wiki.NS_FILE_TALK:  # Ignore accepted FFU requests
            return self.STATUS_NONE

        if ns == wiki.NS_TALK:
            new_page = page.toggle_talk()
            sleep(2)
            if new_page.is_redirect:
                return self.STATUS_NONE  # Ignore accepted AFC/R requests
            return self.STATUS_ACCEPT

        cats = self.categories
        sq = self.site.sql_query
        query = "SELECT 1 FROM categorylinks WHERE cl_to = ? AND cl_from = ?"
        match = lambda cat: list(sq(query, (cat.replace(" ", "_"), pageid)))

        if match(cats["pending"]):
            return self.STATUS_PEND
        elif match(cats["unsubmitted"]):
            return self.STATUS_NONE
        elif match(cats["declined"]):
            return self.STATUS_DECLINE
        return self.STATUS_NONE

    def get_date_counts(self, date):
        """Return a dict mapping each tracked status to its count for date.

        The keys are STATUS_PEND, STATUS_DECLINE and STATUS_ACCEPT.
        """
        query = "SELECT COUNT(*) FROM page WHERE page_date = ? AND page_status = ?"
        statuses = [self.STATUS_PEND, self.STATUS_DECLINE, self.STATUS_ACCEPT]
        counts = {}
        with self.conn.cursor() as cursor:
            for status in statuses:
                cursor.execute(query, (date, status))
                counts[status] = cursor.fetchall()[0][0]
        return counts

    def generate_chart(self, data):
        """Draw a stacked bar chart of 'data' on the current pyplot figure.

        'data' maps date labels to the dicts produced by get_date_counts(),
        ordered from oldest to most recent. Titles, sizes and colors are
        read from self.graph, with defaults for anything missing. The
        figure is only drawn here; saving it is left to the caller.
        """
        plt.title(self.graph.get("title", "AfC submissions by date"))
        plt.xlabel(self.graph.get("xaxis", "Date"))
        plt.ylabel(self.graph.get("yaxis", "Submissions"))

        pends = [d[self.STATUS_PEND] for d in data.itervalues()]
        declines = [d[self.STATUS_DECLINE] for d in data.itervalues()]
        accepts = [d[self.STATUS_ACCEPT] for d in data.itervalues()]
        pends_declines = [p + d for p, d in zip(pends, declines)]
        ind = arange(len(data))
        xsize = self.graph.get("xsize", 1200)
        ysize = self.graph.get("ysize", 900)
        width = self.graph.get("width", 1)
        xstep = self.graph.get("xAxisStep", 6)
        pcolor = self.graph.get("pendingColor", "#f0e460")
        dcolor = self.graph.get("declinedColor", "#f291a6")
        acolor = self.graph.get("acceptedColor", "#81fc4c")

        p1 = plt.bar(ind, pends, width, color=pcolor)
        p2 = plt.bar(ind, declines, width, color=dcolor, bottom=pends)
        p3 = plt.bar(ind, accepts, width, color=acolor, bottom=pends_declines)

        # One tick per labelled bar: every xstep-th bar, i.e. indexes
        # xstep-1, 2*xstep-1, ... that actually exist. Stopping at ind.size
        # keeps the tick count equal to the label count even when the
        # number of days is not a multiple of xstep.
        xticks = arange(xstep - 1, ind.size, xstep) + width / 2.0
        xlabels = [d for c, d in zip(count(1), data.keys()) if not c % xstep]
        plt.xticks(xticks, xlabels)
        plt.yticks(arange(0, plt.ylim()[1], 10))
        plt.tick_params(direction="out")

        leg = plt.legend((p1[0], p2[0], p3[0]), ("Pending", "Declined",
                         "Accepted"), loc="upper left", fancybox=True)
        leg.get_frame().set_alpha(0.5)

        fig = plt.gcf()
        # Float division so that sizes that are not multiples of 100 are
        # not truncated.
        fig.set_size_inches(xsize / 100.0, ysize / 100.0)
        fig.autofmt_xdate()

        ax = plt.gca()
        ax.yaxis.grid(True)

+ 739
- 0
tasks/afc_statistics.py 查看文件

@@ -0,0 +1,739 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from datetime import datetime
import re
from os.path import expanduser
from threading import Lock
from time import sleep

import oursql

from earwigbot import exceptions
from earwigbot import wiki
from earwigbot.tasks import Task

class AFCStatistics(Task):
    """A task to generate statistics for WikiProject Articles for Creation.

    Statistics are stored in a MySQL database ("u_earwig_afc_statistics")
    accessed with oursql. Statistics are synchronized with the live database
    every four minutes and saved once an hour, on the hour, to self.pagename.
    In the live bot, this is "Template:AFC statistics".
    """
    name = "afc_statistics"
    number = 2

    # Chart status number constants (assigned by get_status_and_chart()):
    CHART_NONE = 0  # No usable status; such pages are not tracked
    CHART_PEND = 1  # Status code "P"
    CHART_DRAFT = 2  # Status code "H"
    CHART_REVIEW = 3  # Status code "R"
    CHART_ACCEPT = 4  # Mainspace page without any submission template
    CHART_DECLINE = 5  # Status code "D"
    CHART_MISPLACE = 6  # Mainspace page that still has a submission template

def setup(self):
self.cfg = cfg = self.config.tasks.get(self.name, {})

# Set some wiki-related attributes:
self.pagename = cfg.get("page", "Template:AFC statistics")
self.pending_cat = cfg.get("pending", "Pending AfC submissions")
self.ignore_list = cfg.get("ignoreList", [])
default_summary = "Updating statistics for [[WP:WPAFC|WikiProject Articles for creation]]."
self.summary = self.make_summary(cfg.get("summary", default_summary))

# Templates used in chart generation:
templates = cfg.get("templates", {})
self.tl_header = templates.get("header", "AFC statistics/header")
self.tl_row = templates.get("row", "AFC statistics/row")
self.tl_footer = templates.get("footer", "AFC statistics/footer")

# Connection data for our SQL database:
kwargs = cfg.get("sql", {})
kwargs["read_default_file"] = expanduser("~/.my.cnf")
self.conn_data = kwargs
self.db_access_lock = Lock()

def run(self, **kwargs):
    """Entry point for a task event.

    kwargs["action"] decides what happens: "save" writes the chart to the
    wiki (self.save()), "sync" synchronizes our local statistics database
    with the site (self.sync()) and "update" refreshes a single page
    (self.update()). An SQL connection to our local database is opened for
    the duration of the action.

    Only one action runs at a time. A "sync" requested while the lock is
    taken is dropped; any other action waits for the lock.
    """
    action = kwargs.get("action")
    lock = self.db_access_lock
    acquired = lock.acquire(False)  # Non-blocking
    if not acquired:
        if action == "sync":
            self.logger.info("A sync is already ongoing; aborting")
            return
        self.logger.info("Waiting for database access lock")
        lock.acquire()

    try:
        self.site = self.bot.wiki.get_site()
        self.conn = oursql.connect(**self.conn_data)
        try:
            handlers = (("save", self.save), ("sync", self.sync),
                        ("update", self.update))
            for name, handler in handlers:
                if action == name:
                    handler(kwargs)
                    break
        finally:
            self.conn.close()
    finally:
        lock.release()

def save(self, kwargs):
    """Save our local statistics to the wiki.

    After checking for emergency shutoff, the statistics chart is compiled,
    and then saved to self.pagename using self.summary iff it has changed
    since last save. When kwargs has a true "fromIRC" entry, the shutoff
    check is skipped and " (!earwigbot)" is appended to the summary.
    """
    self.logger.info("Saving chart")
    if kwargs.get("fromIRC"):
        summary = self.summary + " (!earwigbot)"
    else:
        if self.shutoff_enabled():
            return
        summary = self.summary

    statistics = self.compile_charts()

    page = self.site.get_page(self.pagename)
    text = page.get()
    stat_block = "<!-- stat begin -->\n" + statistics + "\n<!-- stat end -->"
    # The replacement is given as a function so that it is inserted
    # verbatim: a plain replacement string would have its backslashes and
    # group references (which may occur in titles or user names) expanded.
    newtext = re.sub(u"<!-- stat begin -->(.*?)<!-- stat end -->",
                     lambda match: stat_block, text, flags=re.DOTALL)
    if newtext == text:
        self.logger.info("Chart unchanged; not saving")
        return  # Don't edit the page if we're not adding anything

    newtext = re.sub("<!-- sig begin -->(.*?)<!-- sig end -->",
                     "<!-- sig begin -->~~~ at ~~~~~<!-- sig end -->",
                     newtext)
    page.edit(newtext, summary, minor=True, bot=True)
    self.logger.info(u"Chart saved to [[{0}]]".format(page.title))

def compile_charts(self):
    """Compile every chart in our local db and return them as one string.

    The charts are joined with single newlines, in the order in which the
    'chart' table returns them.
    """
    with self.conn.cursor() as cur:
        cur.execute("SELECT * FROM chart")
        compiled = [self.compile_chart(row) for row in cur]
    return "\n".join(compiled)

def compile_chart(self, chart_info):
    """Compile one statistics chart and return it as wikitext.

    'chart_info' is a (chart_id, chart_title, special_title) row. The
    result is a header template call, one row template call per page that
    belongs to the chart, and a footer template call, one per line.
    """
    chart_id, chart_title, special_title = chart_info

    header = self.tl_header + "|" + chart_title
    if special_title:
        header += "|" + special_title
    lines = ["{{" + header + "}}"]

    query = "SELECT * FROM page JOIN row ON page_id = row_id WHERE row_chart = ?"
    with self.conn.cursor(oursql.DictCursor) as cur:
        cur.execute(query, (chart_id,))
        for page in cur:
            lines.append(self.compile_chart_row(page))

    lines.append("{{" + self.tl_footer + "}}")
    return "\n".join(lines)

def compile_chart_row(self, page):
    """Compile and return a single chart row.

    'page' is a dict of page information, taken as a row from the page
    table, where keys are column names and values are their cell contents.
    It is not modified. The "special" fields are only included when
    page["page_special_oldid"] is set, and the notes only when
    page["page_notes"] is non-empty.
    """
    row = u"{0}|s={page_status}|t={page_title}|h={page_short}|z={page_size}|"
    if page["page_special_oldid"]:
        row += "sr={page_special_user}|sd={page_special_time}|si={page_special_oldid}|"
    row += "mr={page_modify_user}|md={page_modify_time}|mi={page_modify_oldid}"
    if page["page_notes"]:
        row += "|n=1{page_notes}"

    # Format the timestamps on a copy so the caller's row stays untouched.
    # A page without a "special" edit has no special time at all, so empty
    # timestamps are left alone instead of being formatted.
    fields = dict(page)
    for key in ("page_special_time", "page_modify_time"):
        if fields[key] is not None:
            fields[key] = self.format_time(fields[key])

    return "{{" + row.format(self.tl_row, **fields) + "}}"

def format_time(self, dt):
    """Render a datetime as "HH:MM, DD Mon YYYY" for display on the wiki."""
    timestamp_format = "%H:%M, %d %b %Y"
    return dt.strftime(timestamp_format)

def sync(self, kwargs):
    """Synchronize our local statistics database with the site.

    A sync runs three steps in order on one cursor: refresh the tracked
    submissions that changed since the last sync (self.update_tracked()),
    start tracking pending submissions we do not know yet
    (self.add_untracked()), and drop old submissions (self.delete_old()).

    If the server's replication lag is above 600 seconds, the sync is
    canceled with a warning, unless kwargs has a true "ignore_replag"
    entry.
    """
    self.logger.info("Starting sync")

    lag = self.site.get_replag()
    self.logger.debug("Server replag is {0}".format(lag))
    too_lagged = lag > 600
    if too_lagged and not kwargs.get("ignore_replag"):
        warning = "Sync canceled as replag ({0} secs) is greater than ten minutes"
        self.logger.warn(warning.format(lag))
        return

    with self.conn.cursor() as cursor:
        for step in (self.update_tracked, self.add_untracked, self.delete_old):
            step(cursor)

    self.logger.info("Sync completed")

def update_tracked(self, cursor):
    """Update tracked submissions that have been changed since last sync.

    This is done by iterating through every page in our database and
    comparing our stored latest revision ID with the actual latest revision
    ID from an SQL query. If they differ, we will update our information
    about the page (self.update_page()).

    If the page does not exist, we will remove it from our database with
    self.untrack_page().

    Errors raised while updating a single page are logged and do not stop
    the loop.
    """
    self.logger.debug("Updating tracked submissions")
    query1 = "SELECT page_id, page_title, page_modify_oldid FROM page"
    query2 = """SELECT page_latest, page_title, page_namespace FROM page
                WHERE page_id = ?"""
    cursor.execute(query1)
    # Read all tracked rows before looping: the loop body runs further
    # statements on this same cursor, which must not disturb the result
    # set that is being walked.
    tracked = cursor.fetchall()

    for pageid, title, oldid in tracked:
        result = list(self.site.sql_query(query2, (pageid,)))
        if not result:
            self.untrack_page(cursor, pageid)
            continue

        real_oldid = result[0][0]
        if oldid != real_oldid:
            msg = u"Updating page [[{0}]] (id: {1}) @ {2}"
            self.logger.debug(msg.format(title, pageid, oldid))
            self.logger.debug(" {0} -> {1}".format(oldid, real_oldid))
            base = result[0][1].decode("utf8").replace("_", " ")
            ns = self.site.namespace_id_to_name(result[0][2])
            if ns:
                real_title = u":".join((ns, base))
            else:
                real_title = base
            try:
                self.update_page(cursor, pageid, real_title)
            except Exception:
                # Best effort: one broken page should not abort the sync.
                e = u"Error updating page [[{0}]] (id: {1})"
                self.logger.exception(e.format(real_title, pageid))

def add_untracked(self, cursor):
    """Add pending submissions that are not yet tracked.

    This is done by compiling a set of all currently tracked submissions
    and iterating through all members of self.pending_cat. If a page in
    the pending category is not tracked and is not in self.ignore_list, we
    will track it with self.track_page().

    Errors raised while tracking a single page are logged and do not stop
    the loop.
    """
    self.logger.debug("Adding untracked pending submissions")
    cursor.execute("SELECT page_id FROM page")
    # A set keeps the membership test below cheap no matter how many pages
    # are tracked.
    tracked = set(i[0] for i in cursor.fetchall())

    category = self.site.get_category(self.pending_cat)
    for page in category.get_members():
        title, pageid = page.title, page.pageid
        if title in self.ignore_list:
            continue
        if pageid not in tracked:
            msg = u"Tracking page [[{0}]] (id: {1})".format(title, pageid)
            self.logger.debug(msg)
            try:
                self.track_page(cursor, pageid, title)
            except Exception:
                # Best effort: one broken page should not abort the sync.
                e = u"Error tracking page [[{0}]] (id: {1})"
                self.logger.exception(e.format(title, pageid))

def delete_old(self, cursor):
    """Drop old submissions from the database.

    A submission counts as "old" when it sits in the accepted or declined
    chart and its special time lies more than 36 hours in the past.
    Submissions in other charts are never removed here.
    """
    self.logger.debug("Removing old submissions from chart")
    finished_charts = (self.CHART_ACCEPT, self.CHART_DECLINE)
    query = """DELETE FROM page, row USING page JOIN row
               ON page_id = row_id WHERE row_chart IN (?, ?)
               AND ADDTIME(page_special_time, '36:00:00') < NOW()"""
    cursor.execute(query, finished_charts)

def update(self, kwargs):
    """Update a page by name, regardless of whether anything has changed.

    Mainly intended as a command to be used via IRC, e.g.:
    !tasks start afc_statistics action=update page=Foobar

    Nothing happens when kwargs has no "page" entry. If the page is not in
    our database, an error is logged and the page is left alone.
    """
    title = kwargs.get("page")
    if not title:
        return

    # Titles may arrive as UTF-8 encoded bytes; decode only in that case.
    if isinstance(title, bytes):
        title = title.decode("utf8")
    title = title.replace("_", " ")
    query = "SELECT page_id, page_modify_oldid FROM page WHERE page_title = ?"
    with self.conn.cursor() as cursor:
        cursor.execute(query, (title,))
        rows = cursor.fetchall()
        if not rows:
            msg = u"Page [[{0}]] not found in database".format(title)
            self.logger.error(msg)
            return  # Nothing to update for an untracked page
        pageid, oldid = rows[0]

        msg = u"Updating page [[{0}]] (id: {1}) @ {2}"
        self.logger.info(msg.format(title, pageid, oldid))
        self.update_page(cursor, pageid, title)

def untrack_page(self, cursor, pageid):
    """Delete the page with the given ID from our page and row tables."""
    self.logger.debug("Untracking page (id: {0})".format(pageid))
    params = (pageid,)
    query = """DELETE FROM page, row USING page JOIN row
               ON page_id = row_id WHERE page_id = ?"""
    cursor.execute(query, params)

def track_page(self, cursor, pageid, title):
    """Start tracking a page that is not in our database yet.

    The page's content, status, size, last edit and "special" edit are
    gathered and inserted as one row in 'row' and one row in 'page'.
    Nothing is stored (and a message is logged) if the content cannot be
    fetched or no chart can be determined for the page.
    """
    content = self.get_content(title)
    if content is None:
        self.logger.error(u"Could not get page content for [[{0}]]".format(title))
        return

    page_ns = self.site.get_page(title).namespace
    status, chart = self.get_status_and_chart(content, page_ns)
    if chart == self.CHART_NONE:
        self.logger.warn(u"Could not find a status for [[{0}]]".format(title))
        return

    short = self.get_short_title(title)
    size = self.get_size(content)
    m_user, m_time, m_id = self.get_modify(pageid)
    s_user, s_time, s_id = self.get_special(pageid, chart)
    notes = self.get_notes(chart, content, m_time, s_user)

    row_values = (pageid, chart)
    page_values = (pageid, status, title, short, size, notes,
                   m_user, m_time, m_id, s_user, s_time, s_id)
    cursor.execute("INSERT INTO row VALUES (?, ?)", row_values)
    cursor.execute("INSERT INTO page VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                   page_values)

def update_page(self, cursor, pageid, title):
    """Refresh the stored information of a page that is already tracked.

    Fresh information about the page is gathered and compared with the row
    stored in our database; only the parts that differ (title, last edit,
    status, notes) are written back. The page is untracked instead if no
    chart can be determined for it any more, and nothing is done (besides
    logging an error) if its content cannot be fetched.
    """
    content = self.get_content(title)
    if content is None:
        self.logger.error(u"Could not get page content for [[{0}]]".format(title))
        return

    page_ns = self.site.get_page(title).namespace
    status, chart = self.get_status_and_chart(content, page_ns)
    if chart == self.CHART_NONE:
        self.untrack_page(cursor, pageid)
        return

    # Fetch the stored row as a dict, using a separate cursor:
    query = "SELECT * FROM page JOIN row ON page_id = row_id WHERE page_id = ?"
    with self.conn.cursor(oursql.DictCursor) as dict_cursor:
        dict_cursor.execute(query, (pageid,))
        stored = dict_cursor.fetchall()[0]

    size = self.get_size(content)
    m_user, m_time, m_id = self.get_modify(pageid)

    if title != stored["page_title"]:
        self.update_page_title(cursor, stored, pageid, title)

    if m_id != stored["page_modify_oldid"]:
        self.update_page_modify(cursor, stored, pageid, size, m_user,
                                m_time, m_id)

    if status == stored["page_status"]:
        s_user = stored["page_special_user"]
    else:
        special = self.update_page_status(cursor, stored, pageid, status,
                                          chart)
        s_user = special[0]

    notes = self.get_notes(chart, content, m_time, s_user)
    if notes != stored["page_notes"]:
        self.update_page_notes(cursor, stored, pageid, notes)

def update_page_title(self, cursor, result, pageid, title):
    """Store a page's new title and shortened title in our database.

    'result' is the previously stored row; it is only used for logging.
    """
    short = self.get_short_title(title)
    cursor.execute(
        "UPDATE page SET page_title = ?, page_short = ? WHERE page_id = ?",
        (title, short, pageid))

    old_title = result["page_title"]
    self.logger.debug(u" {0}: title: {1} -> {2}".format(pageid, old_title, title))

def update_page_modify(self, cursor, result, pageid, size, m_user, m_time, m_id):
    """Store a page's size and last-edit information in our database.

    'result' is the previously stored row; it is only used for logging.
    """
    new_values = (size, m_user, m_time, m_id, pageid)
    query = """UPDATE page SET page_size = ?, page_modify_user = ?,
               page_modify_time = ?, page_modify_oldid = ?
               WHERE page_id = ?"""
    cursor.execute(query, new_values)

    template = u" {0}: modify: {1} / {2} / {3} -> {4} / {5} / {6}"
    self.logger.debug(template.format(
        pageid, result["page_modify_user"], result["page_modify_time"],
        result["page_modify_oldid"], m_user, m_time, m_id))

def update_page_status(self, cursor, result, pageid, status, chart):
    """Store a page's new status and chart, plus its "special" edit.

    The special-edit columns are only rewritten when the special revision
    ID differs from the stored one. 'result' is the previously stored row.

    Returns the (user, time, revision ID) of the special edit as given by
    self.get_special().
    """
    status_query = """UPDATE page JOIN row ON page_id = row_id
                      SET page_status = ?, row_chart = ? WHERE page_id = ?"""
    special_query = """UPDATE page SET page_special_user = ?,
                       page_special_time = ?, page_special_oldid = ?
                       WHERE page_id = ?"""
    cursor.execute(status_query, (status, chart, pageid))

    self.logger.debug(" {0}: status: {1} ({2}) -> {3} ({4})".format(
        pageid, result["page_status"], result["row_chart"], status, chart))

    s_user, s_time, s_id = self.get_special(pageid, chart)
    if s_id != result["page_special_oldid"]:
        cursor.execute(special_query, (s_user, s_time, s_id, pageid))
        self.logger.debug(
            u"{0}: special: {1} / {2} / {3} -> {4} / {5} / {6}".format(
                pageid, result["page_special_user"],
                result["page_special_time"], result["page_special_oldid"],
                s_user, s_time, s_id))

    return s_user, s_time, s_id

def update_page_notes(self, cursor, result, pageid, notes):
    """Store a page's new notes (or warnings) in our database.

    'result' is the previously stored row; it is only used for logging.
    """
    cursor.execute("UPDATE page SET page_notes = ? WHERE page_id = ?",
                   (notes, pageid))
    old_notes = result["page_notes"]
    self.logger.debug(" {0}: notes: {1} -> {2}".format(pageid, old_notes, notes))

def get_content(self, title):
    """Return the current content of the page called 'title'.

    The title is split into namespace and base name; if it has no prefix,
    or the prefix is not a known namespace, the whole title is looked up
    in the main namespace. The latest revision ID is read via SQL and its
    content is then fetched with self.get_revision_content().

    Returns None if the SQL lookup finds no such page.
    """
    query = "SELECT page_latest FROM page WHERE page_title = ? AND page_namespace = ?"
    prefix, colon, remainder = title.partition(":")
    if not colon:
        base, ns = title, wiki.NS_MAIN
    else:
        try:
            ns = self.site.namespace_name_to_id(prefix)
            base = remainder
        except exceptions.NamespaceNotFoundError:
            base, ns = title, wiki.NS_MAIN

    rows = self.site.sql_query(query, (base.replace(" ", "_"), ns))
    try:
        revid = int(list(rows)[0][0])
    except IndexError:
        return None
    return self.get_revision_content(revid)

def get_revision_content(self, revid, tries=1):
    """Fetch the content of the revision with ID 'revid' from the API.

    If the API response lacks one of the expected keys, the query is
    repeated after a five second pause, up to 'tries' more times. Returns
    None once the retries are used up.
    """
    retries_left = tries
    while True:
        res = self.site.api_query(action="query", prop="revisions",
                                  revids=revid, rvprop="content")
        try:
            return res["query"]["pages"].values()[0]["revisions"][0]["*"]
        except KeyError:
            if retries_left <= 0:
                return None
            sleep(5)
            retries_left -= 1

def get_status_and_chart(self, content, namespace):
    """Work out the status letter and chart number of an AFC submission.

    A page may carry several {{AFC submission}} templates, so the status
    codes found in 'content' are ranked: "R" (in review) wins over "H"
    (draft chart), then "P" (pending), then "T" (not charted), then "D"
    (declined). Without any of these the page is not charted.

    Pages in the main namespace are handled separately: without any
    submission template they count as accepted, otherwise as misplaced.

    Returns a (status, chart) tuple; status is None where no status letter
    applies.
    """
    statuses = self.get_statuses(content)

    # (status code, status letter, chart), highest priority first:
    precedence = (
        ("R", "r", self.CHART_REVIEW),
        ("H", "p", self.CHART_DRAFT),
        ("P", "p", self.CHART_PEND),
        ("T", None, self.CHART_NONE),
        ("D", "d", self.CHART_DECLINE),
    )
    status, chart = None, self.CHART_NONE
    for code, code_status, code_chart in precedence:
        if code in statuses:
            status, chart = code_status, code_chart
            break

    if namespace == wiki.NS_MAIN:
        if statuses:
            status, chart = None, self.CHART_MISPLACE
        else:
            status, chart = "a", self.CHART_ACCEPT

    return status, chart

def get_statuses(self, content):
    """Return a list of all AFC submission statuses in a page's text.

    Every {{AFC submission}} template found in 'content' contributes one
    entry, in the order in which the templates appear. An entry is normally
    one of the codes in 'valid' ("R", "H", "P", "T", "D"), with "P" as the
    fallback; a template that uses a "/subpage" name missing from
    'subtemps' contributes None.
    """
    # Is there (still) a submission template, either closed right away,
    # followed by parameters, or in "/subpage" form?
    re_has_templates = "\{\{[aA][fF][cC] submission\s*(\}\}|\||/)"
    # Captures everything between the template name and the next "}}".
    re_template = "\{\{[aA][fF][cC] submission\s*(.*?)\}\}"
    # Matches a submission template with another template nested inside,
    # so that the nested template can be cut out.
    re_remove_embed = "(\{\{[aA][fF][cC] submission\s*(.*?))\{\{(.*?)\}\}(.*?)\}\}"
    valid = ["R", "H", "P", "T", "D"]
    # Old-style subpage templates and the status code each one stands for:
    subtemps = {
        "/reviewing": "R",
        "/onhold": "H",
        "/pending": "P",
        "/draft": "T",
        "/declined": "D"
    }
    statuses = []

    # Each pass handles the first remaining template and then removes it
    # from 'content', so the loop ends once no template is left.
    while re.search(re_has_templates, content):
        status = "P"
        match = re.search(re_template, content, re.S)
        if not match:
            return statuses
        temp = match.group(1)
        limit = 0
        # Strip templates nested inside the parameters, one per pass and at
        # most 50 times, until the captured text has no "{{" left.
        while "{{" in temp and limit < 50:
            content = re.sub(re_remove_embed, "\\1\\4}}", content, 1, re.S)
            match = re.search(re_template, content, re.S)
            temp = match.group(1)
            limit += 1
        params = temp.split("|")
        try:
            # The text before the first "|" is the subpage part, if any.
            subtemp, params = params[0].strip(), params[1:]
        except IndexError:
            # Defensive only: str.split() always returns at least one item.
            status = "P"
            params = []
        else:
            if subtemp:
                # Subpage form: the status comes from the subpage name and
                # the parameters are ignored.
                status = subtemps.get(subtemp)
                params = []
        # The first positional parameter, or the first "1=..." parameter,
        # decides the status; other named parameters are skipped.
        for param in params:
            param = param.strip().upper()
            if "=" in param:
                key, value = param.split("=", 1)
                if key.strip() == "1":
                    status = value if value in valid else "P"
                    break
            else:
                status = param if param in valid else "P"
                break
        statuses.append(status)
        content = re.sub(re_template, "", content, 1, re.S)

    return statuses

def get_short_title(self, title):
    """Shorten a title so we can display it in a chart using less space.

    Basically, this just means removing the "Wikipedia talk:Articles for
    creation/" (or "Wikipedia:Articles for creation/") part. If the result
    is longer than 50 characters, we'll cut it down to 47 and add a poor
    man's ellipsis ("...") at the end.
    """
    # Raw string: the pattern contains regex escapes that are not valid
    # escape sequences in a normal string literal.
    short = re.sub(r"Wikipedia(\s*talk)?\:Articles\sfor\screation\/", "", title)
    if len(short) > 50:
        short = short[:47] + "..."
    return short

def get_size(self, content):
    """Return the length of 'content' as a short string such as "1.5 kB".

    The length is divided by 1000 and rounded to one decimal place.
    """
    kilobytes = round(len(content) / 1000.0, 1)
    return "{0} kB".format(kilobytes)

def get_modify(self, pageid):
    """Return information about a page's last edit ("modification").

    The result is a (user, time, revision ID) tuple for the page's latest
    revision: the editor's name decoded from UTF-8, the edit time parsed
    into a datetime, and the revision ID as returned by the query.
    """
    query = """SELECT rev_user_text, rev_timestamp, rev_id FROM revision
               JOIN page ON rev_id = page_latest WHERE page_id = ?"""
    rows = list(self.site.sql_query(query, (pageid,)))
    raw_user, raw_time, revid = rows[0]
    edited_at = datetime.strptime(raw_time, "%Y%m%d%H%M%S")
    return raw_user.decode("utf8"), edited_at, revid

def get_special(self, pageid, chart):
    """Return information about a page's "special" edit.

    I tend to use the term "special" as a verb a lot, which is bound to
    cause confusion. It is merely a short way of saying "the edit in which
    a declined submission was declined, an accepted submission was
    accepted, a submission in review was set as such, a pending submission
    was submitted, and a "misplaced" submission was created."

    This "information" consists of the special edit's editor, its time (as
    a datetime), and its revision ID. If the page's status is not something
    that involves "special"-ing (or the chart is not recognized), we will
    return None for all three. The same will be returned if we cannot
    determine when the page was "special"-ed, if an API error occurs while
    fetching revision content, or if it was "special"-ed more than 50 edits
    ago.
    """
    if chart == self.CHART_NONE:
        return None, None, None
    elif chart == self.CHART_MISPLACE:
        return self.get_create(pageid)
    elif chart == self.CHART_ACCEPT:
        search_for = None
        search_not = ["R", "H", "P", "T", "D"]
    elif chart == self.CHART_DRAFT:
        search_for = "H"
        search_not = []
    elif chart == self.CHART_PEND:
        search_for = "P"
        search_not = []
    elif chart == self.CHART_REVIEW:
        search_for = "R"
        search_not = []
    elif chart == self.CHART_DECLINE:
        search_for = "D"
        search_not = ["R", "H", "P", "T"]
    else:
        # Without this branch, search_for/search_not would be unbound below.
        msg = "Unknown chart while determining special for page (id: {0}, chart: {1})"
        self.logger.warning(msg.format(pageid, chart))
        return None, None, None

    query = """SELECT rev_user_text, rev_timestamp, rev_id
               FROM revision WHERE rev_page = ? ORDER BY rev_id DESC"""
    result = self.site.sql_query(query, (pageid,))

    # Walk backwards from the newest revision; the special edit is the
    # oldest revision in the unbroken run that still matches our criteria.
    counter = 0
    last = (None, None, None)
    for user, ts, revid in result:
        counter += 1
        if counter > 50:
            msg = "Exceeded 50 content lookups while determining special for page (id: {0}, chart: {1})"
            self.logger.warning(msg.format(pageid, chart))
            return None, None, None
        try:
            content = self.get_revision_content(revid)
        except exceptions.APIError:
            msg = "API error interrupted SQL query in get_special() for page (id: {0}, chart: {1})"
            self.logger.exception(msg.format(pageid, chart))
            return None, None, None
        statuses = self.get_statuses(content)
        matches = [s in statuses for s in search_not]
        if search_for:
            if search_for not in statuses or any(matches):
                return last
        else:
            if any(matches):
                return last
        timestamp = datetime.strptime(ts, "%Y%m%d%H%M%S")
        last = (user.decode("utf8"), timestamp, revid)

    return last

def get_create(self, pageid):
    """Look up the earliest revision of the page with the given ID.

    Returns a tuple of (creator's username, creation time as a datetime,
    revision ID) for the page's first edit.
    """
    query = """SELECT rev_user_text, rev_timestamp, rev_id
FROM revision WHERE rev_id =
(SELECT MIN(rev_id) FROM revision WHERE rev_page = ?)"""
    rows = list(self.site.sql_query(query, (pageid,)))
    creator, raw_time, revid = rows[0]
    # Timestamps come back in MediaWiki's YYYYMMDDHHMMSS format.
    when = datetime.strptime(raw_time, "%Y%m%d%H%M%S")
    return creator.decode("utf8"), when, revid

def get_notes(self, chart, content, m_time, s_user):
    """Return any special notes or warnings about this page.

    The result is a (possibly empty) string of "|key=1" parameters, one for
    each note that applies:

    nc (copyvio):   submission is a suspected copyright violation
    nu (unsourced): submission lacks references completely
    ni (no-inline): submission has no inline citations
    ns (short):     submission is less than a kilobyte in length
    nr (resubmit):  submission was resubmitted after a previous decline
    no (old):       submission has not been touched in > 4 days
    nb (blocked):   submitter is currently blocked

    chart is the chart the page belongs to, content is the page text,
    m_time is the datetime of the last edit, and s_user is the submitter's
    username (skipped if empty). Pages in the "none", "accept", and
    "decline" charts never get notes.
    """
    notes = ""

    ignored_charts = [self.CHART_NONE, self.CHART_ACCEPT, self.CHART_DECLINE]
    if chart in ignored_charts:
        return notes

    copyvios = self.config.tasks.get("afc_copyvios", {})
    template = copyvios.get("template", "AfC suspected copyvio")
    # The template name is configurable plain text, so escape it before
    # splicing it into the pattern.
    if re.search(r"\{\{\s*" + re.escape(template), content):
        notes += "|nc=1"  # Submission is a suspected copyvio

    if not re.search(r"\<ref\s*(.*?)\>(.*?)\</ref\>", content, re.I | re.S):
        # No <ref> tags; any external link at all counts as a source.
        regex = r"(https?:)|\[//(?!{0})([^ \]\t\n\r\f\v]+?)"
        sitedomain = re.escape(self.site.domain)
        if re.search(regex.format(sitedomain), content, re.I | re.S):
            notes += "|ni=1"  # Submission has no inline citations
        else:
            notes += "|nu=1"  # Submission is completely unsourced

    if len(content) < 1000:
        notes += "|ns=1"  # Submission is short

    statuses = self.get_statuses(content)
    if "D" in statuses and chart != self.CHART_MISPLACE:
        notes += "|nr=1"  # Submission was resubmitted

    time_since_modify = (datetime.utcnow() - m_time).total_seconds()
    max_time = 4 * 24 * 60 * 60
    if time_since_modify > max_time:
        notes += "|no=1"  # Submission hasn't been touched in over 4 days

    if chart in [self.CHART_PEND, self.CHART_DRAFT] and s_user:
        submitter = self.site.get_user(s_user)
        try:
            if submitter.blockinfo:
                notes += "|nb=1"  # Submitter is blocked
        except exceptions.UserNotFoundError:  # Likely an IP
            pass

    return notes

+ 33
- 0
tasks/afc_undated.py 查看文件

@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from earwigbot.tasks import Task

class AFCUndated(Task):
    """A task to clear [[Category:Undated AfC submissions]]."""
    name = "afc_undated"

    def setup(self):
        # Placeholder: nothing is set up for this task yet.
        pass

    def run(self, **kwargs):
        # Placeholder: the task body is not implemented; running it is a
        # no-op and any keyword arguments are ignored.
        pass

+ 34
- 0
tasks/blp_tag.py 查看文件

@@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from earwigbot.tasks import Task

class BLPTag(Task):
    """A task to add |blp=yes to ``{{WPB}}`` or ``{{WPBS}}`` when it is used
    along with ``{{WP Biography}}``."""
    name = "blp_tag"

    def setup(self):
        # Placeholder: nothing is set up for this task yet.
        pass

    def run(self, **kwargs):
        # Placeholder: the task body is not implemented; running it is a
        # no-op and any keyword arguments are ignored.
        pass

+ 787
- 0
tasks/drn_clerkbot.py 查看文件

@@ -0,0 +1,787 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from datetime import datetime
from os.path import expanduser
import re
from threading import RLock
from time import mktime, sleep, time

import oursql

from earwigbot import exceptions
from earwigbot.tasks import Task
from earwigbot.wiki import constants

class DRNClerkBot(Task):
    """A task to clerk for [[WP:DRN]]."""
    name = "drn_clerkbot"
    number = 19

    # Case status codes. STATUS_UNKNOWN is also assigned to cases that are
    # no longer found on the noticeboard page.
    STATUS_UNKNOWN = 0
    STATUS_NEW = 1
    STATUS_OPEN = 2
    STATUS_STALE = 3
    STATUS_NEEDASSIST = 4
    STATUS_REVIEW = 5
    STATUS_RESOLVED = 6
    STATUS_CLOSED = 7

    # Status-template parameter values (lowercase) recognized for each
    # status. The first alias in each tuple is the one written back to the
    # page and used in log messages; the empty string stands for a brand new
    # case whose template has no status parameter.
    ALIASES = {
        STATUS_NEW: ("",),
        STATUS_OPEN: ("open", "active", "inprogress"),
        STATUS_STALE: ("stale",),
        STATUS_NEEDASSIST: ("needassist", "relist", "relisted"),
        STATUS_REVIEW: ("review",),
        STATUS_RESOLVED: ("resolved", "resolve"),
        STATUS_CLOSED: ("closed", "close"),
    }

def setup(self):
"""Hook called immediately after the task is loaded."""
cfg = self.config.tasks.get(self.name, {})

# Set some wiki-related attributes:
self.title = cfg.get("title",
"Wikipedia:Dispute resolution noticeboard")
self.chart_title = cfg.get("chartTitle", "Template:DRN case status")
self.volunteer_title = cfg.get("volunteers",
"Wikipedia:Dispute resolution noticeboard/Volunteering")
self.very_old_title = cfg.get("veryOldTitle", "User talk:Szhang (WMF)")

clerk_summary = "Updating $3 case$4."
notify_summary = "Notifying user regarding [[WP:DRN|dispute resolution noticeboard]] case."
chart_summary = "Updating statistics for the [[WP:DRN|dispute resolution noticeboard]]."
self.clerk_summary = self.make_summary(cfg.get("clerkSummary", clerk_summary))
self.notify_summary = self.make_summary(cfg.get("notifySummary", notify_summary))
self.chart_summary = self.make_summary(cfg.get("chartSummary", chart_summary))

# Templates used:
templates = cfg.get("templates", {})
self.tl_status = templates.get("status", "DR case status")
self.tl_notify_party = templates.get("notifyParty", "DRN-notice")
self.tl_notify_stale = templates.get("notifyStale", "DRN stale notice")
self.tl_archive_top = templates.get("archiveTop", "DRN archive top")
self.tl_archive_bottom = templates.get("archiveBottom",
"DRN archive bottom")
self.tl_chart_header = templates.get("chartHeader",
"DRN case status/header")
self.tl_chart_row = templates.get("chartRow", "DRN case status/row")
self.tl_chart_footer = templates.get("chartFooter",
"DRN case status/footer")

# Connection data for our SQL database:
kwargs = cfg.get("sql", {})
kwargs["read_default_file"] = expanduser("~/.my.cnf")
self.conn_data = kwargs
self.db_access_lock = RLock()

# Minimum size a MySQL TIMESTAMP field can hold:
self.min_ts = datetime(1970, 1, 1, 0, 0, 1)

def run(self, **kwargs):
    """Entry point for a task event.

    The "action" keyword selects what to do: "update_volunteers", "clerk",
    "update_chart", "purge", or "all" (the default) for everything in that
    order. Nothing is done if another job currently holds the lock. The
    database connection opened for the job is closed before returning.
    """
    if not self.db_access_lock.acquire(False):  # Non-blocking
        self.logger.info("A job is already ongoing; aborting")
        return
    action = kwargs.get("action", "all")
    conn = None
    try:
        start = time()
        conn = oursql.connect(**self.conn_data)
        site = self.bot.wiki.get_site()
        if action in ["all", "update_volunteers"]:
            self.update_volunteers(conn, site)
        if action in ["all", "clerk"]:
            log = u"Starting update to [[{0}]]".format(self.title)
            self.logger.info(log)
            cases = self.read_database(conn)
            page = site.get_page(self.title)
            text = page.get()
            self.read_page(conn, cases, text)
            notices = self.clerk(conn, cases)
            if self.shutoff_enabled():
                return
            if not self.save(page, cases, kwargs, start):
                return
            self.send_notices(site, notices)
        if action in ["all", "update_chart"]:
            if self.shutoff_enabled():
                return
            self.update_chart(conn, site)
        if action in ["all", "purge"]:
            self.purge_old_data(conn)
    finally:
        try:
            # Don't leave the connection open until garbage collection.
            if conn is not None:
                conn.close()
        finally:
            self.db_access_lock.release()

def update_volunteers(self, conn, site):
    """Updates and stores the list of dispute resolution volunteers.

    Usernames are read from "# {{User|...}}" lines that follow the marker
    comment on the volunteer page. The volunteers table is then synced with
    that list: names no longer on the page are deleted and new ones are
    inserted. If the marker is missing, an error is logged and the table is
    left alone.
    """
    log = u"Updating volunteer list from [[{0}]]"
    self.logger.info(log.format(self.volunteer_title))
    page = site.get_page(self.volunteer_title)
    try:
        text = page.get()
    except exceptions.PageNotFoundError:
        text = ""
    marker = "<!-- please don't remove this comment (used by EarwigBot) -->"
    if marker not in text:
        log = u"The marker ({0}) wasn't found in the volunteer list at [[{1}]]!"
        self.logger.error(log.format(marker, page.title))
        return
    text = text.split(marker)[1]
    additions = set()
    for line in text.splitlines():
        user = re.search(r"\# \{\{User\|(.+?)\}\}", line)
        if user:
            uname = user.group(1).replace("_", " ").strip()
            if not uname:
                continue  # Blank entry, e.g. "{{User| }}"
            # Stored as 1-tuples so they compare equal to database rows:
            additions.add((uname[0].upper() + uname[1:],))

    removals = set()
    query1 = "SELECT volunteer_username FROM volunteers"
    query2 = "DELETE FROM volunteers WHERE volunteer_username = ?"
    query3 = "INSERT INTO volunteers (volunteer_username) VALUES (?)"
    with conn.cursor() as cursor:
        cursor.execute(query1)
        for row in cursor:
            if row in additions:
                additions.remove(row)  # Already stored; nothing to add
            else:
                removals.add(row)
        if removals:
            cursor.executemany(query2, removals)
        if additions:
            cursor.executemany(query3, additions)
def read_database(self, conn):
    """Build a _Case from every row of the cases table and return the list."""
    with conn.cursor() as cursor:
        cursor.execute("SELECT * FROM cases")
        cases = [_Case(*row) for row in cursor]
    message = "Read {0} cases from the database"
    self.logger.debug(message.format(len(cases)))
    return cases

def read_page(self, conn, cases, text):
    """Read the noticeboard content and update the list of _Cases.

    The page text is split on level-two headings. Each section containing
    the status template is matched to a known case via its "Bot Case ID"
    comment, or else registered as a new case (and the ID comment is added
    to its body). cases is modified in place; nothing is returned.
    """
    nextid = self.select_next_id(conn)
    tl_status_esc = re.escape(self.tl_status)
    # The capturing group keeps the heading lines in the result, so headings
    # and the text that follows them alternate in `split`.
    split = re.split("(^==\s*[^=]+?\s*==$)", text, flags=re.M|re.U)
    for i in xrange(len(split)):
        if i + 1 == len(split):
            break
        if not split[i].startswith("=="):
            continue
        title = split[i][2:-2].strip()
        # `old` keeps the untouched text so save() can find and replace it.
        body = old = split[i + 1]
        if not re.search("\s*\{\{" + tl_status_esc, body, re.U):
            continue  # Section without a status template is not a case
        status = self.read_status(body)
        re_id = "<!-- Bot Case ID \(please don't modify\): (.*?) -->"
        try:
            id_ = int(re.search(re_id, body).group(1))
            case = [case for case in cases if case.id == id_][0]
        except (AttributeError, IndexError, ValueError):
            # No ID comment, a non-numeric ID, or an ID we don't have a
            # case for: treat it as a new case with a fresh ID.
            id_ = nextid
            nextid += 1
            re_id2 = "(\{\{" + tl_status_esc
            re_id2 += r"(.*?)\}\})(<!-- Bot Case ID \(please don't modify\): .*? -->)?"
            repl = ur"\1 <!-- Bot Case ID (please don't modify): {0} -->"
            body = re.sub(re_id2, repl.format(id_), body)
            re_f = r"\{\{drn filing editor\|(.*?)\|"
            re_f += r"(\d{2}:\d{2},\s\d{1,2}\s\w+\s\d{4}\s\(UTC\))\}\}"
            match = re.search(re_f, body, re.U)
            if match:
                f_user = match.group(1).split("/", 1)[0].replace("_", " ")
                f_user = f_user[0].upper() + f_user[1:]
                strp = "%H:%M, %d %B %Y (UTC)"
                f_time = datetime.strptime(match.group(2), strp)
            else:
                f_user, f_time = None, datetime.utcnow()
            case = _Case(id_, title, status, self.STATUS_UNKNOWN, f_user,
                         f_time, f_user, f_time, "", self.min_ts,
                         self.min_ts, False, False, False, len(body),
                         new=True)
            cases.append(case)
            log = u"Added new case {0} ('{1}', status={2}, by {3})"
            self.logger.debug(log.format(id_, title, status, f_user))
        else:
            case.status = status
            log = u"Read active case {0} ('{1}')".format(id_, title)
            self.logger.debug(log)
            if case.title != title:
                self.update_case_title(conn, id_, title)
                case.title = title
        case.body, case.old = body, old

    # Cases with no body here are treated as absent from the page. Iterate
    # over a copy because cases may be removed from the list.
    for case in cases[:]:
        if case.body is None:
            if case.original_status == self.STATUS_UNKNOWN:
                cases.remove(case)  # Ignore archived case
            else:
                case.status = self.STATUS_UNKNOWN
                log = u"Dropped case {0} because it is no longer on the page ('{1}')"
                self.logger.debug(log.format(case.id, case.title))

    self.logger.debug("Done reading cases from the noticeboard page")

def select_next_id(self, conn):
    """Return one more than the highest stored case ID (1 if there is none)."""
    with conn.cursor() as cursor:
        cursor.execute("SELECT MAX(case_id) FROM cases")
        highest = cursor.fetchone()[0]
    return int(highest) + 1 if highest else 1

def read_status(self, body):
    """Parse the current status from a case body.

    Returns the STATUS_* code whose aliases include the first status
    template's parameter (ignoring case and surrounding whitespace), or
    STATUS_NEW if there is no template or the parameter isn't recognized.
    """
    templ = re.escape(self.tl_status)
    status = re.search(r"\{\{" + templ + r"\|?(.*?)\}\}", body, re.S|re.U)
    if not status:
        return self.STATUS_NEW
    # Strip so that e.g. "|open " isn't mistaken for a brand new case.
    value = status.group(1).strip().lower()
    for option, names in self.ALIASES.items():
        if value in names:
            return option
    return self.STATUS_NEW

def update_case_title(self, conn, id_, title):
    """Store a new title for the case with the given ID and log the change."""
    with conn.cursor() as cursor:
        cursor.execute("UPDATE cases SET case_title = ? WHERE case_id = ?",
                       (title, id_))
    message = u"Updated title of case {0} to '{1}'"
    self.logger.debug(message.format(id_, title))

def clerk(self, conn, cases):
    """Go through every case, updating it as needed.

    Cases whose status is unknown are simply saved back to the database;
    all others are clerked individually. Returns the combined list of
    notices produced while clerking.
    """
    with conn.cursor() as cursor:
        cursor.execute("SELECT volunteer_username FROM volunteers")
        volunteers = [name for (name,) in cursor.fetchall()]

    notices = []
    for case in cases:
        self.logger.debug(u"Clerking case {0} ('{1}')".format(case.id, case.title))
        if case.status != self.STATUS_UNKNOWN:
            notices += self.clerk_case(conn, case, volunteers)
        else:
            self.save_existing_case(conn, case)
    self.logger.debug("Done clerking cases")
    return notices

def clerk_case(self, conn, case, volunteers):
    """Clerk one case according to its status and save the result.

    Returns the list of notices to send (possibly empty).
    """
    signatures = self.read_signatures(case.body)
    storedsigs = self.get_signatures_from_db(conn, case)
    newsigs = set(signatures) - set(storedsigs)
    # A new volunteer signature resets the "size at last volunteer edit".
    if any(editor in volunteers for (editor, _) in newsigs):
        case.last_volunteer_size = len(case.body)

    notices = []
    status = case.status
    if status == self.STATUS_NEW:
        notices = self.clerk_new_case(case, volunteers, signatures)
    elif status == self.STATUS_OPEN:
        notices = self.clerk_open_case(case, signatures)
    elif status == self.STATUS_NEEDASSIST:
        notices = self.clerk_needassist_case(case, volunteers, newsigs)
    elif status == self.STATUS_STALE:
        notices = self.clerk_stale_case(case, newsigs)
    elif status == self.STATUS_REVIEW:
        notices = self.clerk_review_case(case)
    elif status in [self.STATUS_RESOLVED, self.STATUS_CLOSED]:
        self.clerk_closed_case(case, signatures)

    self.save_case_updates(conn, case, volunteers, signatures, storedsigs)
    return notices

def clerk_new_case(self, case, volunteers, signatures):
    """Clerk a case in the "brand new" state.

    A volunteer signature moves the case to "open". Otherwise it becomes
    "needassist" once it is over two days old or has grown by more than
    15,000 bytes since the last volunteer edit. Returns the notices for
    the parties of the case.
    """
    notices = self.notify_parties(case)
    if any(editor in volunteers for (editor, _) in signatures):
        self.update_status(case, self.STATUS_OPEN)
        return notices

    two_days = 60 * 60 * 24 * 2
    age = (datetime.utcnow() - case.file_time).total_seconds()
    if age > two_days or len(case.body) - case.last_volunteer_size > 15000:
        self.update_status(case, self.STATUS_NEEDASSIST)
    return notices

def clerk_open_case(self, case, signatures):
    """Clerk an open case (one a volunteer has already edited).

    The case becomes "review" if it is old enough for that. Otherwise it
    becomes "needassist" if over 15,000 bytes were added since a volunteer
    last edited, and then "stale" if the newest signature is more than two
    days old. Never produces notices.
    """
    if self.check_for_review(case):
        return []

    growth = len(case.body) - case.last_volunteer_size
    if growth > 15000:
        self.update_status(case, self.STATUS_NEEDASSIST)

    timestamps = [stamp for (_, stamp) in signatures]
    if timestamps:
        idle = (datetime.utcnow() - max(timestamps)).total_seconds()
        if idle > 60 * 60 * 24 * 2:
            self.update_status(case, self.STATUS_STALE)
    return []

def clerk_needassist_case(self, case, volunteers, newsigs):
    """Clerk a "needassist" case (no volunteer edits in 15,000 bytes).

    The case becomes "review" if it is old enough for that; otherwise a new
    signature from a volunteer moves it back to "open". Never produces
    notices.
    """
    if not self.check_for_review(case):
        volunteer_edited = any([editor in volunteers
                                for (editor, _) in newsigs])
        if volunteer_edited:
            self.update_status(case, self.STATUS_OPEN)
    return []

def clerk_stale_case(self, case, newsigs):
    """Clerk a stale case (no edits in two days).

    The case becomes "review" if it is old enough for that; otherwise any
    new signature at all moves it back to "open". Never produces notices.
    """
    if not self.check_for_review(case) and newsigs:
        self.update_status(case, self.STATUS_OPEN)
    return []

def clerk_review_case(self, case):
    """Clerk a "review" case (open for more than four days).

    A message will be sent to the "very old notifiee", which is generally
    [[User talk:Szhang (WMF)]], if the case has been open for more than
    five days and no such message was scheduled before. Returns a list
    holding that single notice, or an empty list.
    """
    age = (datetime.utcnow() - case.file_time).total_seconds()
    if age > 60 * 60 * 24 * 5:
        if not case.very_old_notified:
            tmpl = self.tl_notify_stale
            # A raw pipe in the title would be read as a parameter separator.
            title = case.title.replace("|", "&#124;")
            template = "{{subst:" + tmpl + "|" + title + "}}"
            # Unicode format string: a byte-string .format() raises
            # UnicodeEncodeError on non-ASCII titles in Python 2.
            miss = u"<!-- Template:DRN stale notice | {0} -->".format(title)
            notice = _Notice(self.very_old_title, template, miss)
            case.very_old_notified = True
            msg = u" {0}: will notify [[{1}]] with '{2}'"
            log = msg.format(case.id, self.very_old_title, template)
            self.logger.debug(log)
            return [notice]
    return []

def clerk_closed_case(self, case, signatures):
    """Clerk a closed or resolved case.

    The case will be archived if it has been closed/resolved for more than
    one day and its newest signature is also more than one day old.
    "Archiving" is the process of adding {{DRN archive top}}, {{DRN archive
    bottom}}, and removing the [[User:DoNotArchiveUntil]] comment. A case
    with no signatures at all is never archived.
    """
    if case.close_time == self.min_ts:
        # First time we see the case closed: start the clock now.
        case.close_time = datetime.utcnow()
    if case.archived:
        return
    timestamps = [timestamp for (editor, timestamp) in signatures]
    closed_age = (datetime.utcnow() - case.close_time).total_seconds()
    if timestamps:
        modify_age = (datetime.utcnow() - max(timestamps)).total_seconds()
    else:
        modify_age = 0
    if closed_age > 60 * 60 * 24 and modify_age > 60 * 60 * 24:
        arch_top = self.tl_archive_top
        arch_bottom = self.tl_archive_bottom
        # Template names are plain text: escape them inside patterns and
        # insert them through a function so they are never treated as a
        # replacement template.
        top_esc = re.escape(arch_top)
        bottom_esc = re.escape(arch_bottom)
        reg = r"<!-- \[\[User:DoNotArchiveUntil\]\] .*? -->(<!-- .*? -->)?"
        if re.search(reg, case.body):
            # Drop any existing top template so we don't end up with two.
            case.body = re.sub(r"\{\{" + top_esc + r"\}\}", "", case.body)
            case.body = re.sub(reg, lambda match: "{{" + arch_top + "}}",
                               case.body)
        if not re.search(bottom_esc + r"\s*\}\}\s*\Z", case.body):
            case.body += "\n{{" + arch_bottom + "}}"
        case.archived = True
        self.logger.debug(u" {0}: archived case".format(case.id))

def check_for_review(self, case):
    """Set a case to "review" if it was filed over four days ago.

    Returns True if the status update was attempted, False otherwise.
    """
    four_days = 60 * 60 * 24 * 4
    age = (datetime.utcnow() - case.file_time).total_seconds()
    if age <= four_days:
        return False
    self.update_status(case, self.STATUS_REVIEW)
    return True

def update_status(self, case, new):
    """Change a case's status unless that was already our last action.

    Refusing to repeat our previous action keeps us from edit warring with
    whoever changed the status back. Either outcome is logged.
    """
    # The empty first alias (brand new case) is shown as "NEW" in the logs.
    old_n = self.ALIASES[case.status][0].upper() or "NEW"
    new_n = self.ALIASES[new][0].upper() or "NEW"
    if case.last_action == new:
        log = u"Avoiding {0} {1} -> {2} because we already did this ('{3}')"
        self.logger.info(log.format(case.id, old_n, new_n, case.title))
        return
    case.status = new
    log = u" {0}: {1} -> {2}"
    self.logger.debug(log.format(case.id, old_n, new_n))

def read_signatures(self, text):
    """Return a list of all parseable signatures in the body of a case.

    Signatures are returned as tuples of (editor, timestamp as datetime).
    A signature is a link to a user page, user talk page, or contributions
    page followed (within 256 characters) by a UTC timestamp. Anything that
    cannot be parsed (an empty username or an invalid date) is skipped, as
    is the [[User:DoNotArchiveUntil]] pseudo-signature.
    """
    regex = r"\[\[(?:User(?:\stalk)?\:|Special\:Contributions\/)"
    regex += r"([^\n\[\]|]{,256}?)(?:\||\]\])"
    # Only accept the last user link before the timestamp:
    regex += r"(?!.*?(?:User(?:\stalk)?\:|Special\:Contributions\/).*?)"
    regex += r".{,256}?(\d{2}:\d{2},\s\d{1,2}\s\w+\s\d{4}\s\(UTC\))"
    matches = re.findall(regex, text, re.U|re.I)
    signatures = []
    for userlink, stamp in matches:
        # Drop any subpage and normalize like a MediaWiki title.
        username = userlink.split("/", 1)[0].replace("_", " ").strip()
        if not username:
            continue  # e.g. "[[User:|x]]" or "[[User:/sandbox]]"
        username = username[0].upper() + username[1:]
        if username == "DoNotArchiveUntil":
            continue
        stamp = stamp.strip()
        try:
            timestamp = datetime.strptime(stamp, "%H:%M, %d %B %Y (UTC)")
        except ValueError:
            continue  # Looks like a timestamp, but isn't a real date
        signatures.append((username, timestamp))
    return signatures

def get_signatures_from_db(self, conn, case):
    """Fetch the signatures stored in the database for a case.

    The rows are (username, timestamp) pairs, i.e. the same shape that
    read_signatures() returns.
    """
    query = ("SELECT signature_username, signature_timestamp "
             "FROM signatures WHERE signature_case = ?")
    with conn.cursor() as cursor:
        cursor.execute(query, (case.id,))
        rows = cursor.fetchall()
    return rows

def notify_parties(self, case):
    """Schedule notices to be sent to all parties of a case.

    Parties are read from the {{User|...}} lines of the "Users involved"
    section; the filing editor is skipped. Returns a list of notices, which
    is empty if the parties were already notified or if the section cannot
    be found (in which case the case is not marked as notified, so we try
    again next time).
    """
    if case.parties_notified:
        return []

    notices = []
    template = "{{subst:" + self.tl_notify_party
    template += "|thread=" + case.title + "}} ~~~~"
    too_late = "<!--Template:DRN-notice-->"

    re_parties = "<span.*?>'''Users involved'''</span>(.*?)<span.*?>"
    text = re.search(re_parties, case.body, re.S|re.U)
    if not text:
        log = u" {0}: couldn't find the list of involved users; not notifying"
        self.logger.warning(log.format(case.id))
        return []
    for line in text.group(1).splitlines():
        user = re.search(r"[:*#]{,5} \{\{User\|(.*?)\}\}", line)
        if user:
            party = user.group(1).replace("_", " ").strip()
            if party:
                party = party[0].upper() + party[1:]
                if party == case.file_user:
                    continue  # The filer already knows about the case
                notice = _Notice("User talk:" + party, template, too_late)
                notices.append(notice)

    case.parties_notified = True
    log = u" {0}: will try to notify {1} parties with '{2}'"
    self.logger.debug(log.format(case.id, len(notices), template))
    return notices

def save_case_updates(self, conn, case, volunteers, sigs, storedsigs):
    """Save any updates made to a case and signatures in the database.

    If the status changed, the status template in the case body is
    rewritten and the change is recorded as our last action. The latest
    modification and latest volunteer edit are derived from sigs, the case
    row is inserted or updated, and the signatures table is synced so that
    it holds exactly sigs (storedsigs being what it held before).
    """
    if case.status != case.original_status:
        case.last_action = case.status
        new = self.ALIASES[case.status][0]
        tl_status_esc = re.escape(self.tl_status)
        search = r"\{\{" + tl_status_esc + r"(\|?.*?)\}\}"
        repl = "{{" + self.tl_status + "|" + new + "}}"
        case.body = re.sub(search, repl, case.body)

    if sigs:
        newest_ts = max([stamp for (user, stamp) in sigs])
        newest_user = [usr for (usr, stamp) in sigs if stamp == newest_ts][0]
        case.modify_time = newest_ts
        case.modify_user = newest_user

    # Only look at volunteers' signatures here; otherwise a non-volunteer
    # who signed in the same minute could be recorded as the volunteer.
    volunteer_sigs = [(usr, stamp) for (usr, stamp) in sigs
                      if usr in volunteers]
    if volunteer_sigs:
        newest_vts = max([stamp for (usr, stamp) in volunteer_sigs])
        newest_vuser = [usr for (usr, stamp) in volunteer_sigs
                        if stamp == newest_vts][0]
        case.volunteer_time = newest_vts
        case.volunteer_user = newest_vuser

    if case.new:
        self.save_new_case(conn, case)
    else:
        self.save_existing_case(conn, case)

    with conn.cursor() as cursor:
        query1 = "DELETE FROM signatures WHERE signature_case = ? AND signature_username = ? AND signature_timestamp = ?"
        query2 = "INSERT INTO signatures (signature_case, signature_username, signature_timestamp) VALUES (?, ?, ?)"
        removals = set(storedsigs) - set(sigs)
        additions = set(sigs) - set(storedsigs)
        if removals:
            args = [(case.id, name, stamp) for (name, stamp) in removals]
            cursor.executemany(query1, args)
        if additions:
            args = [(case.id, name, stamp) for (name, stamp) in additions]
            cursor.executemany(query2, args)
        msg = u" {0}: added {1} signatures and removed {2}"
        log = msg.format(case.id, len(additions), len(removals))
        self.logger.debug(log)

def save_new_case(self, conn, case):
    """Insert a brand new case into the database as a single row."""
    # Attribute order must match the column order of the cases table,
    # because the INSERT below doesn't name its columns.
    attributes = ("id", "title", "status", "last_action", "file_user",
                  "file_time", "modify_user", "modify_time",
                  "volunteer_user", "volunteer_time", "close_time",
                  "parties_notified", "very_old_notified", "archived",
                  "last_volunteer_size")
    args = tuple(getattr(case, attr) for attr in attributes)
    query = "INSERT INTO cases VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
    with conn.cursor() as cursor:
        cursor.execute(query, args)
    self.logger.debug(u" {0}: inserted new case into database".format(case.id))

def save_existing_case(self, conn, case):
    """Bring the stored row for a case in line with the case object.

    Only columns whose value differs from the stored one are written; each
    such change is logged. Nothing is written if nothing changed.
    """
    with conn.cursor(oursql.DictCursor) as cursor:
        cursor.execute("SELECT * FROM cases WHERE case_id = ?", (case.id,))
        stored = cursor.fetchone()

    current = [
        ("case_status", case.status),
        ("case_last_action", case.last_action),
        ("case_file_user", case.file_user),
        ("case_file_time", case.file_time),
        ("case_modify_user", case.modify_user),
        ("case_modify_time", case.modify_time),
        ("case_volunteer_user", case.volunteer_user),
        ("case_volunteer_time", case.volunteer_time),
        ("case_close_time", case.close_time),
        ("case_parties_notified", case.parties_notified),
        ("case_very_old_notified", case.very_old_notified),
        ("case_archived", case.archived),
        ("case_last_volunteer_size", case.last_volunteer_size)
    ]
    with conn.cursor() as cursor:
        assignments, values = [], []
        for column, data in current:
            if data != stored[column]:
                assignments.append(column + " = ?")
                values.append(data)
                msg = u" {0}: will alter {1} ('{2}' -> '{3}')"
                self.logger.debug(msg.format(case.id, column,
                                             stored[column], data))
        if not assignments:
            self.logger.debug(u" {0}: no changes to commit".format(case.id))
        else:
            values.append(case.id)  # For the WHERE clause
            template = "UPDATE cases SET {0} WHERE case_id = ?"
            cursor.execute(template.format(", ".join(assignments)), values)

def save(self, page, cases, kwargs, start):
    """Write the modified case bodies back to the noticeboard page.

    Returns True if the page was saved or needed no edit. If someone else
    edited the page while we were working, the whole job is restarted via
    run(**kwargs) and False is returned. At least 60 seconds are allowed to
    pass since start (the job's starting time) before saving.
    """
    text = page.get()
    newtext = text
    counter = 0
    for case in cases:
        if case.old != case.body:
            newtext = newtext.replace(case.old, case.body)
            counter += 1
    if newtext == text:
        self.logger.info(u"Nothing to edit on [[{0}]]".format(page.title))
        return True

    worktime = time() - start
    if worktime < 60:
        log = "Waiting {0} seconds to avoid edit conflicts"
        self.logger.debug(log.format(int(60 - worktime)))
        sleep(60 - worktime)
    page.reload()
    if page.get() != text:
        log = "Someone has edited the page while we were working; restarting"
        self.logger.warn(log)
        self.run(**kwargs)
        return False

    plural = "" if counter == 1 else "s"
    summary = self.clerk_summary.replace("$3", str(counter))
    summary = summary.replace("$4", plural)
    page.edit(newtext, summary, minor=True, bot=True)
    log = u"Saved page [[{0}]] ({1} updates)"
    self.logger.info(log.format(page.title, counter))
    return True

def send_notices(self, site, notices):
    """Send out any templated notices to users or pages.

    Each notice's template is appended to its target page. A target is
    skipped if it is the talk page of a user that neither exists nor is an
    IP, or if the page already contains the notice's "too late" marker.
    Edit errors are logged and do not stop the remaining notices.
    """
    if not notices:
        self.logger.info("No notices to send")
        return
    for notice in notices:
        target, template = notice.target, notice.template
        log = u"Trying to notify [[{0}]] with '{1}'"
        self.logger.debug(log.format(target, template))
        page = site.get_page(target)
        if page.namespace == constants.NS_USER_TALK:
            user = site.get_user(target.split(":", 1)[1])
            if not user.exists and not user.is_ip:
                log = u"Skipping [[{0}]]; user does not exist and is not an IP"
                self.logger.info(log.format(target))
                continue
        try:
            text = page.get()
        except exceptions.PageNotFoundError:
            text = ""  # The page will be created by our edit
        if notice.too_late and notice.too_late in text:
            log = u"Skipping [[{0}]]; was already notified with '{1}'"
            self.logger.info(log.format(page.title, template))
            continue
        text += ("\n" if text else "") + template
        try:
            page.edit(text, self.notify_summary, minor=False, bot=True)
        except exceptions.EditError as error:
            # __name__, not name: classes have no "name" attribute, so the
            # old spelling raised AttributeError inside this handler.
            name, msg = type(error).__name__, error.message
            log = u"Couldn't leave notice on [[{0}]] because of {1}: {2}"
            self.logger.error(log.format(page.title, name, msg))
        else:
            log = u"Notified [[{0}]] with '{1}'"
            self.logger.info(log.format(page.title, template))

    self.logger.debug("Done sending notices")

def update_chart(self, conn, site):
    """Update the chart of open or recently closed cases.

    The freshly compiled chart replaces everything between the "status
    begin" and "status end" comments on the chart page. If that changes the
    page, the signature between "sig begin" and "sig end" is refreshed too
    and the page is saved.
    """
    page = site.get_page(self.chart_title)
    self.logger.info(u"Updating case status at [[{0}]]".format(page.title))
    statuses = self.compile_chart(conn)
    text = page.get()
    chart = "<!-- status begin -->\n" + statuses + "\n<!-- status end -->"
    # Substitute through a function: the chart contains case titles, and a
    # backslash in one must not be read as a regex escape or group reference.
    newtext = re.sub(u"<!-- status begin -->(.*?)<!-- status end -->",
                     lambda match: chart, text, flags=re.DOTALL)
    if newtext == text:
        self.logger.info("Chart unchanged; not saving")
        return

    newtext = re.sub("<!-- sig begin -->(.*?)<!-- sig end -->",
                     "<!-- sig begin -->~~~ at ~~~~~<!-- sig end -->",
                     newtext)
    page.edit(newtext, self.chart_summary, minor=True, bot=True)
    self.logger.info(u"Chart saved to [[{0}]]".format(page.title))

def compile_chart(self, conn):
    """Build the chart wikitext: header, one row per stored case, footer.

    Cases whose status is unknown are left out.
    """
    header = "{{" + self.tl_chart_header + "|small={{{small|}}}}}\n"
    with conn.cursor(oursql.DictCursor) as cursor:
        cursor.execute("SELECT * FROM cases WHERE case_status != ?",
                       (self.STATUS_UNKNOWN,))
        rows = [self.compile_row(case) for case in cursor]
    footer = "{{" + self.tl_chart_footer + "|small={{{small|}}}}}"
    return header + "".join(rows) + footer

def compile_row(self, case):
    """Render one chart row template from a case row (a dict).

    *case* is keyed by the ``cases`` column names. The derived display
    values (``title`` and the ``*_time`` / ``*_sortkey`` entries) are
    stored back into *case* before formatting, so the dict is modified in
    place. The volunteer fields are only emitted when
    ``case_volunteer_user`` is set.
    """
    def add_times(kind):
        # Store the human-readable age and the numeric sort key for one
        # of the case's timestamps.
        moment = case["case_" + kind + "_time"]
        case[kind + "_time"] = self.format_time(moment)
        case[kind + "_sortkey"] = int(mktime(moment.timetuple()))

    fields = [u"|t={case_title}|d={title}|s={case_status}",
              "|cu={case_file_user}|cs={file_sortkey}|ct={file_time}"]
    if case["case_volunteer_user"]:
        fields.append(
            "|vu={case_volunteer_user}|vs={volunteer_sortkey}|vt={volunteer_time}")
        add_times("volunteer")
    fields.append("|mu={case_modify_user}|ms={modify_sortkey}|mt={modify_time}")

    # Display title: underscores become spaces, pipes are escaped so they
    # cannot split the template's parameters, and anything longer than 50
    # characters is cut to 47 plus an ellipsis.
    shown = case["case_title"].replace("_", " ").replace("|", "&#124;")
    if len(shown) > 50:
        shown = shown[:47] + "..."
    case["title"] = shown

    add_times("file")
    add_times("modify")

    template = "".join(fields)
    return "{{" + self.tl_chart_row + template.format(**case) + "|sm={{{small|}}}}}\n"

def format_time(self, dt):
    """Return a string telling the time since datetime occurred.

    *dt* is a naive datetime that is compared against
    ``datetime.utcnow()``. The elapsed time is broken into whole years
    (counted as 365 days), days and hours; zero-valued units are omitted,
    e.g. ``"1 year, 3 days ago"``. Anything under an hour, and any *dt*
    that lies in the future, is reported as ``"0 hours ago"``.
    """
    parts = [("year", 31536000), ("day", 86400), ("hour", 3600)]
    # Clamp at zero: a timestamp slightly ahead of our clock would
    # otherwise produce negative counts such as "-1 years, 364 days ago".
    seconds = max(0, int((datetime.utcnow() - dt).total_seconds()))
    msg = []
    for name, size in parts:
        num, seconds = divmod(seconds, size)
        if num:
            chunk = "{0} {1}".format(num, name if num == 1 else name + "s")
            msg.append(chunk)
    return ", ".join(msg) + " ago" if msg else "0 hours ago"

def purge_old_data(self, conn):
    """Delete old cases (> six months) from the database.

    Removes every case whose status is ``STATUS_UNKNOWN`` and whose filing
    time and last-modification time are both more than 180 days in the
    past, together with any signatures recorded for that case.

    NOTE(review): the log message says "closed" while the filter is
    ``STATUS_UNKNOWN`` -- presumably cases are moved to that status once
    they are no longer tracked; confirm against the rest of the task.
    """
    log = "Purging closed cases older than six months from the database"
    self.logger.info(log)
    # LEFT JOIN rather than an inner join: with an inner join, a case that
    # has no rows in `signatures` never matches and so is never purged.
    query = """DELETE cases, signatures
        FROM cases LEFT JOIN signatures ON case_id = signature_case
        WHERE case_status = ?
        AND case_file_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 180 DAY)
        AND case_modify_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 180 DAY)
    """
    with conn.cursor() as cursor:
        cursor.execute(query, (self.STATUS_UNKNOWN,))


class _Case(object):
"""A object representing a dispute resolution case."""
def __init__(self, id_, title, status, last_action, file_user, file_time,
modify_user, modify_time, volunteer_user, volunteer_time,
close_time, parties_notified, archived, very_old_notified,
last_volunteer_size, new=False):
self.id = id_
self.title = title
self.status = status
self.last_action = last_action
self.file_user = file_user
self.file_time = file_time
self.modify_user = modify_user
self.modify_time = modify_time
self.volunteer_user = volunteer_user
self.volunteer_time = volunteer_time
self.close_time = close_time
self.parties_notified = parties_notified
self.very_old_notified = very_old_notified
self.archived = archived
self.last_volunteer_size = last_volunteer_size
self.new = new

self.original_status = status
self.body = None
self.old = None


class _Notice(object):
"""An object representing a notice to be sent to a user or a page."""
def __init__(self, target, template, too_late=None):
self.target = target
self.template = template
self.too_late = too_late

+ 33
- 0
tasks/image_display_resize.py 查看文件

@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from earwigbot.tasks import Task

class ImageDisplayResize(Task):
    """A task to resize upscaled portraits in infoboxes.

    Placeholder only: both hooks below are empty.
    """
    name = "image_display_resize"

    def setup(self):
        """Do nothing; this stub needs no setup."""

    def run(self, **kwargs):
        """Do nothing; the task body has not been written."""

+ 36
- 0
tasks/schema/afc_copyvios.sql 查看文件

@@ -0,0 +1,36 @@
-- MySQL dump 10.13 Distrib 5.5.12, for solaris10 (i386)
--
-- Host: sql Database: u_earwig_afc_copyvios
-- ------------------------------------------------------
-- Server version 5.1.59

-- Create the database for the tables defined in this file, with utf8 text and
-- the utf8_unicode_ci collation as its defaults.
-- NOTE(review): no USE statement follows, so the DROP/CREATE TABLE statements
-- below act on whichever database is selected when this file is loaded.
CREATE DATABASE `u_earwig_afc_copyvios`
DEFAULT CHARACTER SET utf8
DEFAULT COLLATE utf8_unicode_ci;

--
-- Table structure for table `cache`
--
-- One row per `cache_id`; the id is not AUTO_INCREMENT, so it must be supplied
-- by whoever inserts the row.
-- NOTE(review): the zero-date default on `cache_time` is rejected by servers
-- running with NO_ZERO_DATE / strict SQL mode (the default since MySQL 5.7).
--

DROP TABLE IF EXISTS `cache`;
CREATE TABLE `cache` (
`cache_id` int(10) unsigned NOT NULL,
-- presumably a 64-character hex digest -- confirm against the task code
`cache_hash` char(64) COLLATE utf8_unicode_ci DEFAULT NULL,
`cache_url` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
`cache_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
`cache_queries` int(4) DEFAULT NULL,
`cache_process_time` float DEFAULT NULL,
PRIMARY KEY (`cache_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

--
-- Table structure for table `processed`
--
-- A bare set of page ids: the primary key is the only column.
--

DROP TABLE IF EXISTS `processed`;
CREATE TABLE `processed` (
`page_id` int(10) unsigned NOT NULL,
PRIMARY KEY (`page_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

-- Dump completed on 2012-07-20 20:21:00

+ 23
- 0
tasks/schema/afc_history.sql 查看文件

@@ -0,0 +1,23 @@
-- MySQL dump 10.13 Distrib 5.5.12, for solaris10 (i386)
--
-- Host: sql Database: u_earwig_afc_history
-- ------------------------------------------------------
-- Server version 5.1.59

-- Create the database for the table defined in this file, with utf8 text and
-- the utf8_unicode_ci collation as its defaults.
-- NOTE(review): no USE statement follows, so the DROP/CREATE TABLE statements
-- below act on whichever database is selected when this file is loaded.
CREATE DATABASE `u_earwig_afc_history`
DEFAULT CHARACTER SET utf8
DEFAULT COLLATE utf8_unicode_ci;

--
-- Table structure for table `page`
--
-- One row per `page_id`; the id is not AUTO_INCREMENT, so it must be supplied
-- by whoever inserts the row.
--

DROP TABLE IF EXISTS `page`;
CREATE TABLE `page` (
`page_id` int(10) unsigned NOT NULL,
-- stored as text (up to 50 characters), not as a DATE/TIMESTAMP column
`page_date` varchar(50) COLLATE utf8_unicode_ci DEFAULT NULL,
-- small numeric code (0-255); its meaning is not defined in this file
`page_status` tinyint(3) unsigned DEFAULT NULL,
PRIMARY KEY (`page_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

-- Dump completed on 2012-07-20 20:20:39

+ 68
- 0
tasks/schema/afc_statistics.sql 查看文件

@@ -0,0 +1,68 @@
-- MySQL dump 10.13 Distrib 5.5.12, for solaris10 (i386)
--
-- Host: sql Database: u_earwig_afc_statistics
-- ------------------------------------------------------
-- Server version 5.1.59

-- Create the database for the tables defined in this file, with utf8 text and
-- the utf8_unicode_ci collation as its defaults.
-- NOTE(review): no USE statement follows, so the statements below act on
-- whichever database is selected when this file is loaded.
CREATE DATABASE `u_earwig_afc_statistics`
DEFAULT CHARACTER SET utf8
DEFAULT COLLATE utf8_unicode_ci;

--
-- Table structure for table `chart`
--
-- Each chart has a title and a "special" title. The AUTO_INCREMENT counter is
-- preset to 7, one past the highest id seeded below.
--

DROP TABLE IF EXISTS `chart`;
CREATE TABLE `chart` (
`chart_id` tinyint(3) unsigned NOT NULL AUTO_INCREMENT,
`chart_title` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`chart_special_title` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
PRIMARY KEY (`chart_id`)
) ENGINE=InnoDB AUTO_INCREMENT=7 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

--
-- Dumping data for table `chart`
--
-- Seeds the five charts as (chart_id, chart_title, chart_special_title).
-- Ids are given explicitly and id 2 is not used.
--

LOCK TABLES `chart` WRITE;
INSERT INTO `chart` VALUES
(1,'Pending submissions','Submitted'),
(3,'Being reviewed','Reviewer'),
(4,'Recently accepted','Accepted'),
(5,'Recently declined','Declined'),
(6,'Misplaced submissions','Created');
UNLOCK TABLES;

--
-- Table structure for table `row`
--
-- NOTE(review): `row_chart` presumably holds a `chart`.`chart_id` (same column
-- type), but no foreign key is declared -- confirm against the task code.
--

DROP TABLE IF EXISTS `row`;
CREATE TABLE `row` (
`row_id` int(10) unsigned NOT NULL,
`row_chart` tinyint(3) unsigned DEFAULT NULL,
PRIMARY KEY (`row_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

--
-- Table structure for table `page`
--
-- One row per `page_id`, with "modify" and "special" user/time/oldid triples.
-- NOTE(review): the zero-date defaults on the two timestamp columns are
-- rejected by servers running with NO_ZERO_DATE / strict SQL mode (the default
-- since MySQL 5.7).
--

DROP TABLE IF EXISTS `page`;
CREATE TABLE `page` (
`page_id` int(10) unsigned NOT NULL,
`page_status` varchar(16) COLLATE utf8_unicode_ci DEFAULT NULL,
`page_title` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
`page_short` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
-- stored as text (up to 16 characters), not as a number
`page_size` varchar(16) COLLATE utf8_unicode_ci DEFAULT NULL,
`page_notes` tinytext COLLATE utf8_unicode_ci,
`page_modify_user` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`page_modify_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
`page_modify_oldid` int(10) unsigned DEFAULT NULL,
`page_special_user` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`page_special_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
`page_special_oldid` int(10) unsigned DEFAULT NULL,
PRIMARY KEY (`page_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

-- Dump completed on 2012-07-20 20:25:10

+ 59
- 0
tasks/schema/drn_clerkbot.sql 查看文件

@@ -0,0 +1,59 @@
-- MySQL dump 10.13 Distrib 5.5.12, for solaris10 (i386)
--
-- Host: sql Database: u_earwig_drn_clerkbot
-- ------------------------------------------------------
-- Server version 5.1.59

-- Create the database for the tables defined in this file, with utf8 text and
-- the utf8_unicode_ci collation as its defaults.
-- NOTE(review): no USE statement follows, so the DROP/CREATE TABLE statements
-- below act on whichever database is selected when this file is loaded.
CREATE DATABASE `u_earwig_drn_clerkbot`
DEFAULT CHARACTER SET utf8
DEFAULT COLLATE utf8_unicode_ci;

--
-- Table structure for table `cases`
--
-- One row per dispute resolution case. The columns, minus the `case_` prefix,
-- match the constructor arguments of `_Case` in tasks/drn_clerkbot.py.
-- NOTE(review): the zero-date defaults on the timestamp columns are rejected
-- by servers running with NO_ZERO_DATE / strict SQL mode (the default since
-- MySQL 5.7).
--

DROP TABLE IF EXISTS `cases`;
CREATE TABLE `cases` (
`case_id` int(10) unsigned NOT NULL,
`case_title` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
-- numeric status code; the chart and purge queries compare it with STATUS_UNKNOWN
`case_status` int(2) unsigned DEFAULT NULL,
`case_last_action` int(2) unsigned DEFAULT NULL,
`case_file_user` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
`case_file_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
`case_modify_user` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
`case_modify_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
-- NULL/empty when there is no volunteer; the chart row then omits the volunteer fields
`case_volunteer_user` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
`case_volunteer_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
`case_close_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
`case_parties_notified` tinyint(1) unsigned DEFAULT NULL,
`case_very_old_notified` tinyint(1) unsigned DEFAULT NULL,
`case_archived` tinyint(1) unsigned DEFAULT NULL,
`case_last_volunteer_size` int(9) unsigned DEFAULT NULL,
PRIMARY KEY (`case_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

--
-- Table structure for table `signatures`
--
-- `signature_case` is joined to `cases`.`case_id` when old cases are purged.
-- NOTE(review): no foreign key or secondary index is declared on
-- `signature_case`.
--

DROP TABLE IF EXISTS `signatures`;
CREATE TABLE `signatures` (
`signature_id` int(10) unsigned NOT NULL AUTO_INCREMENT,
`signature_case` int(10) unsigned NOT NULL,
`signature_username` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
`signature_timestamp` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
PRIMARY KEY (`signature_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

--
-- Table structure for table `volunteers`
--
-- A list of usernames, each with an auto-assigned id.
--

DROP TABLE IF EXISTS `volunteers`;
CREATE TABLE `volunteers` (
`volunteer_id` int(10) unsigned NOT NULL AUTO_INCREMENT,
`volunteer_username` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
PRIMARY KEY (`volunteer_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

-- Dump completed on 2012-07-31 1:34:28

正在加载...
取消
保存