From 36c32b0d99b5e809920a69996166f77dad877be0 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 23 Aug 2012 17:39:49 -0400 Subject: [PATCH] Converting a number of commands/tasks to plugins. --- earwigbot/commands/afc_pending.py | 34 -- earwigbot/commands/afc_report.py | 113 ----- earwigbot/commands/afc_status.py | 162 ------ earwigbot/commands/afc_submissions.py | 59 --- earwigbot/commands/geolocate.py | 74 --- earwigbot/commands/git_command.py | 240 --------- earwigbot/commands/praise.py | 48 -- earwigbot/tasks/afc_catdelink.py | 34 -- earwigbot/tasks/afc_copyvios.py | 164 ------- earwigbot/tasks/afc_dailycats.py | 34 -- earwigbot/tasks/afc_history.py | 227 --------- earwigbot/tasks/afc_statistics.py | 739 ---------------------------- earwigbot/tasks/afc_undated.py | 33 -- earwigbot/tasks/blp_tag.py | 34 -- earwigbot/tasks/drn_clerkbot.py | 787 ------------------------------ earwigbot/tasks/image_display_resize.py | 33 -- earwigbot/tasks/schema/afc_copyvios.sql | 36 -- earwigbot/tasks/schema/afc_history.sql | 23 - earwigbot/tasks/schema/afc_statistics.sql | 68 --- earwigbot/tasks/schema/drn_clerkbot.sql | 59 --- 20 files changed, 3001 deletions(-) delete mode 100644 earwigbot/commands/afc_pending.py delete mode 100644 earwigbot/commands/afc_report.py delete mode 100644 earwigbot/commands/afc_status.py delete mode 100644 earwigbot/commands/afc_submissions.py delete mode 100644 earwigbot/commands/geolocate.py delete mode 100644 earwigbot/commands/git_command.py delete mode 100644 earwigbot/commands/praise.py delete mode 100644 earwigbot/tasks/afc_catdelink.py delete mode 100644 earwigbot/tasks/afc_copyvios.py delete mode 100644 earwigbot/tasks/afc_dailycats.py delete mode 100644 earwigbot/tasks/afc_history.py delete mode 100644 earwigbot/tasks/afc_statistics.py delete mode 100644 earwigbot/tasks/afc_undated.py delete mode 100644 earwigbot/tasks/blp_tag.py delete mode 100644 earwigbot/tasks/drn_clerkbot.py delete mode 100644 earwigbot/tasks/image_display_resize.py delete mode 100644 earwigbot/tasks/schema/afc_copyvios.sql delete mode 100644 earwigbot/tasks/schema/afc_history.sql delete mode 100644 earwigbot/tasks/schema/afc_statistics.sql delete mode 100644 earwigbot/tasks/schema/drn_clerkbot.sql diff --git a/earwigbot/commands/afc_pending.py b/earwigbot/commands/afc_pending.py deleted file mode 100644 index 32b1f5d..0000000 --- a/earwigbot/commands/afc_pending.py +++ /dev/null @@ -1,34 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2009-2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -from earwigbot.commands import Command - -class AFCPending(Command): - """Link the user to the pending AFC submissions page and category.""" - name = "pending" - commands = ["pending", "pend"] - - def process(self, data): - msg1 = "Pending submissions status page: http://enwp.org/WP:AFC/ST" - msg2 = "Pending submissions category: http://enwp.org/CAT:PEND" - self.reply(data, msg1) - self.reply(data, msg2) diff --git a/earwigbot/commands/afc_report.py b/earwigbot/commands/afc_report.py deleted file mode 100644 index 14ec082..0000000 --- a/earwigbot/commands/afc_report.py +++ /dev/null @@ -1,113 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2009-2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -from earwigbot import wiki -from earwigbot.commands import Command - -class AFCReport(Command): - """Get information about an AFC submission by name.""" - name = "report" - - def process(self, data): - self.site = self.bot.wiki.get_site() - self.data = data - - try: - self.statistics = self.bot.tasks.get("afc_statistics") - except KeyError: - e = "Cannot run command: requires afc_statistics task (from earwigbot_plugins)" - self.logger.error(e) - msg = "command requires afc_statistics task (from earwigbot_plugins)" - self.reply(data, msg) - return - - if not data.args: - msg = "What submission do you want me to give information about?" - self.reply(data, msg) - return - - title = " ".join(data.args) - title = title.replace("http://en.wikipedia.org/wiki/", "") - title = title.replace("http://enwp.org/", "").strip() - - # Given '!report Foo', first try [[Foo]]: - page = self.get_page(title) - if page: - return self.report(page) - - # Then try [[Wikipedia:Articles for creation/Foo]]: - newtitle = "/".join(("Wikipedia:Articles for creation", title)) - page = self.get_page(newtitle) - if page: - return self.report(page) - - # Then try [[Wikipedia talk:Articles for creation/Foo]]: - newtitle = "/".join(("Wikipedia talk:Articles for creation", title)) - page = self.get_page(newtitle) - if page: - return self.report(page) - - self.reply(data, "Submission \x0302{0}\x0F not found.".format(title)) - - def get_page(self, title): - page = self.site.get_page(title, follow_redirects=False) - if page.exists == page.PAGE_EXISTS: - return page - - def report(self, page): - url = page.url.encode("utf8") - url = url.replace("en.wikipedia.org/wiki", "enwp.org") - short = self.statistics.get_short_title(page.title) - status = self.get_status(page) - user = page.get_creator() - user_name = user.name - user_url = user.get_talkpage().url.encode("utf8") - - msg1 = "AfC submission report for \x0302{0}\x0F ({1}):" - msg2 = "Status: \x0303{0}\x0F" - msg3 = "Submitted by \x0302{0}\x0F ({1})" - if status == "accepted": - msg3 = "Reviewed by \x0302{0}\x0F ({1})" - - self.reply(self.data, msg1.format(short, url)) - self.say(self.data.chan, msg2.format(status)) - self.say(self.data.chan, msg3.format(user_name, user_url)) - - def get_status(self, page): - if page.is_redirect: - target = page.get_redirect_target() - if self.site.get_page(target).namespace == wiki.NS_MAIN: - return "accepted" - return "redirect" - - statuses = self.statistics.get_statuses(page.get()) - if "R" in statuses: - return "being reviewed" - elif "H" in statuses: - return "pending draft" - elif "P" in statuses: - return "pending submission" - elif "T" in statuses: - return "unsubmitted draft" - elif "D" in statuses: - return "declined" - return "unkown" diff --git a/earwigbot/commands/afc_status.py b/earwigbot/commands/afc_status.py deleted file mode 100644 index 168c719..0000000 --- a/earwigbot/commands/afc_status.py +++ /dev/null @@ -1,162 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2009-2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import re - -from earwigbot.commands import Command - -class AFCStatus(Command): - """Get the number of pending AfC submissions, open redirect requests, and - open file upload requests.""" - name = "status" - commands = ["status", "count", "num", "number"] - hooks = ["join", "msg"] - - def check(self, data): - if data.is_command and data.command in self.commands: - return True - try: - if data.line[1] == "JOIN" and data.chan == "#wikipedia-en-afc": - if data.nick != self.config.irc["frontend"]["nick"]: - return True - except IndexError: - pass - return False - - def process(self, data): - self.site = self.bot.wiki.get_site() - - if data.line[1] == "JOIN": - status = " ".join(("\x02Current status:\x0F", self.get_status())) - self.notice(data.nick, status) - return - - if data.args: - action = data.args[0].lower() - if action.startswith("sub") or action == "s": - subs = self.count_submissions() - msg = "There are \x0305{0}\x0F pending AfC submissions (\x0302WP:AFC\x0F)." - self.reply(data, msg.format(subs)) - - elif action.startswith("redir") or action == "r": - redirs = self.count_redirects() - msg = "There are \x0305{0}\x0F open redirect requests (\x0302WP:AFC/R\x0F)." - self.reply(data, msg.format(redirs)) - - elif action.startswith("file") or action == "f": - files = self.count_redirects() - msg = "There are \x0305{0}\x0F open file upload requests (\x0302WP:FFU\x0F)." - self.reply(data, msg.format(files)) - - elif action.startswith("agg") or action == "a": - try: - agg_num = int(data.args[1]) - except IndexError: - agg_data = (self.count_submissions(), - self.count_redirects(), self.count_files()) - agg_num = self.get_aggregate_number(agg_data) - except ValueError: - msg = "\x0303{0}\x0F isn't a number!" - self.reply(data, msg.format(data.args[1])) - return - aggregate = self.get_aggregate(agg_num) - msg = "Aggregate is \x0305{0}\x0F (AfC {1})." - self.reply(data, msg.format(agg_num, aggregate)) - - elif action.startswith("nocolor") or action == "n": - self.reply(data, self.get_status(color=False)) - - else: - msg = "Unknown argument: \x0303{0}\x0F. Valid args are 'subs', 'redirs', 'files', 'agg', 'nocolor'." - self.reply(data, msg.format(data.args[0])) - - else: - self.reply(data, self.get_status()) - - def get_status(self, color=True): - subs = self.count_submissions() - redirs = self.count_redirects() - files = self.count_files() - agg_num = self.get_aggregate_number((subs, redirs, files)) - aggregate = self.get_aggregate(agg_num) - - if color: - msg = "Articles for creation {0} (\x0302AFC\x0F: \x0305{1}\x0F; \x0302AFC/R\x0F: \x0305{2}\x0F; \x0302FFU\x0F: \x0305{3}\x0F)." - else: - msg = "Articles for creation {0} (AFC: {1}; AFC/R: {2}; FFU: {3})." - return msg.format(aggregate, subs, redirs, files) - - def count_submissions(self): - """Returns the number of open AFC submissions (count of CAT:PEND).""" - # Subtract two for [[Wikipedia:Articles for creation/Redirects]] and - # [[Wikipedia:Files for upload]], which aren't real submissions: - return self.site.get_category("Pending AfC submissions").pages - 2 - - def count_redirects(self): - """Returns the number of open redirect submissions. Calculated as the - total number of submissions minus the closed ones.""" - title = "Wikipedia:Articles for creation/Redirects" - content = self.site.get_page(title).get() - total = len(re.findall("^\s*==(.*?)==\s*$", content, re.MULTILINE)) - closed = content.lower().count("{{afc-c|b}}") - redirs = total - closed - return redirs - - def count_files(self): - """Returns the number of open WP:FFU (Files For Upload) requests. - Calculated as the total number of requests minus the closed ones.""" - content = self.site.get_page("Wikipedia:Files for upload").get() - total = len(re.findall("^\s*==(.*?)==\s*$", content, re.MULTILINE)) - closed = content.lower().count("{{ifu-c|b}}") - files = total - closed - return files - - def get_aggregate(self, num): - """Returns a human-readable AFC status based on the number of pending - AFC submissions, open redirect requests, and open FFU requests. This - does not match {{AFC status}} directly because the algorithm factors in - WP:AFC/R and WP:FFU while the template only looks at the main - submissions. The reasoning is that AFC/R and FFU are still part of - the project, so even if there are no pending submissions, a backlog at - FFU (for example) indicates that our work is *not* done and the - project-wide backlog is most certainly *not* clear.""" - if num == 0: - return "is \x02\x0303clear\x0F" - elif num <= 200: - return "is \x0303almost clear\x0F" - elif num <= 400: - return "is \x0312normal\x0F" - elif num <= 600: - return "is \x0307lightly backlogged\x0F" - elif num <= 900: - return "is \x0304backlogged\x0F" - elif num <= 1200: - return "is \x02\x0304heavily backlogged\x0F" - else: - return "is \x02\x1F\x0304severely backlogged\x0F" - - def get_aggregate_number(self, (subs, redirs, files)): - """Returns an 'aggregate number' based on the real number of pending - submissions in CAT:PEND (subs), open redirect submissions in WP:AFC/R - (redirs), and open files-for-upload requests in WP:FFU (files).""" - num = subs + (redirs / 2) + (files / 2) - return num diff --git a/earwigbot/commands/afc_submissions.py b/earwigbot/commands/afc_submissions.py deleted file mode 100644 index 3c40774..0000000 --- a/earwigbot/commands/afc_submissions.py +++ /dev/null @@ -1,59 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2009-2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -from earwigbot.commands import Command - -class AFCSubmissions(Command): - """Link the user directly to some pending AFC submissions.""" - name = "submissions" - commands = ["submissions", "subs"] - - def setup(self): - try: - self.ignore_list = self.config.commands[self.name]["ignoreList"] - except KeyError: - try: - ignores = self.config.tasks["afc_statistics"]["ignoreList"] - self.ignore_list = ignores - except KeyError: - self.ignore_list = [] - - def process(self, data): - if data.args: - try: - number = int(data.args[0]) - except ValueError: - self.reply(data, "Argument must be a number.") - return - if number > 5: - msg = "Cannot get more than five submissions at a time." - self.reply(data, msg) - return - else: - number = 3 - - site = self.bot.wiki.get_site() - category = site.get_category("Pending AfC submissions") - members = category.get_members(limit=number + len(self.ignore_list)) - urls = [member.url.encode("utf8") for member in members if member.title not in self.ignore_list] - pages = ", ".join(urls[:number]) - self.reply(data, "{0} pending AfC subs: {1}".format(number, pages)) diff --git a/earwigbot/commands/geolocate.py b/earwigbot/commands/geolocate.py deleted file mode 100644 index 6bb8327..0000000 --- a/earwigbot/commands/geolocate.py +++ /dev/null @@ -1,74 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2009-2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import json -import urllib2 - -from earwigbot.commands import Command - -class Geolocate(Command): - """Geolocate an IP address (via http://ipinfodb.com/).""" - name = "geolocate" - commands = ["geolocate", "locate", "geo", "ip"] - - def setup(self): - self.config.decrypt(self.config.commands, self.name, "apiKey") - try: - self.key = self.config.commands[self.name]["apiKey"] - except KeyError: - self.key = None - log = 'Cannot use without an API key for http://ipinfodb.com/ stored as config.commands["{0}"]["apiKey"]' - self.logger.warn(log.format(self.name)) - - def process(self, data): - if not data.args: - self.reply(data, "Please specify an IP to lookup.") - return - - if not self.key: - msg = 'I need an API key for http://ipinfodb.com/ stored as \x0303config.commands["{0}"]["apiKey"]\x0F.' - log = 'Need an API key for http://ipinfodb.com/ stored as config.commands["{0}"]["apiKey"]' - self.reply(data, msg.format(self.name) + ".") - self.logger.error(log.format(self.name)) - return - - address = data.args[0] - url = "http://api.ipinfodb.com/v3/ip-city/?key={0}&ip={1}&format=json" - query = urllib2.urlopen(url.format(self.key, address)).read() - res = json.loads(query) - - country = res["countryName"].title() - region = res["regionName"].title() - city = res["cityName"].title() - latitude = res["latitude"] - longitude = res["longitude"] - utcoffset = res["timeZone"] - if not country and not region and not city: - self.reply(data, "IP \x0302{0}\x0F not found.".format(address)) - return - if country == "-" and region == "-" and city == "-": - self.reply(data, "IP \x0302{0}\x0F is reserved.".format(address)) - return - - msg = "{0}, {1}, {2} ({3}, {4}), UTC {5}" - geo = msg.format(country, region, city, latitude, longitude, utcoffset) - self.reply(data, geo) diff --git a/earwigbot/commands/git_command.py b/earwigbot/commands/git_command.py deleted file mode 100644 index cc6cbe5..0000000 --- a/earwigbot/commands/git_command.py +++ /dev/null @@ -1,240 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2009-2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import time - -import git - -from earwigbot.commands import Command - -class Git(Command): - """Commands to interface with the bot's git repository; use '!git' for a - sub-command list.""" - name = "git" - - def setup(self): - try: - self.repos = self.config.commands[self.name]["repos"] - except KeyError: - self.repos = None - - def process(self, data): - self.data = data - if not self.config.irc["permissions"].is_owner(data): - msg = "You must be a bot owner to use this command." - self.reply(data, msg) - return - if not data.args or data.args[0] == "help": - self.do_help() - return - if not self.repos: - self.reply(data, "No repos are specified in the config file.") - return - - command = data.args[0] - try: - repo_name = data.args[1] - except IndexError: - repos = self.get_repos() - msg = "Which repo do you want to work with (options are {0})?" - self.reply(data, msg.format(repos)) - return - if repo_name not in self.repos: - repos = self.get_repos() - msg = "Repository must be one of the following: {0}." - self.reply(data, msg.format(repos)) - return - self.repo = git.Repo(self.repos[repo_name]) - - if command == "branch": - self.do_branch() - elif command == "branches": - self.do_branches() - elif command == "checkout": - self.do_checkout() - elif command == "delete": - self.do_delete() - elif command == "pull": - self.do_pull() - elif command == "status": - self.do_status() - else: # They asked us to do something we don't know - msg = "Unknown argument: \x0303{0}\x0F.".format(data.args[0]) - self.reply(data, msg) - - def get_repos(self): - data = self.repos.iteritems() - repos = ["\x0302{0}\x0F ({1})".format(k, v) for k, v in data] - return ", ".join(repos) - - def get_remote(self): - try: - remote_name = self.data.args[2] - except IndexError: - remote_name = "origin" - try: - return getattr(self.repo.remotes, remote_name) - except AttributeError: - msg = "Unknown remote: \x0302{0}\x0F.".format(remote_name) - self.reply(self.data, msg) - - def get_time_since(self, date): - diff = time.mktime(time.gmtime()) - date - if diff < 60: - return "{0} seconds".format(int(diff)) - if diff < 60 * 60: - return "{0} minutes".format(int(diff / 60)) - if diff < 60 * 60 * 24: - return "{0} hours".format(int(diff / 60 / 60)) - return "{0} days".format(int(diff / 60 / 60 / 24)) - - def do_help(self): - """Display all commands.""" - help = { - "branch": "get current branch", - "branches": "get all branches", - "checkout": "switch branches", - "delete": "delete an old branch", - "pull": "update everything from the remote server", - "status": "check if we are up-to-date", - } - subcommands = "" - for key in sorted(help.keys()): - subcommands += "\x0303{0}\x0F ({1}), ".format(key, help[key]) - subcommands = subcommands[:-2] # Trim last comma and space - msg = "Sub-commands are: {0}; repos are: {1}. Syntax: !git \x0303subcommand\x0F \x0302repo\x0F." - self.reply(self.data, msg.format(subcommands, self.get_repos())) - - def do_branch(self): - """Get our current branch.""" - branch = self.repo.active_branch.name - msg = "Currently on branch \x0302{0}\x0F.".format(branch) - self.reply(self.data, msg) - - def do_branches(self): - """Get a list of branches.""" - branches = [branch.name for branch in self.repo.branches] - msg = "Branches: \x0302{0}\x0F.".format(", ".join(branches)) - self.reply(self.data, msg) - - def do_checkout(self): - """Switch branches.""" - try: - target = self.data.args[2] - except IndexError: # No branch name provided - self.reply(self.data, "Wwitch to which branch?") - return - - current_branch = self.repo.active_branch.name - if target == current_branch: - msg = "Already on \x0302{0}\x0F!".format(target) - self.reply(self.data, msg) - return - - try: - ref = getattr(self.repo.branches, target) - except AttributeError: - msg = "Branch \x0302{0}\x0F doesn't exist!".format(target) - self.reply(self.data, msg) - else: - ref.checkout() - ms = "Switched from branch \x0302{0}\x0F to \x0302{1}\x0F." - msg = ms.format(current_branch, target) - self.reply(self.data, msg) - log = "{0} checked out branch {1} of {2}" - logmsg = log.format(self.data.nick, target, self.repo.working_dir) - self.logger.info(logmsg) - - def do_delete(self): - """Delete a branch, while making sure that we are not already on it.""" - try: - target = self.data.args[2] - except IndexError: # No branch name provided - self.reply(self.data, "Delete which branch?") - return - - current_branch = self.repo.active_branch.name - if current_branch == target: - msg = "You're currently on this branch; please checkout to a different branch before deleting." - self.reply(self.data, msg) - return - - try: - ref = getattr(self.repo.branches, target) - except AttributeError: - msg = "Branch \x0302{0}\x0F doesn't exist!".format(target) - self.reply(self.data, msg) - else: - self.repo.git.branch("-d", ref) - msg = "Branch \x0302{0}\x0F has been deleted locally." - self.reply(self.data, msg.format(target)) - log = "{0} deleted branch {1} of {2}" - logmsg = log.format(self.data.nick, target, self.repo.working_dir) - self.logger.info(logmsg) - - def do_pull(self): - """Pull from our remote repository.""" - branch = self.repo.active_branch.name - msg = "Pulling from remote (currently on \x0302{0}\x0F)..." - self.reply(self.data, msg.format(branch)) - - remote = self.get_remote() - if not remote: - return - result = remote.pull() - updated = [info for info in result if info.flags != info.HEAD_UPTODATE] - - if updated: - branches = ", ".join([info.ref.remote_head for info in updated]) - msg = "Done; updates to \x0302{0}\x0F (from {1})." - self.reply(self.data, msg.format(branches, remote.url)) - log = "{0} pulled {1} of {2} (updates to {3})" - self.logger.info(log.format(self.data.nick, remote.name, - self.repo.working_dir, branches)) - else: - self.reply(self.data, "Done; no new changes.") - log = "{0} pulled {1} of {2} (no updates)" - self.logger.info(log.format(self.data.nick, remote.name, - self.repo.working_dir)) - - def do_status(self): - """Check if we have anything to pull.""" - remote = self.get_remote() - if not remote: - return - since = self.get_time_since(self.repo.head.object.committed_date) - result = remote.fetch(dry_run=True) - updated = [info for info in result if info.flags != info.HEAD_UPTODATE] - - if updated: - branches = ", ".join([info.ref.remote_head for info in updated]) - msg = "Last local commit was \x02{0}\x0F ago; updates to \x0302{1}\x0F." - self.reply(self.data, msg.format(since, branches)) - log = "{0} got status of {1} of {2} (updates to {3})" - self.logger.info(log.format(self.data.nick, remote.name, - self.repo.working_dir, branches)) - else: - msg = "Last commit was \x02{0}\x0F ago. Local copy is up-to-date with remote." - self.reply(self.data, msg.format(since)) - log = "{0} pulled {1} of {2} (no updates)" - self.logger.info(log.format(self.data.nick, remote.name, - self.repo.working_dir)) diff --git a/earwigbot/commands/praise.py b/earwigbot/commands/praise.py deleted file mode 100644 index 8c6c706..0000000 --- a/earwigbot/commands/praise.py +++ /dev/null @@ -1,48 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2009-2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -from earwigbot.commands import Command - -class Praise(Command): - """Praise people!""" - name = "praise" - - def setup(self): - try: - self.praises = self.config.commands[self.name]["praises"] - except KeyError: - self.praises = [] - - def check(self, data): - check = data.command == "praise" or data.command in self.praises - return data.is_command and check - - def process(self, data): - if data.command in self.praises: - msg = self.praises[data.command] - self.say(data.chan, msg) - return - if not data.args: - msg = "You use this command to praise certain people. Who they are is a secret." - else: - msg = "You're doing it wrong." - self.reply(data, msg) diff --git a/earwigbot/tasks/afc_catdelink.py b/earwigbot/tasks/afc_catdelink.py deleted file mode 100644 index 5600003..0000000 --- a/earwigbot/tasks/afc_catdelink.py +++ /dev/null @@ -1,34 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2009-2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -from earwigbot.tasks import Task - -class AFCCatDelink(Task): - """A task to delink mainspace categories in declined [[WP:AFC]] - submissions.""" - name = "afc_catdelink" - - def setup(self): - pass - - def run(self, **kwargs): - pass diff --git a/earwigbot/tasks/afc_copyvios.py b/earwigbot/tasks/afc_copyvios.py deleted file mode 100644 index c13dce3..0000000 --- a/earwigbot/tasks/afc_copyvios.py +++ /dev/null @@ -1,164 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2009-2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -from hashlib import sha256 -from os.path import expanduser -from threading import Lock -from urllib import quote - -import oursql - -from earwigbot.tasks import Task - -class AFCCopyvios(Task): - """A task to check newly-edited [[WP:AFC]] submissions for copyright - violations.""" - name = "afc_copyvios" - number = 1 - - def setup(self): - cfg = self.config.tasks.get(self.name, {}) - self.template = cfg.get("template", "AfC suspected copyvio") - self.ignore_list = cfg.get("ignoreList", []) - self.min_confidence = cfg.get("minConfidence", 0.5) - self.max_queries = cfg.get("maxQueries", 10) - self.cache_results = cfg.get("cacheResults", False) - default_summary = "Tagging suspected [[WP:COPYVIO|copyright violation]] of {url}." - self.summary = self.make_summary(cfg.get("summary", default_summary)) - - # Connection data for our SQL database: - kwargs = cfg.get("sql", {}) - kwargs["read_default_file"] = expanduser("~/.my.cnf") - self.conn_data = kwargs - self.db_access_lock = Lock() - - def run(self, **kwargs): - """Entry point for the bot task. - - Takes a page title in kwargs and checks it for copyvios, adding - {{self.template}} at the top if a copyvio has been detected. A page is - only checked once (processed pages are stored by page_id in an SQL - database). - """ - if self.shutoff_enabled(): - return - title = kwargs["page"] - page = self.bot.wiki.get_site().get_page(title) - with self.db_access_lock: - self.conn = oursql.connect(**self.conn_data) - self.process(page) - - def process(self, page): - """Detect copyvios in 'page' and add a note if any are found.""" - title = page.title - if title in self.ignore_list: - msg = u"Skipping page in ignore list: [[{0}]]" - self.logger.info(msg.format(title)) - return - - pageid = page.pageid - if self.has_been_processed(pageid): - msg = u"Skipping check on already processed page [[{0}]]" - self.logger.info(msg.format(title)) - return - - self.logger.info(u"Checking [[{0}]]".format(title)) - result = page.copyvio_check(self.min_confidence, self.max_queries) - url = result.url - orig_conf = "{0}%".format(round(result.confidence * 100, 2)) - - if result.violation: - # Things can change in the minute that it takes to do a check. - # Confirm that a violation still holds true: - page.load() - confirm = page.copyvio_compare(url, self.min_confidence) - new_conf = "{0}%".format(round(confirm.confidence * 100, 2)) - if not confirm.violation: - msg = u"A violation was detected in [[{0}]], but couldn't be confirmed." - msg += u" It may have just been edited (best: {1} at {2} -> {3} confidence)" - self.logger.info(msg.format(title, url, orig_conf, new_conf)) - - safeurl = quote(url.encode("utf8"), safe="/:").decode("utf8") - content = page.get() - template = u"\{\{{0}|url={1}|confidence={2}\}\}\n" - template = template.format(self.template, safeurl, new_conf) - newtext = template + content - if "{url}" in self.summary: - page.edit(newtext, self.summary.format(url=url)) - else: - page.edit(newtext, self.summary) - msg = u"Found violation: [[{0}]] -> {1} ({2} confidence)" - self.logger.info(msg.format(title, url, new_conf)) - else: - msg = u"No violations detected in [[{0}]] (best: {1} at {2} confidence)" - self.logger.info(msg.format(title, url, orig_conf)) - - self.log_processed(pageid) - if self.cache_results: - self.cache_result(page, result) - - def has_been_processed(self, pageid): - """Returns True if pageid was processed before, otherwise False.""" - query = "SELECT 1 FROM processed WHERE page_id = ?" - with self.conn.cursor() as cursor: - cursor.execute(query, (pageid,)) - results = cursor.fetchall() - return True if results else False - - def log_processed(self, pageid): - """Adds pageid to our database of processed pages. - - Raises an exception if the page has already been processed. - """ - query = "INSERT INTO processed VALUES (?)" - with self.conn.cursor() as cursor: - cursor.execute(query, (pageid,)) - - def cache_result(self, page, result): - """Store the check's result in a cache table temporarily. - - The cache contains the page's ID, a hash of its content, the URL of the - best match, the time of caching, and the number of queries used. It - will replace any existing cache entries for that page. - - The cache is intended for EarwigBot's complementary Toolserver web - interface, in which copyvio checks can be done separately from the bot. - The cache saves time and money by saving the result of the web search - but neither the result of the comparison nor any actual text (which - could violate data retention policy). Cache entries are (intended to - be) retained for three days; this task does not remove old entries - (that is handled by the Toolserver component). - - This will only be called if ``cache_results == True`` in the task's - config, which is ``False`` by default. - """ - pageid = page.pageid - hash = sha256(page.get()).hexdigest() - query1 = "SELECT 1 FROM cache WHERE cache_id = ?" - query2 = "DELETE FROM cache WHERE cache_id = ?" - query3 = "INSERT INTO cache VALUES (?, ?, ?, CURRENT_TIMESTAMP, ?, ?)" - with self.conn.cursor() as cursor: - cursor.execute(query1, (pageid,)) - if cursor.fetchall(): - cursor.execute(query2, (pageid,)) - args = (pageid, hash, result.url, result.queries, 0) - cursor.execute(query3, args) diff --git a/earwigbot/tasks/afc_dailycats.py b/earwigbot/tasks/afc_dailycats.py deleted file mode 100644 index dc8e769..0000000 --- a/earwigbot/tasks/afc_dailycats.py +++ /dev/null @@ -1,34 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2009-2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -from earwigbot.tasks import Task - -class AFCDailyCats(Task): - """A task to create daily categories for [[WP:AFC]].""" - name = "afc_dailycats" - number = 3 - - def setup(self): - pass - - def run(self, **kwargs): - pass diff --git a/earwigbot/tasks/afc_history.py b/earwigbot/tasks/afc_history.py deleted file mode 100644 index d623e31..0000000 --- a/earwigbot/tasks/afc_history.py +++ /dev/null @@ -1,227 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2009-2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -from collections import OrderedDict -from datetime import datetime, timedelta -from itertools import count -from os.path import expanduser -from threading import Lock -from time import sleep - -from matplotlib import pyplot as plt -from numpy import arange -import oursql - -from earwigbot import wiki -from earwigbot.tasks import Task - -class AFCHistory(Task): - """A task to generate charts about AfC submissions over time. - - The main function of the task is to work through the "AfC submissions by - date" categories (e.g. [[Category:AfC submissions by date/12 July 2011]]) - and determine the number of declined, accepted, and currently pending - submissions every day. - - This information is saved to a MySQL database ("u_earwig_afc_history") and - used to generate a graph showing the number of AfC submissions by date - with matplotlib and numpy. The chart is saved as a PNG to - config.tasks["afc_history"]["graph"]["dest"], which defaults to - "afc_history.png". - """ - name = "afc_history" - - # Valid submission statuses: - STATUS_NONE = 0 - STATUS_PEND = 1 - STATUS_DECLINE = 2 - STATUS_ACCEPT = 3 - - def setup(self): - cfg = self.config.tasks.get(self.name, {}) - self.num_days = cfg.get("days", 90) - self.categories = cfg.get("categories", {}) - - # Graph stuff: - self.graph = cfg.get("graph", {}) - self.destination = self.graph.get("dest", "afc_history.png") - - # Connection data for our SQL database: - kwargs = cfg.get("sql", {}) - kwargs["read_default_file"] = expanduser("~/.my.cnf") - self.conn_data = kwargs - self.db_access_lock = Lock() - - def run(self, **kwargs): - self.site = self.bot.wiki.get_site() - with self.db_access_lock: - self.conn = oursql.connect(**self.conn_data) - - action = kwargs.get("action") - try: - num_days = int(kwargs.get("days", self.num_days)) - if action == "update": - self.update(num_days) - elif action == "generate": - self.generate(num_days) - finally: - self.conn.close() - - def update(self, num_days): - self.logger.info("Updating past {0} days".format(num_days)) - generator = self.backwards_cat_iterator() - for i in xrange(num_days): - category = generator.next() - date = category.title.split("/")[-1] - self.update_date(date, category) - sleep(10) - self.logger.info("Update complete") - - def generate(self, num_days): - self.logger.info("Generating chart for past {0} days".format(num_days)) - data = OrderedDict() - generator = self.backwards_cat_iterator() - for i in xrange(num_days): - category = generator.next() - date = category.title.split("/")[-1] - data[date] = self.get_date_counts(date) - - data = OrderedDict(reversed(data.items())) # Oldest to most recent - self.generate_chart(data) - dest = expanduser(self.destination) - plt.savefig(dest) - self.logger.info("Chart saved to {0}".format(dest)) - - def backwards_cat_iterator(self): - date_base = self.categories["dateBase"] - current = datetime.utcnow() - while 1: - subcat = current.strftime("%d %B %Y") - title = "/".join((date_base, subcat)) - yield self.site.get_category(title) - current -= timedelta(1) # Subtract one day from date - - def update_date(self, date, category): - msg = "Updating {0} ([[{1}]])".format(date, category.title) - self.logger.debug(msg) - - q_select = "SELECT page_date, page_status FROM page WHERE page_id = ?" - q_delete = "DELETE FROM page WHERE page_id = ?" - q_update = "UPDATE page SET page_date = ?, page_status = ? WHERE page_id = ?" - q_insert = "INSERT INTO page VALUES (?, ?, ?)" - members = category.get_members() - - with self.conn.cursor() as cursor: - for title, pageid in members: - cursor.execute(q_select, (pageid,)) - stored = cursor.fetchall() - status = self.get_status(title, pageid) - - if status == self.STATUS_NONE: - if stored: - cursor.execute(q_delete, (pageid,)) - continue - - if stored: - stored_date, stored_status = list(stored)[0] - if date != stored_date or status != stored_status: - cursor.execute(q_update, (date, status, pageid)) - - else: - cursor.execute(q_insert, (pageid, date, status)) - - def get_status(self, title, pageid): - page = self.site.get_page(title) - ns = page.namespace - - if ns == wiki.NS_FILE_TALK: # Ignore accepted FFU requests - return self.STATUS_NONE - - if ns == wiki.NS_TALK: - new_page = page.toggle_talk() - sleep(2) - if new_page.is_redirect: - return self.STATUS_NONE # Ignore accepted AFC/R requests - return self.STATUS_ACCEPT - - cats = self.categories - sq = self.site.sql_query - query = "SELECT 1 FROM categorylinks WHERE cl_to = ? AND cl_from = ?" - match = lambda cat: list(sq(query, (cat.replace(" ", "_"), pageid))) - - if match(cats["pending"]): - return self.STATUS_PEND - elif match(cats["unsubmitted"]): - return self.STATUS_NONE - elif match(cats["declined"]): - return self.STATUS_DECLINE - return self.STATUS_NONE - - def get_date_counts(self, date): - query = "SELECT COUNT(*) FROM page WHERE page_date = ? AND page_status = ?" - statuses = [self.STATUS_PEND, self.STATUS_DECLINE, self.STATUS_ACCEPT] - counts = {} - with self.conn.cursor() as cursor: - for status in statuses: - cursor.execute(query, (date, status)) - count = cursor.fetchall()[0][0] - counts[status] = count - return counts - - def generate_chart(self, data): - plt.title(self.graph.get("title", "AfC submissions by date")) - plt.xlabel(self.graph.get("xaxis", "Date")) - plt.ylabel(self.graph.get("yaxis", "Submissions")) - - pends = [d[self.STATUS_PEND] for d in data.itervalues()] - declines = [d[self.STATUS_DECLINE] for d in data.itervalues()] - accepts = [d[self.STATUS_ACCEPT] for d in data.itervalues()] - pends_declines = [p + d for p, d in zip(pends, declines)] - ind = arange(len(data)) - xsize = self.graph.get("xsize", 1200) - ysize = self.graph.get("ysize", 900) - width = self.graph.get("width", 1) - xstep = self.graph.get("xAxisStep", 6) - pcolor = self.graph.get("pendingColor", "#f0e460") - dcolor = self.graph.get("declinedColor", "#f291a6") - acolor = self.graph.get("acceptedColor", "#81fc4c") - - p1 = plt.bar(ind, pends, width, color=pcolor) - p2 = plt.bar(ind, declines, width, color=dcolor, bottom=pends) - p3 = plt.bar(ind, accepts, width, color=acolor, bottom=pends_declines) - - xticks = arange(xstep-1, ind.size+xstep-1, xstep) + width/2.0 - xlabels = [d for c, d in zip(count(1), data.keys()) if not c % xstep] - plt.xticks(xticks, xlabels) - plt.yticks(arange(0, plt.ylim()[1], 10)) - plt.tick_params(direction="out") - - leg = plt.legend((p1[0], p2[0], p3[0]), ("Pending", "Declined", - "Accepted"), loc="upper left", fancybox=True) - leg.get_frame().set_alpha(0.5) - - fig = plt.gcf() - fig.set_size_inches(xsize/100, ysize/100) - fig.autofmt_xdate() - - ax = plt.gca() - ax.yaxis.grid(True) diff --git a/earwigbot/tasks/afc_statistics.py b/earwigbot/tasks/afc_statistics.py deleted file mode 100644 index 55444d6..0000000 --- a/earwigbot/tasks/afc_statistics.py +++ /dev/null @@ -1,739 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2009-2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -from datetime import datetime -import re -from os.path import expanduser -from threading import Lock -from time import sleep - -import oursql - -from earwigbot import exceptions -from earwigbot import wiki -from earwigbot.tasks import Task - -class AFCStatistics(Task): - """A task to generate statistics for WikiProject Articles for Creation. - - Statistics are stored in a MySQL database ("u_earwig_afc_statistics") - accessed with oursql. Statistics are synchronied with the live database - every four minutes and saved once an hour, on the hour, to self.pagename. - In the live bot, this is "Template:AFC statistics". - """ - name = "afc_statistics" - number = 2 - - # Chart status number constants: - CHART_NONE = 0 - CHART_PEND = 1 - CHART_DRAFT = 2 - CHART_REVIEW = 3 - CHART_ACCEPT = 4 - CHART_DECLINE = 5 - CHART_MISPLACE = 6 - - def setup(self): - self.cfg = cfg = self.config.tasks.get(self.name, {}) - - # Set some wiki-related attributes: - self.pagename = cfg.get("page", "Template:AFC statistics") - self.pending_cat = cfg.get("pending", "Pending AfC submissions") - self.ignore_list = cfg.get("ignoreList", []) - default_summary = "Updating statistics for [[WP:WPAFC|WikiProject Articles for creation]]." - self.summary = self.make_summary(cfg.get("summary", default_summary)) - - # Templates used in chart generation: - templates = cfg.get("templates", {}) - self.tl_header = templates.get("header", "AFC statistics/header") - self.tl_row = templates.get("row", "AFC statistics/row") - self.tl_footer = templates.get("footer", "AFC statistics/footer") - - # Connection data for our SQL database: - kwargs = cfg.get("sql", {}) - kwargs["read_default_file"] = expanduser("~/.my.cnf") - self.conn_data = kwargs - self.db_access_lock = Lock() - - def run(self, **kwargs): - """Entry point for a task event. - - Depending on the kwargs passed, we will either synchronize our local - statistics database with the site (self.sync()) or save it to the wiki - (self.save()). We will additionally create an SQL connection with our - local database. - """ - action = kwargs.get("action") - if not self.db_access_lock.acquire(False): # Non-blocking - if action == "sync": - self.logger.info("A sync is already ongoing; aborting") - return - self.logger.info("Waiting for database access lock") - self.db_access_lock.acquire() - - try: - self.site = self.bot.wiki.get_site() - self.conn = oursql.connect(**self.conn_data) - try: - if action == "save": - self.save(kwargs) - elif action == "sync": - self.sync(kwargs) - elif action == "update": - self.update(kwargs) - finally: - self.conn.close() - finally: - self.db_access_lock.release() - - def save(self, kwargs): - """Save our local statistics to the wiki. - - After checking for emergency shutoff, the statistics chart is compiled, - and then saved to self.pagename using self.summary iff it has changed - since last save. - """ - self.logger.info("Saving chart") - if kwargs.get("fromIRC"): - summary = self.summary + " (!earwigbot)" - else: - if self.shutoff_enabled(): - return - summary = self.summary - - statistics = self.compile_charts() - - page = self.site.get_page(self.pagename) - text = page.get() - newtext = re.sub(u"(.*?)", - "\n" + statistics + "\n", - text, flags=re.DOTALL) - if newtext == text: - self.logger.info("Chart unchanged; not saving") - return # Don't edit the page if we're not adding anything - - newtext = re.sub("(.*?)", - "~~~ at ~~~~~", - newtext) - page.edit(newtext, summary, minor=True, bot=True) - self.logger.info(u"Chart saved to [[{0}]]".format(page.title)) - - def compile_charts(self): - """Compile and return all statistics information from our local db.""" - stats = "" - with self.conn.cursor() as cursor: - cursor.execute("SELECT * FROM chart") - for chart in cursor: - stats += self.compile_chart(chart) + "\n" - return stats[:-1] # Drop the last newline - - def compile_chart(self, chart_info): - """Compile and return a single statistics chart.""" - chart_id, chart_title, special_title = chart_info - - chart = self.tl_header + "|" + chart_title - if special_title: - chart += "|" + special_title - chart = "{{" + chart + "}}" - - query = "SELECT * FROM page JOIN row ON page_id = row_id WHERE row_chart = ?" - with self.conn.cursor(oursql.DictCursor) as cursor: - cursor.execute(query, (chart_id,)) - for page in cursor: - chart += "\n" + self.compile_chart_row(page) - - chart += "\n{{" + self.tl_footer + "}}" - return chart - - def compile_chart_row(self, page): - """Compile and return a single chart row. - - 'page' is a dict of page information, taken as a row from the page - table, where keys are column names and values are their cell contents. - """ - row = u"{0}|s={page_status}|t={page_title}|h={page_short}|z={page_size}|" - if page["page_special_oldid"]: - row += "sr={page_special_user}|sd={page_special_time}|si={page_special_oldid}|" - row += "mr={page_modify_user}|md={page_modify_time}|mi={page_modify_oldid}" - - page["page_special_time"] = self.format_time(page["page_special_time"]) - page["page_modify_time"] = self.format_time(page["page_modify_time"]) - - if page["page_notes"]: - row += "|n=1{page_notes}" - - return "{{" + row.format(self.tl_row, **page) + "}}" - - def format_time(self, dt): - """Format a datetime into the standard MediaWiki timestamp format.""" - return dt.strftime("%H:%M, %d %b %Y") - - def sync(self, kwargs): - """Synchronize our local statistics database with the site. - - Syncing involves, in order, updating tracked submissions that have - been changed since last sync (self.update_tracked()), adding pending - submissions that are not tracked (self.add_untracked()), and removing - old submissions from the database (self.delete_old()). - - The sync will be canceled if SQL replication lag is greater than 600 - seconds, because this will lead to potential problems and outdated - data, not to mention putting demand on an already overloaded server. - Giving sync the kwarg "ignore_replag" will go around this restriction. - """ - self.logger.info("Starting sync") - - replag = self.site.get_replag() - self.logger.debug("Server replag is {0}".format(replag)) - if replag > 600 and not kwargs.get("ignore_replag"): - msg = "Sync canceled as replag ({0} secs) is greater than ten minutes" - self.logger.warn(msg.format(replag)) - return - - with self.conn.cursor() as cursor: - self.update_tracked(cursor) - self.add_untracked(cursor) - self.delete_old(cursor) - - self.logger.info("Sync completed") - - def update_tracked(self, cursor): - """Update tracked submissions that have been changed since last sync. - - This is done by iterating through every page in our database and - comparing our stored latest revision ID with the actual latest revision - ID from an SQL query. If they differ, we will update our information - about the page (self.update_page()). - - If the page does not exist, we will remove it from our database with - self.untrack_page(). - """ - self.logger.debug("Updating tracked submissions") - query1 = "SELECT page_id, page_title, page_modify_oldid FROM page" - query2 = """SELECT page_latest, page_title, page_namespace FROM page - WHERE page_id = ?""" - cursor.execute(query1) - - for pageid, title, oldid in cursor: - result = list(self.site.sql_query(query2, (pageid,))) - if not result: - self.untrack_page(cursor, pageid) - continue - - real_oldid = result[0][0] - if oldid != real_oldid: - msg = u"Updating page [[{0}]] (id: {1}) @ {2}" - self.logger.debug(msg.format(title, pageid, oldid)) - self.logger.debug(" {0} -> {1}".format(oldid, real_oldid)) - base = result[0][1].decode("utf8").replace("_", " ") - ns = self.site.namespace_id_to_name(result[0][2]) - if ns: - real_title = u":".join((ns, base)) - else: - real_title = base - try: - self.update_page(cursor, pageid, real_title) - except Exception: - e = u"Error updating page [[{0}]] (id: {1})" - self.logger.exception(e.format(real_title, pageid)) - - def add_untracked(self, cursor): - """Add pending submissions that are not yet tracked. - - This is done by compiling a list of all currently tracked submissions - and iterating through all members of self.pending_cat via SQL. If a - page in the pending category is not tracked and is not in - self.ignore_list, we will track it with self.track_page(). - """ - self.logger.debug("Adding untracked pending submissions") - cursor.execute("SELECT page_id FROM page") - tracked = [i[0] for i in cursor.fetchall()] - - category = self.site.get_category(self.pending_cat) - for page in category.get_members(): - title, pageid = page.title, page.pageid - if title in self.ignore_list: - continue - if pageid not in tracked: - msg = u"Tracking page [[{0}]] (id: {1})".format(title, pageid) - self.logger.debug(msg) - try: - self.track_page(cursor, pageid, title) - except Exception: - e = u"Error tracking page [[{0}]] (id: {1})" - self.logger.exception(e.format(title, pageid)) - - def delete_old(self, cursor): - """Remove old submissions from the database. - - "Old" is defined as a submission that has been declined or accepted - more than 36 hours ago. Pending submissions cannot be "old". - """ - self.logger.debug("Removing old submissions from chart") - query = """DELETE FROM page, row USING page JOIN row - ON page_id = row_id WHERE row_chart IN (?, ?) - AND ADDTIME(page_special_time, '36:00:00') < NOW()""" - cursor.execute(query, (self.CHART_ACCEPT, self.CHART_DECLINE)) - - def update(self, kwargs): - """Update a page by name, regardless of whether anything has changed. - - Mainly intended as a command to be used via IRC, e.g.: - !tasks start afc_statistics action=update page=Foobar - """ - title = kwargs.get("page") - if not title: - return - - title = title.replace("_", " ").decode("utf8") - query = "SELECT page_id, page_modify_oldid FROM page WHERE page_title = ?" - with self.conn.cursor() as cursor: - cursor.execute(query, (title,)) - try: - pageid, oldid = cursor.fetchall()[0] - except IndexError: - msg = u"Page [[{0}]] not found in database".format(title) - self.logger.error(msg) - - msg = u"Updating page [[{0}]] (id: {1}) @ {2}" - self.logger.info(msg.format(title, pageid, oldid)) - self.update_page(cursor, pageid, title) - - def untrack_page(self, cursor, pageid): - """Remove a page, given by ID, from our database.""" - self.logger.debug("Untracking page (id: {0})".format(pageid)) - query = """DELETE FROM page, row USING page JOIN row - ON page_id = row_id WHERE page_id = ?""" - cursor.execute(query, (pageid,)) - - def track_page(self, cursor, pageid, title): - """Update hook for when page is not in our database. - - A variety of SQL queries are used to gather information about the page, - which is then saved to our database. - """ - content = self.get_content(title) - if content is None: - msg = u"Could not get page content for [[{0}]]".format(title) - self.logger.error(msg) - return - - namespace = self.site.get_page(title).namespace - status, chart = self.get_status_and_chart(content, namespace) - if chart == self.CHART_NONE: - msg = u"Could not find a status for [[{0}]]".format(title) - self.logger.warn(msg) - return - - short = self.get_short_title(title) - size = self.get_size(content) - m_user, m_time, m_id = self.get_modify(pageid) - s_user, s_time, s_id = self.get_special(pageid, chart) - notes = self.get_notes(chart, content, m_time, s_user) - - query1 = "INSERT INTO row VALUES (?, ?)" - query2 = "INSERT INTO page VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" - cursor.execute(query1, (pageid, chart)) - cursor.execute(query2, (pageid, status, title, short, size, notes, - m_user, m_time, m_id, s_user, s_time, s_id)) - - def update_page(self, cursor, pageid, title): - """Update hook for when page is already in our database. - - A variety of SQL queries are used to gather information about the page, - which is compared against our stored information. Differing information - is then updated. - """ - content = self.get_content(title) - if content is None: - msg = u"Could not get page content for [[{0}]]".format(title) - self.logger.error(msg) - return - - namespace = self.site.get_page(title).namespace - status, chart = self.get_status_and_chart(content, namespace) - if chart == self.CHART_NONE: - self.untrack_page(cursor, pageid) - return - - query = "SELECT * FROM page JOIN row ON page_id = row_id WHERE page_id = ?" - with self.conn.cursor(oursql.DictCursor) as dict_cursor: - dict_cursor.execute(query, (pageid,)) - result = dict_cursor.fetchall()[0] - - size = self.get_size(content) - m_user, m_time, m_id = self.get_modify(pageid) - - if title != result["page_title"]: - self.update_page_title(cursor, result, pageid, title) - - if m_id != result["page_modify_oldid"]: - self.update_page_modify(cursor, result, pageid, size, m_user, - m_time, m_id) - - if status != result["page_status"]: - special = self.update_page_status(cursor, result, pageid, status, - chart) - s_user = special[0] - else: - s_user = result["page_special_user"] - - notes = self.get_notes(chart, content, m_time, s_user) - if notes != result["page_notes"]: - self.update_page_notes(cursor, result, pageid, notes) - - def update_page_title(self, cursor, result, pageid, title): - """Update the title and short_title of a page in our database.""" - query = "UPDATE page SET page_title = ?, page_short = ? WHERE page_id = ?" - short = self.get_short_title(title) - cursor.execute(query, (title, short, pageid)) - - msg = u" {0}: title: {1} -> {2}" - self.logger.debug(msg.format(pageid, result["page_title"], title)) - - def update_page_modify(self, cursor, result, pageid, size, m_user, m_time, m_id): - """Update the last modified information of a page in our database.""" - query = """UPDATE page SET page_size = ?, page_modify_user = ?, - page_modify_time = ?, page_modify_oldid = ? - WHERE page_id = ?""" - cursor.execute(query, (size, m_user, m_time, m_id, pageid)) - - msg = u" {0}: modify: {1} / {2} / {3} -> {4} / {5} / {6}" - msg = msg.format(pageid, result["page_modify_user"], - result["page_modify_time"], - result["page_modify_oldid"], m_user, m_time, m_id) - self.logger.debug(msg) - - def update_page_status(self, cursor, result, pageid, status, chart): - """Update the status and "specialed" information of a page.""" - query1 = """UPDATE page JOIN row ON page_id = row_id - SET page_status = ?, row_chart = ? WHERE page_id = ?""" - query2 = """UPDATE page SET page_special_user = ?, - page_special_time = ?, page_special_oldid = ? - WHERE page_id = ?""" - cursor.execute(query1, (status, chart, pageid)) - - msg = " {0}: status: {1} ({2}) -> {3} ({4})" - self.logger.debug(msg.format(pageid, result["page_status"], - result["row_chart"], status, chart)) - - s_user, s_time, s_id = self.get_special(pageid, chart) - if s_id != result["page_special_oldid"]: - cursor.execute(query2, (s_user, s_time, s_id, pageid)) - msg = u"{0}: special: {1} / {2} / {3} -> {4} / {5} / {6}" - msg = msg.format(pageid, result["page_special_user"], - result["page_special_time"], - result["page_special_oldid"], s_user, s_time, s_id) - self.logger.debug(msg) - - return s_user, s_time, s_id - - def update_page_notes(self, cursor, result, pageid, notes): - """Update the notes (or warnings) of a page in our database.""" - query = "UPDATE page SET page_notes = ? WHERE page_id = ?" - cursor.execute(query, (notes, pageid)) - msg = " {0}: notes: {1} -> {2}" - self.logger.debug(msg.format(pageid, result["page_notes"], notes)) - - def get_content(self, title): - """Get the current content of a page by title from the API. - - The page's current revision ID is retrieved from SQL, and then - an API query is made to get its content. This is the only API query - used in the task's code. - """ - query = "SELECT page_latest FROM page WHERE page_title = ? AND page_namespace = ?" - try: - namespace, base = title.split(":", 1) - except ValueError: - base = title - ns = wiki.NS_MAIN - else: - try: - ns = self.site.namespace_name_to_id(namespace) - except exceptions.NamespaceNotFoundError: - base = title - ns = wiki.NS_MAIN - - result = self.site.sql_query(query, (base.replace(" ", "_"), ns)) - try: - revid = int(list(result)[0][0]) - except IndexError: - return None - return self.get_revision_content(revid) - - def get_revision_content(self, revid, tries=1): - """Get the content of a revision by ID from the API.""" - res = self.site.api_query(action="query", prop="revisions", - revids=revid, rvprop="content") - try: - return res["query"]["pages"].values()[0]["revisions"][0]["*"] - except KeyError: - if tries > 0: - sleep(5) - return self.get_revision_content(revid, tries=tries - 1) - - def get_status_and_chart(self, content, namespace): - """Determine the status and chart number of an AFC submission. - - The methodology used here is the same one I've been using for years - (see also commands.afc_report), but with the new draft system taken - into account. The order here is important: if there is more than one - {{AFC submission}} template on a page, we need to know which one to - use (revision history search to find the most recent isn't a viable - idea :P). - """ - statuses = self.get_statuses(content) - - if "R" in statuses: - status, chart = "r", self.CHART_REVIEW - elif "H" in statuses: - status, chart = "p", self.CHART_DRAFT - elif "P" in statuses: - status, chart = "p", self.CHART_PEND - elif "T" in statuses: - status, chart = None, self.CHART_NONE - elif "D" in statuses: - status, chart = "d", self.CHART_DECLINE - else: - status, chart = None, self.CHART_NONE - - if namespace == wiki.NS_MAIN: - if not statuses: - status, chart = "a", self.CHART_ACCEPT - else: - status, chart = None, self.CHART_MISPLACE - - return status, chart - - def get_statuses(self, content): - """Return a list of all AFC submission statuses in a page's text.""" - re_has_templates = "\{\{[aA][fF][cC] submission\s*(\}\}|\||/)" - re_template = "\{\{[aA][fF][cC] submission\s*(.*?)\}\}" - re_remove_embed = "(\{\{[aA][fF][cC] submission\s*(.*?))\{\{(.*?)\}\}(.*?)\}\}" - valid = ["R", "H", "P", "T", "D"] - subtemps = { - "/reviewing": "R", - "/onhold": "H", - "/pending": "P", - "/draft": "T", - "/declined": "D" - } - statuses = [] - - while re.search(re_has_templates, content): - status = "P" - match = re.search(re_template, content, re.S) - if not match: - return statuses - temp = match.group(1) - limit = 0 - while "{{" in temp and limit < 50: - content = re.sub(re_remove_embed, "\\1\\4}}", content, 1, re.S) - match = re.search(re_template, content, re.S) - temp = match.group(1) - limit += 1 - params = temp.split("|") - try: - subtemp, params = params[0].strip(), params[1:] - except IndexError: - status = "P" - params = [] - else: - if subtemp: - status = subtemps.get(subtemp) - params = [] - for param in params: - param = param.strip().upper() - if "=" in param: - key, value = param.split("=", 1) - if key.strip() == "1": - status = value if value in valid else "P" - break - else: - status = param if param in valid else "P" - break - statuses.append(status) - content = re.sub(re_template, "", content, 1, re.S) - - return statuses - - def get_short_title(self, title): - """Shorten a title so we can display it in a chart using less space. - - Basically, this just means removing the "Wikipedia talk:Articles for - creation" part from the beginning. If it is longer than 50 characters, - we'll shorten it down to 47 and add an poor-man's ellipsis at the end. - """ - short = re.sub("Wikipedia(\s*talk)?\:Articles\sfor\screation\/", "", title) - if len(short) > 50: - short = short[:47] + "..." - return short - - def get_size(self, content): - """Return a page's size in a short, pretty format.""" - return "{0} kB".format(round(len(content) / 1000.0, 1)) - - def get_modify(self, pageid): - """Return information about a page's last edit ("modification"). - - This consists of the most recent editor, modification time, and the - lastest revision ID. - """ - query = """SELECT rev_user_text, rev_timestamp, rev_id FROM revision - JOIN page ON rev_id = page_latest WHERE page_id = ?""" - result = self.site.sql_query(query, (pageid,)) - m_user, m_time, m_id = list(result)[0] - timestamp = datetime.strptime(m_time, "%Y%m%d%H%M%S") - return m_user.decode("utf8"), timestamp, m_id - - def get_special(self, pageid, chart): - """Return information about a page's "special" edit. - - I tend to use the term "special" as a verb a lot, which is bound to - cause confusion. It is merely a short way of saying "the edit in which - a declined submission was declined, an accepted submission was - accepted, a submission in review was set as such, a pending submission - was submitted, and a "misplaced" submission was created." - - This "information" consists of the special edit's editor, its time, and - its revision ID. If the page's status is not something that involves - "special"-ing, we will return None for all three. The same will be - returned if we cannot determine when the page was "special"-ed, or if - it was "special"-ed more than 100 edits ago. - """ - if chart == self.CHART_NONE: - return None, None, None - elif chart == self.CHART_MISPLACE: - return self.get_create(pageid) - elif chart == self.CHART_ACCEPT: - search_for = None - search_not = ["R", "H", "P", "T", "D"] - elif chart == self.CHART_DRAFT: - search_for = "H" - search_not = [] - elif chart == self.CHART_PEND: - search_for = "P" - search_not = [] - elif chart == self.CHART_REVIEW: - search_for = "R" - search_not = [] - elif chart == self.CHART_DECLINE: - search_for = "D" - search_not = ["R", "H", "P", "T"] - - query = """SELECT rev_user_text, rev_timestamp, rev_id - FROM revision WHERE rev_page = ? ORDER BY rev_id DESC""" - result = self.site.sql_query(query, (pageid,)) - - counter = 0 - last = (None, None, None) - for user, ts, revid in result: - counter += 1 - if counter > 50: - msg = "Exceeded 50 content lookups while determining special for page (id: {0}, chart: {1})" - self.logger.warn(msg.format(pageid, chart)) - return None, None, None - try: - content = self.get_revision_content(revid) - except exceptions.APIError: - msg = "API error interrupted SQL query in get_special() for page (id: {0}, chart: {1})" - self.logger.exception(msg.format(pageid, chart)) - return None, None, None - statuses = self.get_statuses(content) - matches = [s in statuses for s in search_not] - if search_for: - if search_for not in statuses or any(matches): - return last - else: - if any(matches): - return last - timestamp = datetime.strptime(ts, "%Y%m%d%H%M%S") - last = (user.decode("utf8"), timestamp, revid) - - return last - - def get_create(self, pageid): - """Return information about a page's first edit ("creation"). - - This consists of the page creator, creation time, and the earliest - revision ID. - """ - query = """SELECT rev_user_text, rev_timestamp, rev_id - FROM revision WHERE rev_id = - (SELECT MIN(rev_id) FROM revision WHERE rev_page = ?)""" - result = self.site.sql_query(query, (pageid,)) - c_user, c_time, c_id = list(result)[0] - timestamp = datetime.strptime(c_time, "%Y%m%d%H%M%S") - return c_user.decode("utf8"), timestamp, c_id - - def get_notes(self, chart, content, m_time, s_user): - """Return any special notes or warnings about this page. - - copyvio: submission is a suspected copyright violation - unsourced: submission lacks references completely - no-inline: submission has no inline citations - short: submission is less than a kilobyte in length - resubmit: submission was resubmitted after a previous decline - old: submission has not been touched in > 4 days - blocked: submitter is currently blocked - """ - notes = "" - - ignored_charts = [self.CHART_NONE, self.CHART_ACCEPT, self.CHART_DECLINE] - if chart in ignored_charts: - return notes - - copyvios = self.config.tasks.get("afc_copyvios", {}) - regex = "\{\{\s*" + copyvios.get("template", "AfC suspected copyvio") - if re.search(regex, content): - notes += "|nc=1" # Submission is a suspected copyvio - - if not re.search("\(.*?)\", content, re.I | re.S): - regex = "(https?:)|\[//(?!{0})([^ \]\\t\\n\\r\\f\\v]+?)" - sitedomain = re.escape(self.site.domain) - if re.search(regex.format(sitedomain), content, re.I | re.S): - notes += "|ni=1" # Submission has no inline citations - else: - notes += "|nu=1" # Submission is completely unsourced - - if len(content) < 1000: - notes += "|ns=1" # Submission is short - - statuses = self.get_statuses(content) - if "D" in statuses and chart != self.CHART_MISPLACE: - notes += "|nr=1" # Submission was resubmitted - - time_since_modify = (datetime.utcnow() - m_time).total_seconds() - max_time = 4 * 24 * 60 * 60 - if time_since_modify > max_time: - notes += "|no=1" # Submission hasn't been touched in over 4 days - - if chart in [self.CHART_PEND, self.CHART_DRAFT] and s_user: - submitter = self.site.get_user(s_user) - try: - if submitter.blockinfo: - notes += "|nb=1" # Submitter is blocked - except exceptions.UserNotFoundError: # Likely an IP - pass - - return notes diff --git a/earwigbot/tasks/afc_undated.py b/earwigbot/tasks/afc_undated.py deleted file mode 100644 index e897bd4..0000000 --- a/earwigbot/tasks/afc_undated.py +++ /dev/null @@ -1,33 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2009-2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -from earwigbot.tasks import Task - -class AFCUndated(Task): - """A task to clear [[Category:Undated AfC submissions]].""" - name = "afc_undated" - - def setup(self): - pass - - def run(self, **kwargs): - pass diff --git a/earwigbot/tasks/blp_tag.py b/earwigbot/tasks/blp_tag.py deleted file mode 100644 index 3c5d6d1..0000000 --- a/earwigbot/tasks/blp_tag.py +++ /dev/null @@ -1,34 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2009-2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -from earwigbot.tasks import Task - -class BLPTag(Task): - """A task to add |blp=yes to ``{{WPB}}`` or ``{{WPBS}}`` when it is used - along with ``{{WP Biography}}``.""" - name = "blp_tag" - - def setup(self): - pass - - def run(self, **kwargs): - pass diff --git a/earwigbot/tasks/drn_clerkbot.py b/earwigbot/tasks/drn_clerkbot.py deleted file mode 100644 index d2a4204..0000000 --- a/earwigbot/tasks/drn_clerkbot.py +++ /dev/null @@ -1,787 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2009-2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -from datetime import datetime -from os.path import expanduser -import re -from threading import RLock -from time import mktime, sleep, time - -import oursql - -from earwigbot import exceptions -from earwigbot.tasks import Task -from earwigbot.wiki import constants - -class DRNClerkBot(Task): - """A task to clerk for [[WP:DRN]].""" - name = "drn_clerkbot" - number = 19 - - # Case status: - STATUS_UNKNOWN = 0 - STATUS_NEW = 1 - STATUS_OPEN = 2 - STATUS_STALE = 3 - STATUS_NEEDASSIST = 4 - STATUS_REVIEW = 5 - STATUS_RESOLVED = 6 - STATUS_CLOSED = 7 - - ALIASES = { - STATUS_NEW: ("",), - STATUS_OPEN: ("open", "active", "inprogress"), - STATUS_STALE: ("stale",), - STATUS_NEEDASSIST: ("needassist", "relist", "relisted"), - STATUS_REVIEW: ("review",), - STATUS_RESOLVED: ("resolved", "resolve"), - STATUS_CLOSED: ("closed", "close"), - } - - def setup(self): - """Hook called immediately after the task is loaded.""" - cfg = self.config.tasks.get(self.name, {}) - - # Set some wiki-related attributes: - self.title = cfg.get("title", - "Wikipedia:Dispute resolution noticeboard") - self.chart_title = cfg.get("chartTitle", "Template:DRN case status") - self.volunteer_title = cfg.get("volunteers", - "Wikipedia:Dispute resolution noticeboard/Volunteering") - self.very_old_title = cfg.get("veryOldTitle", "User talk:Szhang (WMF)") - - clerk_summary = "Updating $3 case$4." - notify_summary = "Notifying user regarding [[WP:DRN|dispute resolution noticeboard]] case." - chart_summary = "Updating statistics for the [[WP:DRN|dispute resolution noticeboard]]." - self.clerk_summary = self.make_summary(cfg.get("clerkSummary", clerk_summary)) - self.notify_summary = self.make_summary(cfg.get("notifySummary", notify_summary)) - self.chart_summary = self.make_summary(cfg.get("chartSummary", chart_summary)) - - # Templates used: - templates = cfg.get("templates", {}) - self.tl_status = templates.get("status", "DR case status") - self.tl_notify_party = templates.get("notifyParty", "DRN-notice") - self.tl_notify_stale = templates.get("notifyStale", "DRN stale notice") - self.tl_archive_top = templates.get("archiveTop", "DRN archive top") - self.tl_archive_bottom = templates.get("archiveBottom", - "DRN archive bottom") - self.tl_chart_header = templates.get("chartHeader", - "DRN case status/header") - self.tl_chart_row = templates.get("chartRow", "DRN case status/row") - self.tl_chart_footer = templates.get("chartFooter", - "DRN case status/footer") - - # Connection data for our SQL database: - kwargs = cfg.get("sql", {}) - kwargs["read_default_file"] = expanduser("~/.my.cnf") - self.conn_data = kwargs - self.db_access_lock = RLock() - - # Minimum size a MySQL TIMESTAMP field can hold: - self.min_ts = datetime(1970, 1, 1, 0, 0, 1) - - def run(self, **kwargs): - """Entry point for a task event.""" - if not self.db_access_lock.acquire(False): # Non-blocking - self.logger.info("A job is already ongoing; aborting") - return - action = kwargs.get("action", "all") - try: - start = time() - conn = oursql.connect(**self.conn_data) - site = self.bot.wiki.get_site() - if action in ["all", "update_volunteers"]: - self.update_volunteers(conn, site) - if action in ["all", "clerk"]: - log = u"Starting update to [[{0}]]".format(self.title) - self.logger.info(log) - cases = self.read_database(conn) - page = site.get_page(self.title) - text = page.get() - self.read_page(conn, cases, text) - notices = self.clerk(conn, cases) - if self.shutoff_enabled(): - return - if not self.save(page, cases, kwargs, start): - return - self.send_notices(site, notices) - if action in ["all", "update_chart"]: - if self.shutoff_enabled(): - return - self.update_chart(conn, site) - if action in ["all", "purge"]: - self.purge_old_data(conn) - finally: - self.db_access_lock.release() - - def update_volunteers(self, conn, site): - """Updates and stores the list of dispute resolution volunteers.""" - log = u"Updating volunteer list from [[{0}]]" - self.logger.info(log.format(self.volunteer_title)) - page = site.get_page(self.volunteer_title) - try: - text = page.get() - except exceptions.PageNotFoundError: - text = "" - marker = "" - if marker not in text: - log = u"The marker ({0}) wasn't found in the volunteer list at [[{1}]]!" - self.logger.error(log.format(marker, page.title)) - return - text = text.split(marker)[1] - additions = set() - for line in text.splitlines(): - user = re.search("\# \{\{User\|(.+?)\}\}", line) - if user: - uname = user.group(1).replace("_", " ").strip() - additions.add((uname[0].upper() + uname[1:],)) - - removals = set() - query1 = "SELECT volunteer_username FROM volunteers" - query2 = "DELETE FROM volunteers WHERE volunteer_username = ?" - query3 = "INSERT INTO volunteers (volunteer_username) VALUES (?)" - with conn.cursor() as cursor: - cursor.execute(query1) - for row in cursor: - if row in additions: - additions.remove(row) - else: - removals.add(row) - if removals: - cursor.executemany(query2, removals) - if additions: - cursor.executemany(query3, additions) - - def read_database(self, conn): - """Return a list of _Cases from the database.""" - cases = [] - query = "SELECT * FROM cases" - with conn.cursor() as cursor: - cursor.execute(query) - for row in cursor: - case = _Case(*row) - cases.append(case) - log = "Read {0} cases from the database" - self.logger.debug(log.format(len(cases))) - return cases - - def read_page(self, conn, cases, text): - """Read the noticeboard content and update the list of _Cases.""" - nextid = self.select_next_id(conn) - tl_status_esc = re.escape(self.tl_status) - split = re.split("(^==\s*[^=]+?\s*==$)", text, flags=re.M|re.U) - for i in xrange(len(split)): - if i + 1 == len(split): - break - if not split[i].startswith("=="): - continue - title = split[i][2:-2].strip() - body = old = split[i + 1] - if not re.search("\s*\{\{" + tl_status_esc, body, re.U): - continue - status = self.read_status(body) - re_id = "" - try: - id_ = int(re.search(re_id, body).group(1)) - case = [case for case in cases if case.id == id_][0] - except (AttributeError, IndexError, ValueError): - id_ = nextid - nextid += 1 - re_id2 = "(\{\{" + tl_status_esc - re_id2 += r"(.*?)\}\})()?" - repl = ur"\1 " - body = re.sub(re_id2, repl.format(id_), body) - re_f = r"\{\{drn filing editor\|(.*?)\|" - re_f += r"(\d{2}:\d{2},\s\d{1,2}\s\w+\s\d{4}\s\(UTC\))\}\}" - match = re.search(re_f, body, re.U) - if match: - f_user = match.group(1).split("/", 1)[0].replace("_", " ") - f_user = f_user[0].upper() + f_user[1:] - strp = "%H:%M, %d %B %Y (UTC)" - f_time = datetime.strptime(match.group(2), strp) - else: - f_user, f_time = None, datetime.utcnow() - case = _Case(id_, title, status, self.STATUS_UNKNOWN, f_user, - f_time, f_user, f_time, "", self.min_ts, - self.min_ts, False, False, False, len(body), - new=True) - cases.append(case) - log = u"Added new case {0} ('{1}', status={2}, by {3})" - self.logger.debug(log.format(id_, title, status, f_user)) - else: - case.status = status - log = u"Read active case {0} ('{1}')".format(id_, title) - self.logger.debug(log) - if case.title != title: - self.update_case_title(conn, id_, title) - case.title = title - case.body, case.old = body, old - - for case in cases[:]: - if case.body is None: - if case.original_status == self.STATUS_UNKNOWN: - cases.remove(case) # Ignore archived case - else: - case.status = self.STATUS_UNKNOWN - log = u"Dropped case {0} because it is no longer on the page ('{1}')" - self.logger.debug(log.format(case.id, case.title)) - - self.logger.debug("Done reading cases from the noticeboard page") - - def select_next_id(self, conn): - """Return the next incremental ID for a case.""" - query = "SELECT MAX(case_id) FROM cases" - with conn.cursor() as cursor: - cursor.execute(query) - current = cursor.fetchone()[0] - if current: - return int(current) + 1 - return 1 - - def read_status(self, body): - """Parse the current status from a case body.""" - templ = re.escape(self.tl_status) - status = re.search("\{\{" + templ + "\|?(.*?)\}\}", body, re.S|re.U) - if not status: - return self.STATUS_NEW - for option, names in self.ALIASES.iteritems(): - if status.group(1).lower() in names: - return option - return self.STATUS_NEW - - def update_case_title(self, conn, id_, title): - """Update a case title in the database.""" - query = "UPDATE cases SET case_title = ? WHERE case_id = ?" - with conn.cursor() as cursor: - cursor.execute(query, (title, id_)) - log = u"Updated title of case {0} to '{1}'".format(id_, title) - self.logger.debug(log) - - def clerk(self, conn, cases): - """Actually go through cases and modify those to be updated.""" - query = "SELECT volunteer_username FROM volunteers" - with conn.cursor() as cursor: - cursor.execute(query) - volunteers = [name for (name,) in cursor.fetchall()] - notices = [] - for case in cases: - log = u"Clerking case {0} ('{1}')".format(case.id, case.title) - self.logger.debug(log) - if case.status == self.STATUS_UNKNOWN: - self.save_existing_case(conn, case) - else: - notices += self.clerk_case(conn, case, volunteers) - self.logger.debug("Done clerking cases") - return notices - - def clerk_case(self, conn, case, volunteers): - """Clerk a particular case and return a list of any notices to send.""" - notices = [] - signatures = self.read_signatures(case.body) - storedsigs = self.get_signatures_from_db(conn, case) - newsigs = set(signatures) - set(storedsigs) - if any([editor in volunteers for (editor, timestamp) in newsigs]): - case.last_volunteer_size = len(case.body) - - if case.status == self.STATUS_NEW: - notices = self.clerk_new_case(case, volunteers, signatures) - elif case.status == self.STATUS_OPEN: - notices = self.clerk_open_case(case, signatures) - elif case.status == self.STATUS_NEEDASSIST: - notices = self.clerk_needassist_case(case, volunteers, newsigs) - elif case.status == self.STATUS_STALE: - notices = self.clerk_stale_case(case, newsigs) - elif case.status == self.STATUS_REVIEW: - notices = self.clerk_review_case(case) - elif case.status in [self.STATUS_RESOLVED, self.STATUS_CLOSED]: - self.clerk_closed_case(case, signatures) - self.save_case_updates(conn, case, volunteers, signatures, storedsigs) - return notices - - def clerk_new_case(self, case, volunteers, signatures): - """Clerk a case in the "brand new" state. - - The case will be set to "open" if a volunteer edits it, or "needassist" - if it increases by over 15,000 bytes or goes by without any volunteer - edits for two days. - """ - notices = self.notify_parties(case) - if any([editor in volunteers for (editor, timestamp) in signatures]): - self.update_status(case, self.STATUS_OPEN) - else: - age = (datetime.utcnow() - case.file_time).total_seconds() - if age > 60 * 60 * 24 * 2: - self.update_status(case, self.STATUS_NEEDASSIST) - elif len(case.body) - case.last_volunteer_size > 15000: - self.update_status(case, self.STATUS_NEEDASSIST) - return notices - - def clerk_open_case(self, case, signatures): - """Clerk an open case (has been edited by a reviewer). - - The case will be set to "needassist" if 15,000 bytes have been added - since a volunteer last edited, "stale" if no edits have occured in two - days, or "review" if it has been open for over four days. - """ - if self.check_for_review(case): - return [] - if len(case.body) - case.last_volunteer_size > 15000: - self.update_status(case, self.STATUS_NEEDASSIST) - timestamps = [timestamp for (editor, timestamp) in signatures] - if timestamps: - age = (datetime.utcnow() - max(timestamps)).total_seconds() - if age > 60 * 60 * 24 * 2: - self.update_status(case, self.STATUS_STALE) - return [] - - def clerk_needassist_case(self, case, volunteers, newsigs): - """Clerk a "needassist" case (no volunteer edits in 15,000 bytes). - - The case will be set to "open" if a volunteer edits, or "review" if it - has been open for over four days. - """ - if self.check_for_review(case): - return [] - if any([editor in volunteers for (editor, timestamp) in newsigs]): - self.update_status(case, self.STATUS_OPEN) - return [] - - def clerk_stale_case(self, case, newsigs): - """Clerk a stale case (no edits in two days). - - The case will be set to "open" if anyone edits, or "review" if it has - been open for over four days. - """ - if self.check_for_review(case): - return [] - if newsigs: - self.update_status(case, self.STATUS_OPEN) - return [] - - def clerk_review_case(self, case): - """Clerk a "review" case (open for more than four days). - - A message will be set to the "very old notifiee", which is generally - [[User talk:Szhang (WMF)]], if the case has been open for more than - five days. - """ - age = (datetime.utcnow() - case.file_time).total_seconds() - if age > 60 * 60 * 24 * 5: - if not case.very_old_notified: - tmpl = self.tl_notify_stale - title = case.title.replace("|", "|") - template = "{{subst:" + tmpl + "|" + title + "}}" - miss = "".format(title) - notice = _Notice(self.very_old_title, template, miss) - case.very_old_notified = True - msg = u" {0}: will notify [[{1}]] with '{2}'" - log = msg.format(case.id, self.very_old_title, template) - self.logger.debug(log) - return [notice] - return [] - - def clerk_closed_case(self, case, signatures): - """Clerk a closed or resolved case. - - The case will be archived if it has been closed/resolved for more than - one day and no edits have been made in the meantime. "Archiving" is - the process of adding {{DRN archive top}}, {{DRN archive bottom}}, and - removing the [[User:DoNotArchiveUntil]] comment. - """ - if case.close_time == self.min_ts: - case.close_time = datetime.utcnow() - if case.archived: - return - timestamps = [timestamp for (editor, timestamp) in signatures] - closed_age = (datetime.utcnow() - case.close_time).total_seconds() - if timestamps: - modify_age = (datetime.utcnow() - max(timestamps)).total_seconds() - else: - modify_age = 0 - if closed_age > 60 * 60 * 24 and modify_age > 60 * 60 * 24: - arch_top = self.tl_archive_top - arch_bottom = self.tl_archive_bottom - reg = "()?" - if re.search(reg, case.body): - case.body = re.sub("\{\{" + arch_top + "\}\}", "", case.body) - case.body = re.sub(reg, "{{" + arch_top + "}}", case.body) - if not re.search(arch_bottom + "\s*\}\}\s*\Z", case.body): - case.body += "\n{{" + arch_bottom + "}}" - case.archived = True - self.logger.debug(u" {0}: archived case".format(case.id)) - - def check_for_review(self, case): - """Check whether a case is old enough to be set to "review".""" - age = (datetime.utcnow() - case.file_time).total_seconds() - if age > 60 * 60 * 24 * 4: - self.update_status(case, self.STATUS_REVIEW) - return True - return False - - def update_status(self, case, new): - """Safely update the status of a case, so we don't edit war.""" - old_n = self.ALIASES[case.status][0].upper() - new_n = self.ALIASES[new][0].upper() - old_n = "NEW" if not old_n else old_n - new_n = "NEW" if not new_n else new_n - if case.last_action != new: - case.status = new - log = u" {0}: {1} -> {2}" - self.logger.debug(log.format(case.id, old_n, new_n)) - return - log = u"Avoiding {0} {1} -> {2} because we already did this ('{3}')" - self.logger.info(log.format(case.id, old_n, new_n, case.title)) - - def read_signatures(self, text): - """Return a list of all parseable signatures in the body of a case. - - Signatures are returned as tuples of (editor, timestamp as datetime). - """ - regex = r"\[\[(?:User(?:\stalk)?\:|Special\:Contributions\/)" - regex += r"([^\n\[\]|]{,256}?)(?:\||\]\])" - regex += r"(?!.*?(?:User(?:\stalk)?\:|Special\:Contributions\/).*?)" - regex += r".{,256}?(\d{2}:\d{2},\s\d{1,2}\s\w+\s\d{4}\s\(UTC\))" - matches = re.findall(regex, text, re.U|re.I) - signatures = [] - for userlink, stamp in matches: - username = userlink.split("/", 1)[0].replace("_", " ").strip() - username = username[0].upper() + username[1:] - if username == "DoNotArchiveUntil": - continue - stamp = stamp.strip() - timestamp = datetime.strptime(stamp, "%H:%M, %d %B %Y (UTC)") - signatures.append((username, timestamp)) - return signatures - - def get_signatures_from_db(self, conn, case): - """Return a list of signatures in a case from the database. - - The return type is the same as read_signatures(). - """ - query = "SELECT signature_username, signature_timestamp FROM signatures WHERE signature_case = ?" - with conn.cursor() as cursor: - cursor.execute(query, (case.id,)) - return cursor.fetchall() - - def notify_parties(self, case): - """Schedule notices to be sent to all parties of a case.""" - if case.parties_notified: - return [] - - notices = [] - template = "{{subst:" + self.tl_notify_party - template += "|thread=" + case.title + "}} ~~~~" - too_late = "" - - re_parties = "'''Users involved'''(.*?)" - text = re.search(re_parties, case.body, re.S|re.U) - for line in text.group(1).splitlines(): - user = re.search("[:*#]{,5} \{\{User\|(.*?)\}\}", line) - if user: - party = user.group(1).replace("_", " ").strip() - if party: - party = party[0].upper() + party[1:] - if party == case.file_user: - continue - notice = _Notice("User talk:" + party, template, too_late) - notices.append(notice) - - case.parties_notified = True - log = u" {0}: will try to notify {1} parties with '{2}'" - self.logger.debug(log.format(case.id, len(notices), template)) - return notices - - def save_case_updates(self, conn, case, volunteers, sigs, storedsigs): - """Save any updates made to a case and signatures in the database.""" - if case.status != case.original_status: - case.last_action = case.status - new = self.ALIASES[case.status][0] - tl_status_esc = re.escape(self.tl_status) - search = "\{\{" + tl_status_esc + "(\|?.*?)\}\}" - repl = "{{" + self.tl_status + "|" + new + "}}" - case.body = re.sub(search, repl, case.body) - - if sigs: - newest_ts = max([stamp for (user, stamp) in sigs]) - newest_user = [usr for (usr, stamp) in sigs if stamp == newest_ts][0] - case.modify_time = newest_ts - case.modify_user = newest_user - - if any([usr in volunteers for (usr, stamp) in sigs]): - newest_vts = max([stamp for (usr, stamp) in sigs if usr in volunteers]) - newest_vuser = [usr for (usr, stamp) in sigs if stamp == newest_vts][0] - case.volunteer_time = newest_vts - case.volunteer_user = newest_vuser - - if case.new: - self.save_new_case(conn, case) - else: - self.save_existing_case(conn, case) - - with conn.cursor() as cursor: - query1 = "DELETE FROM signatures WHERE signature_case = ? AND signature_username = ? AND signature_timestamp = ?" - query2 = "INSERT INTO signatures (signature_case, signature_username, signature_timestamp) VALUES (?, ?, ?)" - removals = set(storedsigs) - set(sigs) - additions = set(sigs) - set(storedsigs) - if removals: - args = [(case.id, name, stamp) for (name, stamp) in removals] - cursor.executemany(query1, args) - if additions: - args = [] - for name, stamp in additions: - args.append((case.id, name, stamp)) - cursor.executemany(query2, args) - msg = u" {0}: added {1} signatures and removed {2}" - log = msg.format(case.id, len(additions), len(removals)) - self.logger.debug(log) - - def save_new_case(self, conn, case): - """Save a brand new case to the database.""" - args = (case.id, case.title, case.status, case.last_action, - case.file_user, case.file_time, case.modify_user, - case.modify_time, case.volunteer_user, case.volunteer_time, - case.close_time, case.parties_notified, - case.very_old_notified, case.archived, - case.last_volunteer_size) - with conn.cursor() as cursor: - query = "INSERT INTO cases VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" - cursor.execute(query, args) - log = u" {0}: inserted new case into database".format(case.id) - self.logger.debug(log) - - def save_existing_case(self, conn, case): - """Save an existing case to the database, updating as necessary.""" - with conn.cursor(oursql.DictCursor) as cursor: - query = "SELECT * FROM cases WHERE case_id = ?" - cursor.execute(query, (case.id,)) - stored = cursor.fetchone() - - with conn.cursor() as cursor: - changes, args = [], [] - fields_to_check = [ - ("case_status", case.status), - ("case_last_action", case.last_action), - ("case_file_user", case.file_user), - ("case_file_time", case.file_time), - ("case_modify_user", case.modify_user), - ("case_modify_time", case.modify_time), - ("case_volunteer_user", case.volunteer_user), - ("case_volunteer_time", case.volunteer_time), - ("case_close_time", case.close_time), - ("case_parties_notified", case.parties_notified), - ("case_very_old_notified", case.very_old_notified), - ("case_archived", case.archived), - ("case_last_volunteer_size", case.last_volunteer_size) - ] - for column, data in fields_to_check: - if data != stored[column]: - changes.append(column + " = ?") - args.append(data) - msg = u" {0}: will alter {1} ('{2}' -> '{3}')" - log = msg.format(case.id, column, stored[column], data) - self.logger.debug(log) - if changes: - changes = ", ".join(changes) - args.append(case.id) - query = "UPDATE cases SET {0} WHERE case_id = ?".format(changes) - cursor.execute(query, args) - else: - log = u" {0}: no changes to commit".format(case.id) - self.logger.debug(log) - - def save(self, page, cases, kwargs, start): - """Save any changes to the noticeboard.""" - newtext = text = page.get() - counter = 0 - for case in cases: - if case.old != case.body: - newtext = newtext.replace(case.old, case.body) - counter += 1 - if newtext == text: - self.logger.info(u"Nothing to edit on [[{0}]]".format(page.title)) - return True - - worktime = time() - start - if worktime < 60: - log = "Waiting {0} seconds to avoid edit conflicts" - self.logger.debug(log.format(int(60 - worktime))) - sleep(60 - worktime) - page.reload() - if page.get() != text: - log = "Someone has edited the page while we were working; restarting" - self.logger.warn(log) - self.run(**kwargs) - return False - summary = self.clerk_summary.replace("$3", str(counter)) - summary = summary.replace("$4", "" if counter == 1 else "s") - page.edit(newtext, summary, minor=True, bot=True) - log = u"Saved page [[{0}]] ({1} updates)" - self.logger.info(log.format(page.title, counter)) - return True - - def send_notices(self, site, notices): - """Send out any templated notices to users or pages.""" - if not notices: - self.logger.info("No notices to send") - return - for notice in notices: - target, template = notice.target, notice.template - log = u"Trying to notify [[{0}]] with '{1}'" - self.logger.debug(log.format(target, template)) - page = site.get_page(target) - if page.namespace == constants.NS_USER_TALK: - user = site.get_user(target.split(":", 1)[1]) - if not user.exists and not user.is_ip: - log = u"Skipping [[{0}]]; user does not exist and is not an IP" - self.logger.info(log.format(target)) - continue - try: - text = page.get() - except exceptions.PageNotFoundError: - text = "" - if notice.too_late and notice.too_late in text: - log = u"Skipping [[{0}]]; was already notified with '{1}'" - self.logger.info(log.format(page.title, template)) - continue - text += ("\n" if text else "") + template - try: - page.edit(text, self.notify_summary, minor=False, bot=True) - except exceptions.EditError as error: - name, msg = type(error).name, error.message - log = u"Couldn't leave notice on [[{0}]] because of {1}: {2}" - self.logger.error(log.format(page.title, name, msg)) - else: - log = u"Notified [[{0}]] with '{1}'" - self.logger.info(log.format(page.title, template)) - - self.logger.debug("Done sending notices") - - def update_chart(self, conn, site): - """Update the chart of open or recently closed cases.""" - page = site.get_page(self.chart_title) - self.logger.info(u"Updating case status at [[{0}]]".format(page.title)) - statuses = self.compile_chart(conn) - text = page.get() - newtext = re.sub(u"(.*?)", - "\n" + statuses + "\n", - text, flags=re.DOTALL) - if newtext == text: - self.logger.info("Chart unchanged; not saving") - return - - newtext = re.sub("(.*?)", - "~~~ at ~~~~~", - newtext) - page.edit(newtext, self.chart_summary, minor=True, bot=True) - self.logger.info(u"Chart saved to [[{0}]]".format(page.title)) - - def compile_chart(self, conn): - """Actually generate the chart from the database.""" - chart = "{{" + self.tl_chart_header + "|small={{{small|}}}}}\n" - query = "SELECT * FROM cases WHERE case_status != ?" - with conn.cursor(oursql.DictCursor) as cursor: - cursor.execute(query, (self.STATUS_UNKNOWN,)) - for case in cursor: - chart += self.compile_row(case) - chart += "{{" + self.tl_chart_footer + "|small={{{small|}}}}}" - return chart - - def compile_row(self, case): - """Generate a single row of the chart from a dict via the database.""" - data = u"|t={case_title}|d={title}|s={case_status}" - data += "|cu={case_file_user}|cs={file_sortkey}|ct={file_time}" - if case["case_volunteer_user"]: - data += "|vu={case_volunteer_user}|vs={volunteer_sortkey}|vt={volunteer_time}" - case["volunteer_time"] = self.format_time(case["case_volunteer_time"]) - case["volunteer_sortkey"] = int(mktime(case["case_volunteer_time"].timetuple())) - data += "|mu={case_modify_user}|ms={modify_sortkey}|mt={modify_time}" - - title = case["case_title"].replace("_", " ").replace("|", "|") - case["title"] = title[:47] + "..." if len(title) > 50 else title - case["file_time"] = self.format_time(case["case_file_time"]) - case["file_sortkey"] = int(mktime(case["case_file_time"].timetuple())) - case["modify_time"] = self.format_time(case["case_modify_time"]) - case["modify_sortkey"] = int(mktime(case["case_modify_time"].timetuple())) - row = "{{" + self.tl_chart_row + data.format(**case) - return row + "|sm={{{small|}}}}}\n" - - def format_time(self, dt): - """Return a string telling the time since datetime occured.""" - parts = [("year", 31536000), ("day", 86400), ("hour", 3600)] - seconds = int((datetime.utcnow() - dt).total_seconds()) - msg = [] - for name, size in parts: - num = seconds // size - seconds -= num * size - if num: - chunk = "{0} {1}".format(num, name if num == 1 else name + "s") - msg.append(chunk) - return ", ".join(msg) + " ago" if msg else "0 hours ago" - - def purge_old_data(self, conn): - """Delete old cases (> six months) from the database.""" - log = "Purging closed cases older than six months from the database" - self.logger.info(log) - query = """DELETE cases, signatures - FROM cases JOIN signatures ON case_id = signature_case - WHERE case_status = ? - AND case_file_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 180 DAY) - AND case_modify_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 180 DAY) - """ - with conn.cursor() as cursor: - cursor.execute(query, (self.STATUS_UNKNOWN,)) - - -class _Case(object): - """A object representing a dispute resolution case.""" - def __init__(self, id_, title, status, last_action, file_user, file_time, - modify_user, modify_time, volunteer_user, volunteer_time, - close_time, parties_notified, archived, very_old_notified, - last_volunteer_size, new=False): - self.id = id_ - self.title = title - self.status = status - self.last_action = last_action - self.file_user = file_user - self.file_time = file_time - self.modify_user = modify_user - self.modify_time = modify_time - self.volunteer_user = volunteer_user - self.volunteer_time = volunteer_time - self.close_time = close_time - self.parties_notified = parties_notified - self.very_old_notified = very_old_notified - self.archived = archived - self.last_volunteer_size = last_volunteer_size - self.new = new - - self.original_status = status - self.body = None - self.old = None - - -class _Notice(object): - """An object representing a notice to be sent to a user or a page.""" - def __init__(self, target, template, too_late=None): - self.target = target - self.template = template - self.too_late = too_late diff --git a/earwigbot/tasks/image_display_resize.py b/earwigbot/tasks/image_display_resize.py deleted file mode 100644 index 18b622c..0000000 --- a/earwigbot/tasks/image_display_resize.py +++ /dev/null @@ -1,33 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2009-2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -from earwigbot.tasks import Task - -class ImageDisplayResize(Task): - """A task to resize upscaled portraits in infoboxes.""" - name = "image_display_resize" - - def setup(self): - pass - - def run(self, **kwargs): - pass diff --git a/earwigbot/tasks/schema/afc_copyvios.sql b/earwigbot/tasks/schema/afc_copyvios.sql deleted file mode 100644 index 37a6729..0000000 --- a/earwigbot/tasks/schema/afc_copyvios.sql +++ /dev/null @@ -1,36 +0,0 @@ --- MySQL dump 10.13 Distrib 5.5.12, for solaris10 (i386) --- --- Host: sql Database: u_earwig_afc_copyvios --- ------------------------------------------------------ --- Server version 5.1.59 - -CREATE DATABASE `u_earwig_afc_copyvios` - DEFAULT CHARACTER SET utf8 - DEFAULT COLLATE utf8_unicode_ci; - --- --- Table structure for table `cache` --- - -DROP TABLE IF EXISTS `cache`; -CREATE TABLE `cache` ( - `cache_id` int(10) unsigned NOT NULL, - `cache_hash` char(64) COLLATE utf8_unicode_ci DEFAULT NULL, - `cache_url` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL, - `cache_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', - `cache_queries` int(4) DEFAULT NULL, - `cache_process_time` float DEFAULT NULL, - PRIMARY KEY (`cache_id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; - --- --- Table structure for table `processed` --- - -DROP TABLE IF EXISTS `processed`; -CREATE TABLE `processed` ( - `page_id` int(10) unsigned NOT NULL, - PRIMARY KEY (`page_id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; - --- Dump completed on 2012-07-20 20:21:00 diff --git a/earwigbot/tasks/schema/afc_history.sql b/earwigbot/tasks/schema/afc_history.sql deleted file mode 100644 index 4fa20fd..0000000 --- a/earwigbot/tasks/schema/afc_history.sql +++ /dev/null @@ -1,23 +0,0 @@ --- MySQL dump 10.13 Distrib 5.5.12, for solaris10 (i386) --- --- Host: sql Database: u_earwig_afc_history --- ------------------------------------------------------ --- Server version 5.1.59 - -CREATE DATABASE `u_earwig_afc_history` - DEFAULT CHARACTER SET utf8 - DEFAULT COLLATE utf8_unicode_ci; - --- --- Table structure for table `page` --- - -DROP TABLE IF EXISTS `page`; -CREATE TABLE `page` ( - `page_id` int(10) unsigned NOT NULL, - `page_date` varchar(50) COLLATE utf8_unicode_ci DEFAULT NULL, - `page_status` tinyint(3) unsigned DEFAULT NULL, - PRIMARY KEY (`page_id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; - --- Dump completed on 2012-07-20 20:20:39 diff --git a/earwigbot/tasks/schema/afc_statistics.sql b/earwigbot/tasks/schema/afc_statistics.sql deleted file mode 100644 index 6e6e2dd..0000000 --- a/earwigbot/tasks/schema/afc_statistics.sql +++ /dev/null @@ -1,68 +0,0 @@ --- MySQL dump 10.13 Distrib 5.5.12, for solaris10 (i386) --- --- Host: sql Database: u_earwig_afc_statistics --- ------------------------------------------------------ --- Server version 5.1.59 - -CREATE DATABASE `u_earwig_afc_statistics` - DEFAULT CHARACTER SET utf8 - DEFAULT COLLATE utf8_unicode_ci; - --- --- Table structure for table `chart` --- - -DROP TABLE IF EXISTS `chart`; -CREATE TABLE `chart` ( - `chart_id` tinyint(3) unsigned NOT NULL AUTO_INCREMENT, - `chart_title` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL, - `chart_special_title` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL, - PRIMARY KEY (`chart_id`) -) ENGINE=InnoDB AUTO_INCREMENT=7 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; - --- --- Dumping data for table `chart` --- - -LOCK TABLES `chart` WRITE; -INSERT INTO `chart` VALUES -(1,'Pending submissions','Submitted'), -(3,'Being reviewed','Reviewer'), -(4,'Recently accepted','Accepted'), -(5,'Recently declined','Declined'), -(6,'Misplaced submissions','Created'); -UNLOCK TABLES; - --- --- Table structure for table `row` --- - -DROP TABLE IF EXISTS `row`; -CREATE TABLE `row` ( - `row_id` int(10) unsigned NOT NULL, - `row_chart` tinyint(3) unsigned DEFAULT NULL, - PRIMARY KEY (`row_id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; - --- --- Table structure for table `page` --- - -DROP TABLE IF EXISTS `page`; -CREATE TABLE `page` ( - `page_id` int(10) unsigned NOT NULL, - `page_status` varchar(16) COLLATE utf8_unicode_ci DEFAULT NULL, - `page_title` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL, - `page_short` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL, - `page_size` varchar(16) COLLATE utf8_unicode_ci DEFAULT NULL, - `page_notes` tinytext COLLATE utf8_unicode_ci, - `page_modify_user` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL, - `page_modify_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', - `page_modify_oldid` int(10) unsigned DEFAULT NULL, - `page_special_user` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL, - `page_special_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', - `page_special_oldid` int(10) unsigned DEFAULT NULL, - PRIMARY KEY (`page_id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; - --- Dump completed on 2012-07-20 20:25:10 diff --git a/earwigbot/tasks/schema/drn_clerkbot.sql b/earwigbot/tasks/schema/drn_clerkbot.sql deleted file mode 100644 index c71c49b..0000000 --- a/earwigbot/tasks/schema/drn_clerkbot.sql +++ /dev/null @@ -1,59 +0,0 @@ --- MySQL dump 10.13 Distrib 5.5.12, for solaris10 (i386) --- --- Host: sql Database: u_earwig_drn_clerkbot --- ------------------------------------------------------ --- Server version 5.1.59 - -CREATE DATABASE `u_earwig_drn_clerkbot` - DEFAULT CHARACTER SET utf8 - DEFAULT COLLATE utf8_unicode_ci; - --- --- Table structure for table `case` --- - -DROP TABLE IF EXISTS `cases`; -CREATE TABLE `cases` ( - `case_id` int(10) unsigned NOT NULL, - `case_title` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL, - `case_status` int(2) unsigned DEFAULT NULL, - `case_last_action` int(2) unsigned DEFAULT NULL, - `case_file_user` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL, - `case_file_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', - `case_modify_user` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL, - `case_modify_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', - `case_volunteer_user` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL, - `case_volunteer_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', - `case_close_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', - `case_parties_notified` tinyint(1) unsigned DEFAULT NULL, - `case_very_old_notified` tinyint(1) unsigned DEFAULT NULL, - `case_archived` tinyint(1) unsigned DEFAULT NULL, - `case_last_volunteer_size` int(9) unsigned DEFAULT NULL, - PRIMARY KEY (`case_id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; - --- --- Table structure for table `signature` --- - -DROP TABLE IF EXISTS `signatures`; -CREATE TABLE `signatures` ( - `signature_id` int(10) unsigned NOT NULL AUTO_INCREMENT, - `signature_case` int(10) unsigned NOT NULL, - `signature_username` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL, - `signature_timestamp` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', - PRIMARY KEY (`signature_id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; - --- --- Table structure for table `volunteer` --- - -DROP TABLE IF EXISTS `volunteers`; -CREATE TABLE `volunteers` ( - `volunteer_id` int(10) unsigned NOT NULL AUTO_INCREMENT, - `volunteer_username` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL, - PRIMARY KEY (`volunteer_id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; - --- Dump completed on 2012-07-31 1:34:28