diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4984243 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +*.pyc +*.egg +*.egg-info +.DS_Store +build +docs/_build diff --git a/commands/afc_pending.py b/commands/afc_pending.py new file mode 100644 index 0000000..32b1f5d --- /dev/null +++ b/commands/afc_pending.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2009-2012 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from earwigbot.commands import Command + +class AFCPending(Command): + """Link the user to the pending AFC submissions page and category.""" + name = "pending" + commands = ["pending", "pend"] + + def process(self, data): + msg1 = "Pending submissions status page: http://enwp.org/WP:AFC/ST" + msg2 = "Pending submissions category: http://enwp.org/CAT:PEND" + self.reply(data, msg1) + self.reply(data, msg2) diff --git a/commands/afc_report.py b/commands/afc_report.py new file mode 100644 index 0000000..14ec082 --- /dev/null +++ b/commands/afc_report.py @@ -0,0 +1,113 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2009-2012 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from earwigbot import wiki +from earwigbot.commands import Command + +class AFCReport(Command): + """Get information about an AFC submission by name.""" + name = "report" + + def process(self, data): + self.site = self.bot.wiki.get_site() + self.data = data + + try: + self.statistics = self.bot.tasks.get("afc_statistics") + except KeyError: + e = "Cannot run command: requires afc_statistics task (from earwigbot_plugins)" + self.logger.error(e) + msg = "command requires afc_statistics task (from earwigbot_plugins)" + self.reply(data, msg) + return + + if not data.args: + msg = "What submission do you want me to give information about?" + self.reply(data, msg) + return + + title = " ".join(data.args) + title = title.replace("http://en.wikipedia.org/wiki/", "") + title = title.replace("http://enwp.org/", "").strip() + + # Given '!report Foo', first try [[Foo]]: + page = self.get_page(title) + if page: + return self.report(page) + + # Then try [[Wikipedia:Articles for creation/Foo]]: + newtitle = "/".join(("Wikipedia:Articles for creation", title)) + page = self.get_page(newtitle) + if page: + return self.report(page) + + # Then try [[Wikipedia talk:Articles for creation/Foo]]: + newtitle = "/".join(("Wikipedia talk:Articles for creation", title)) + page = self.get_page(newtitle) + if page: + return self.report(page) + + self.reply(data, "Submission \x0302{0}\x0F not found.".format(title)) + + def get_page(self, title): + page = self.site.get_page(title, follow_redirects=False) + if page.exists == page.PAGE_EXISTS: + return page + + def report(self, page): + url = page.url.encode("utf8") + url = url.replace("en.wikipedia.org/wiki", "enwp.org") + short = self.statistics.get_short_title(page.title) + status = self.get_status(page) + user = page.get_creator() + user_name = user.name + user_url = user.get_talkpage().url.encode("utf8") + + msg1 = "AfC submission report for \x0302{0}\x0F ({1}):" + msg2 = "Status: \x0303{0}\x0F" + msg3 = "Submitted by \x0302{0}\x0F ({1})" 
+ if status == "accepted": + msg3 = "Reviewed by \x0302{0}\x0F ({1})" + + self.reply(self.data, msg1.format(short, url)) + self.say(self.data.chan, msg2.format(status)) + self.say(self.data.chan, msg3.format(user_name, user_url)) + + def get_status(self, page): + if page.is_redirect: + target = page.get_redirect_target() + if self.site.get_page(target).namespace == wiki.NS_MAIN: + return "accepted" + return "redirect" + + statuses = self.statistics.get_statuses(page.get()) + if "R" in statuses: + return "being reviewed" + elif "H" in statuses: + return "pending draft" + elif "P" in statuses: + return "pending submission" + elif "T" in statuses: + return "unsubmitted draft" + elif "D" in statuses: + return "declined" + return "unknown" diff --git a/commands/afc_status.py b/commands/afc_status.py new file mode 100644 index 0000000..168c719 --- /dev/null +++ b/commands/afc_status.py @@ -0,0 +1,162 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2009-2012 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import re + +from earwigbot.commands import Command + +class AFCStatus(Command): + """Get the number of pending AfC submissions, open redirect requests, and + open file upload requests.""" + name = "status" + commands = ["status", "count", "num", "number"] + hooks = ["join", "msg"] + + def check(self, data): + if data.is_command and data.command in self.commands: + return True + try: + if data.line[1] == "JOIN" and data.chan == "#wikipedia-en-afc": + if data.nick != self.config.irc["frontend"]["nick"]: + return True + except IndexError: + pass + return False + + def process(self, data): + self.site = self.bot.wiki.get_site() + + if data.line[1] == "JOIN": + status = " ".join(("\x02Current status:\x0F", self.get_status())) + self.notice(data.nick, status) + return + + if data.args: + action = data.args[0].lower() + if action.startswith("sub") or action == "s": + subs = self.count_submissions() + msg = "There are \x0305{0}\x0F pending AfC submissions (\x0302WP:AFC\x0F)." + self.reply(data, msg.format(subs)) + + elif action.startswith("redir") or action == "r": + redirs = self.count_redirects() + msg = "There are \x0305{0}\x0F open redirect requests (\x0302WP:AFC/R\x0F)." + self.reply(data, msg.format(redirs)) + + elif action.startswith("file") or action == "f": + files = self.count_files() + msg = "There are \x0305{0}\x0F open file upload requests (\x0302WP:FFU\x0F)." 
+ self.reply(data, msg.format(files)) + + elif action.startswith("agg") or action == "a": + try: + agg_num = int(data.args[1]) + except IndexError: + agg_data = (self.count_submissions(), + self.count_redirects(), self.count_files()) + agg_num = self.get_aggregate_number(agg_data) + except ValueError: + msg = "\x0303{0}\x0F isn't a number!" + self.reply(data, msg.format(data.args[1])) + return + aggregate = self.get_aggregate(agg_num) + msg = "Aggregate is \x0305{0}\x0F (AfC {1})." + self.reply(data, msg.format(agg_num, aggregate)) + + elif action.startswith("nocolor") or action == "n": + self.reply(data, self.get_status(color=False)) + + else: + msg = "Unknown argument: \x0303{0}\x0F. Valid args are 'subs', 'redirs', 'files', 'agg', 'nocolor'." + self.reply(data, msg.format(data.args[0])) + + else: + self.reply(data, self.get_status()) + + def get_status(self, color=True): + subs = self.count_submissions() + redirs = self.count_redirects() + files = self.count_files() + agg_num = self.get_aggregate_number((subs, redirs, files)) + aggregate = self.get_aggregate(agg_num) + + if color: + msg = "Articles for creation {0} (\x0302AFC\x0F: \x0305{1}\x0F; \x0302AFC/R\x0F: \x0305{2}\x0F; \x0302FFU\x0F: \x0305{3}\x0F)." + else: + msg = "Articles for creation {0} (AFC: {1}; AFC/R: {2}; FFU: {3})." + return msg.format(aggregate, subs, redirs, files) + + def count_submissions(self): + """Returns the number of open AFC submissions (count of CAT:PEND).""" + # Subtract two for [[Wikipedia:Articles for creation/Redirects]] and + # [[Wikipedia:Files for upload]], which aren't real submissions: + return self.site.get_category("Pending AfC submissions").pages - 2 + + def count_redirects(self): + """Returns the number of open redirect submissions. 
Calculated as the + total number of submissions minus the closed ones.""" + title = "Wikipedia:Articles for creation/Redirects" + content = self.site.get_page(title).get() + total = len(re.findall("^\s*==(.*?)==\s*$", content, re.MULTILINE)) + closed = content.lower().count("{{afc-c|b}}") + redirs = total - closed + return redirs + + def count_files(self): + """Returns the number of open WP:FFU (Files For Upload) requests. + Calculated as the total number of requests minus the closed ones.""" + content = self.site.get_page("Wikipedia:Files for upload").get() + total = len(re.findall("^\s*==(.*?)==\s*$", content, re.MULTILINE)) + closed = content.lower().count("{{ifu-c|b}}") + files = total - closed + return files + + def get_aggregate(self, num): + """Returns a human-readable AFC status based on the number of pending + AFC submissions, open redirect requests, and open FFU requests. This + does not match {{AFC status}} directly because the algorithm factors in + WP:AFC/R and WP:FFU while the template only looks at the main + submissions. 
The reasoning is that AFC/R and FFU are still part of + the project, so even if there are no pending submissions, a backlog at + FFU (for example) indicates that our work is *not* done and the + project-wide backlog is most certainly *not* clear.""" + if num == 0: + return "is \x02\x0303clear\x0F" + elif num <= 200: + return "is \x0303almost clear\x0F" + elif num <= 400: + return "is \x0312normal\x0F" + elif num <= 600: + return "is \x0307lightly backlogged\x0F" + elif num <= 900: + return "is \x0304backlogged\x0F" + elif num <= 1200: + return "is \x02\x0304heavily backlogged\x0F" + else: + return "is \x02\x1F\x0304severely backlogged\x0F" + + def get_aggregate_number(self, (subs, redirs, files)): + """Returns an 'aggregate number' based on the real number of pending + submissions in CAT:PEND (subs), open redirect submissions in WP:AFC/R + (redirs), and open files-for-upload requests in WP:FFU (files).""" + num = subs + (redirs / 2) + (files / 2) + return num diff --git a/commands/afc_submissions.py b/commands/afc_submissions.py new file mode 100644 index 0000000..3c40774 --- /dev/null +++ b/commands/afc_submissions.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2009-2012 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from earwigbot.commands import Command + +class AFCSubmissions(Command): + """Link the user directly to some pending AFC submissions.""" + name = "submissions" + commands = ["submissions", "subs"] + + def setup(self): + try: + self.ignore_list = self.config.commands[self.name]["ignoreList"] + except KeyError: + try: + ignores = self.config.tasks["afc_statistics"]["ignoreList"] + self.ignore_list = ignores + except KeyError: + self.ignore_list = [] + + def process(self, data): + if data.args: + try: + number = int(data.args[0]) + except ValueError: + self.reply(data, "Argument must be a number.") + return + if number > 5: + msg = "Cannot get more than five submissions at a time." 
+ self.reply(data, msg) + return + else: + number = 3 + + site = self.bot.wiki.get_site() + category = site.get_category("Pending AfC submissions") + members = category.get_members(limit=number + len(self.ignore_list)) + urls = [member.url.encode("utf8") for member in members if member.title not in self.ignore_list] + pages = ", ".join(urls[:number]) + self.reply(data, "{0} pending AfC subs: {1}".format(number, pages)) diff --git a/commands/geolocate.py b/commands/geolocate.py new file mode 100644 index 0000000..6bb8327 --- /dev/null +++ b/commands/geolocate.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2009-2012 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +import json +import urllib2 + +from earwigbot.commands import Command + +class Geolocate(Command): + """Geolocate an IP address (via http://ipinfodb.com/).""" + name = "geolocate" + commands = ["geolocate", "locate", "geo", "ip"] + + def setup(self): + self.config.decrypt(self.config.commands, self.name, "apiKey") + try: + self.key = self.config.commands[self.name]["apiKey"] + except KeyError: + self.key = None + log = 'Cannot use without an API key for http://ipinfodb.com/ stored as config.commands["{0}"]["apiKey"]' + self.logger.warn(log.format(self.name)) + + def process(self, data): + if not data.args: + self.reply(data, "Please specify an IP to lookup.") + return + + if not self.key: + msg = 'I need an API key for http://ipinfodb.com/ stored as \x0303config.commands["{0}"]["apiKey"]\x0F.' + log = 'Need an API key for http://ipinfodb.com/ stored as config.commands["{0}"]["apiKey"]' + self.reply(data, msg.format(self.name) + ".") + self.logger.error(log.format(self.name)) + return + + address = data.args[0] + url = "http://api.ipinfodb.com/v3/ip-city/?key={0}&ip={1}&format=json" + query = urllib2.urlopen(url.format(self.key, address)).read() + res = json.loads(query) + + country = res["countryName"].title() + region = res["regionName"].title() + city = res["cityName"].title() + latitude = res["latitude"] + longitude = res["longitude"] + utcoffset = res["timeZone"] + if not country and not region and not city: + self.reply(data, "IP \x0302{0}\x0F not found.".format(address)) + return + if country == "-" and region == "-" and city == "-": + self.reply(data, "IP \x0302{0}\x0F is reserved.".format(address)) + return + + msg = "{0}, {1}, {2} ({3}, {4}), UTC {5}" + geo = msg.format(country, region, city, latitude, longitude, utcoffset) + self.reply(data, geo) diff --git a/commands/git_command.py b/commands/git_command.py new file mode 100644 index 0000000..cc6cbe5 --- /dev/null +++ b/commands/git_command.py @@ -0,0 +1,240 @@ +# -*- coding: utf-8 -*- +# +# Copyright 
(C) 2009-2012 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import time + +import git + +from earwigbot.commands import Command + +class Git(Command): + """Commands to interface with the bot's git repository; use '!git' for a + sub-command list.""" + name = "git" + + def setup(self): + try: + self.repos = self.config.commands[self.name]["repos"] + except KeyError: + self.repos = None + + def process(self, data): + self.data = data + if not self.config.irc["permissions"].is_owner(data): + msg = "You must be a bot owner to use this command." + self.reply(data, msg) + return + if not data.args or data.args[0] == "help": + self.do_help() + return + if not self.repos: + self.reply(data, "No repos are specified in the config file.") + return + + command = data.args[0] + try: + repo_name = data.args[1] + except IndexError: + repos = self.get_repos() + msg = "Which repo do you want to work with (options are {0})?" 
+ self.reply(data, msg.format(repos)) + return + if repo_name not in self.repos: + repos = self.get_repos() + msg = "Repository must be one of the following: {0}." + self.reply(data, msg.format(repos)) + return + self.repo = git.Repo(self.repos[repo_name]) + + if command == "branch": + self.do_branch() + elif command == "branches": + self.do_branches() + elif command == "checkout": + self.do_checkout() + elif command == "delete": + self.do_delete() + elif command == "pull": + self.do_pull() + elif command == "status": + self.do_status() + else: # They asked us to do something we don't know + msg = "Unknown argument: \x0303{0}\x0F.".format(data.args[0]) + self.reply(data, msg) + + def get_repos(self): + data = self.repos.iteritems() + repos = ["\x0302{0}\x0F ({1})".format(k, v) for k, v in data] + return ", ".join(repos) + + def get_remote(self): + try: + remote_name = self.data.args[2] + except IndexError: + remote_name = "origin" + try: + return getattr(self.repo.remotes, remote_name) + except AttributeError: + msg = "Unknown remote: \x0302{0}\x0F.".format(remote_name) + self.reply(self.data, msg) + + def get_time_since(self, date): + diff = time.mktime(time.gmtime()) - date + if diff < 60: + return "{0} seconds".format(int(diff)) + if diff < 60 * 60: + return "{0} minutes".format(int(diff / 60)) + if diff < 60 * 60 * 24: + return "{0} hours".format(int(diff / 60 / 60)) + return "{0} days".format(int(diff / 60 / 60 / 24)) + + def do_help(self): + """Display all commands.""" + help = { + "branch": "get current branch", + "branches": "get all branches", + "checkout": "switch branches", + "delete": "delete an old branch", + "pull": "update everything from the remote server", + "status": "check if we are up-to-date", + } + subcommands = "" + for key in sorted(help.keys()): + subcommands += "\x0303{0}\x0F ({1}), ".format(key, help[key]) + subcommands = subcommands[:-2] # Trim last comma and space + msg = "Sub-commands are: {0}; repos are: {1}. 
Syntax: !git \x0303subcommand\x0F \x0302repo\x0F." + self.reply(self.data, msg.format(subcommands, self.get_repos())) + + def do_branch(self): + """Get our current branch.""" + branch = self.repo.active_branch.name + msg = "Currently on branch \x0302{0}\x0F.".format(branch) + self.reply(self.data, msg) + + def do_branches(self): + """Get a list of branches.""" + branches = [branch.name for branch in self.repo.branches] + msg = "Branches: \x0302{0}\x0F.".format(", ".join(branches)) + self.reply(self.data, msg) + + def do_checkout(self): + """Switch branches.""" + try: + target = self.data.args[2] + except IndexError: # No branch name provided + self.reply(self.data, "Wwitch to which branch?") + return + + current_branch = self.repo.active_branch.name + if target == current_branch: + msg = "Already on \x0302{0}\x0F!".format(target) + self.reply(self.data, msg) + return + + try: + ref = getattr(self.repo.branches, target) + except AttributeError: + msg = "Branch \x0302{0}\x0F doesn't exist!".format(target) + self.reply(self.data, msg) + else: + ref.checkout() + ms = "Switched from branch \x0302{0}\x0F to \x0302{1}\x0F." + msg = ms.format(current_branch, target) + self.reply(self.data, msg) + log = "{0} checked out branch {1} of {2}" + logmsg = log.format(self.data.nick, target, self.repo.working_dir) + self.logger.info(logmsg) + + def do_delete(self): + """Delete a branch, while making sure that we are not already on it.""" + try: + target = self.data.args[2] + except IndexError: # No branch name provided + self.reply(self.data, "Delete which branch?") + return + + current_branch = self.repo.active_branch.name + if current_branch == target: + msg = "You're currently on this branch; please checkout to a different branch before deleting." 
+ self.reply(self.data, msg) + return + + try: + ref = getattr(self.repo.branches, target) + except AttributeError: + msg = "Branch \x0302{0}\x0F doesn't exist!".format(target) + self.reply(self.data, msg) + else: + self.repo.git.branch("-d", ref) + msg = "Branch \x0302{0}\x0F has been deleted locally." + self.reply(self.data, msg.format(target)) + log = "{0} deleted branch {1} of {2}" + logmsg = log.format(self.data.nick, target, self.repo.working_dir) + self.logger.info(logmsg) + + def do_pull(self): + """Pull from our remote repository.""" + branch = self.repo.active_branch.name + msg = "Pulling from remote (currently on \x0302{0}\x0F)..." + self.reply(self.data, msg.format(branch)) + + remote = self.get_remote() + if not remote: + return + result = remote.pull() + updated = [info for info in result if info.flags != info.HEAD_UPTODATE] + + if updated: + branches = ", ".join([info.ref.remote_head for info in updated]) + msg = "Done; updates to \x0302{0}\x0F (from {1})." + self.reply(self.data, msg.format(branches, remote.url)) + log = "{0} pulled {1} of {2} (updates to {3})" + self.logger.info(log.format(self.data.nick, remote.name, + self.repo.working_dir, branches)) + else: + self.reply(self.data, "Done; no new changes.") + log = "{0} pulled {1} of {2} (no updates)" + self.logger.info(log.format(self.data.nick, remote.name, + self.repo.working_dir)) + + def do_status(self): + """Check if we have anything to pull.""" + remote = self.get_remote() + if not remote: + return + since = self.get_time_since(self.repo.head.object.committed_date) + result = remote.fetch(dry_run=True) + updated = [info for info in result if info.flags != info.HEAD_UPTODATE] + + if updated: + branches = ", ".join([info.ref.remote_head for info in updated]) + msg = "Last local commit was \x02{0}\x0F ago; updates to \x0302{1}\x0F." 
+ self.reply(self.data, msg.format(since, branches)) + log = "{0} got status of {1} of {2} (updates to {3})" + self.logger.info(log.format(self.data.nick, remote.name, + self.repo.working_dir, branches)) + else: + msg = "Last commit was \x02{0}\x0F ago. Local copy is up-to-date with remote." + self.reply(self.data, msg.format(since)) + log = "{0} pulled {1} of {2} (no updates)" + self.logger.info(log.format(self.data.nick, remote.name, + self.repo.working_dir)) diff --git a/commands/praise.py b/commands/praise.py new file mode 100644 index 0000000..8c6c706 --- /dev/null +++ b/commands/praise.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2009-2012 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from earwigbot.commands import Command + +class Praise(Command): + """Praise people!""" + name = "praise" + + def setup(self): + try: + self.praises = self.config.commands[self.name]["praises"] + except KeyError: + self.praises = [] + + def check(self, data): + check = data.command == "praise" or data.command in self.praises + return data.is_command and check + + def process(self, data): + if data.command in self.praises: + msg = self.praises[data.command] + self.say(data.chan, msg) + return + if not data.args: + msg = "You use this command to praise certain people. Who they are is a secret." + else: + msg = "You're doing it wrong." + self.reply(data, msg) diff --git a/tasks/afc_catdelink.py b/tasks/afc_catdelink.py new file mode 100644 index 0000000..5600003 --- /dev/null +++ b/tasks/afc_catdelink.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2009-2012 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from earwigbot.tasks import Task + +class AFCCatDelink(Task): + """A task to delink mainspace categories in declined [[WP:AFC]] + submissions.""" + name = "afc_catdelink" + + def setup(self): + pass + + def run(self, **kwargs): + pass diff --git a/tasks/afc_copyvios.py b/tasks/afc_copyvios.py new file mode 100644 index 0000000..c13dce3 --- /dev/null +++ b/tasks/afc_copyvios.py @@ -0,0 +1,164 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2009-2012 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from hashlib import sha256 +from os.path import expanduser +from threading import Lock +from urllib import quote + +import oursql + +from earwigbot.tasks import Task + +class AFCCopyvios(Task): + """A task to check newly-edited [[WP:AFC]] submissions for copyright + violations.""" + name = "afc_copyvios" + number = 1 + + def setup(self): + cfg = self.config.tasks.get(self.name, {}) + self.template = cfg.get("template", "AfC suspected copyvio") + self.ignore_list = cfg.get("ignoreList", []) + self.min_confidence = cfg.get("minConfidence", 0.5) + self.max_queries = cfg.get("maxQueries", 10) + self.cache_results = cfg.get("cacheResults", False) + default_summary = "Tagging suspected [[WP:COPYVIO|copyright violation]] of {url}." + self.summary = self.make_summary(cfg.get("summary", default_summary)) + + # Connection data for our SQL database: + kwargs = cfg.get("sql", {}) + kwargs["read_default_file"] = expanduser("~/.my.cnf") + self.conn_data = kwargs + self.db_access_lock = Lock() + + def run(self, **kwargs): + """Entry point for the bot task. + + Takes a page title in kwargs and checks it for copyvios, adding + {{self.template}} at the top if a copyvio has been detected. A page is + only checked once (processed pages are stored by page_id in an SQL + database). 
+ """ + if self.shutoff_enabled(): + return + title = kwargs["page"] + page = self.bot.wiki.get_site().get_page(title) + with self.db_access_lock: + self.conn = oursql.connect(**self.conn_data) + self.process(page) + + def process(self, page): + """Detect copyvios in 'page' and add a note if any are found.""" + title = page.title + if title in self.ignore_list: + msg = u"Skipping page in ignore list: [[{0}]]" + self.logger.info(msg.format(title)) + return + + pageid = page.pageid + if self.has_been_processed(pageid): + msg = u"Skipping check on already processed page [[{0}]]" + self.logger.info(msg.format(title)) + return + + self.logger.info(u"Checking [[{0}]]".format(title)) + result = page.copyvio_check(self.min_confidence, self.max_queries) + url = result.url + orig_conf = "{0}%".format(round(result.confidence * 100, 2)) + + if result.violation: + # Things can change in the minute that it takes to do a check. + # Confirm that a violation still holds true: + page.load() + confirm = page.copyvio_compare(url, self.min_confidence) + new_conf = "{0}%".format(round(confirm.confidence * 100, 2)) + if not confirm.violation: + msg = u"A violation was detected in [[{0}]], but couldn't be confirmed." 
+ msg += u" It may have just been edited (best: {1} at {2} -> {3} confidence)" + self.logger.info(msg.format(title, url, orig_conf, new_conf)) + + safeurl = quote(url.encode("utf8"), safe="/:").decode("utf8") + content = page.get() + template = u"\{\{{0}|url={1}|confidence={2}\}\}\n" + template = template.format(self.template, safeurl, new_conf) + newtext = template + content + if "{url}" in self.summary: + page.edit(newtext, self.summary.format(url=url)) + else: + page.edit(newtext, self.summary) + msg = u"Found violation: [[{0}]] -> {1} ({2} confidence)" + self.logger.info(msg.format(title, url, new_conf)) + else: + msg = u"No violations detected in [[{0}]] (best: {1} at {2} confidence)" + self.logger.info(msg.format(title, url, orig_conf)) + + self.log_processed(pageid) + if self.cache_results: + self.cache_result(page, result) + + def has_been_processed(self, pageid): + """Returns True if pageid was processed before, otherwise False.""" + query = "SELECT 1 FROM processed WHERE page_id = ?" + with self.conn.cursor() as cursor: + cursor.execute(query, (pageid,)) + results = cursor.fetchall() + return True if results else False + + def log_processed(self, pageid): + """Adds pageid to our database of processed pages. + + Raises an exception if the page has already been processed. + """ + query = "INSERT INTO processed VALUES (?)" + with self.conn.cursor() as cursor: + cursor.execute(query, (pageid,)) + + def cache_result(self, page, result): + """Store the check's result in a cache table temporarily. + + The cache contains the page's ID, a hash of its content, the URL of the + best match, the time of caching, and the number of queries used. It + will replace any existing cache entries for that page. + + The cache is intended for EarwigBot's complementary Toolserver web + interface, in which copyvio checks can be done separately from the bot. 
+ The cache saves time and money by saving the result of the web search + but neither the result of the comparison nor any actual text (which + could violate data retention policy). Cache entries are (intended to + be) retained for three days; this task does not remove old entries + (that is handled by the Toolserver component). + + This will only be called if ``cache_results == True`` in the task's + config, which is ``False`` by default. + """ + pageid = page.pageid + hash = sha256(page.get()).hexdigest() + query1 = "SELECT 1 FROM cache WHERE cache_id = ?" + query2 = "DELETE FROM cache WHERE cache_id = ?" + query3 = "INSERT INTO cache VALUES (?, ?, ?, CURRENT_TIMESTAMP, ?, ?)" + with self.conn.cursor() as cursor: + cursor.execute(query1, (pageid,)) + if cursor.fetchall(): + cursor.execute(query2, (pageid,)) + args = (pageid, hash, result.url, result.queries, 0) + cursor.execute(query3, args) diff --git a/tasks/afc_dailycats.py b/tasks/afc_dailycats.py new file mode 100644 index 0000000..dc8e769 --- /dev/null +++ b/tasks/afc_dailycats.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2009-2012 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from earwigbot.tasks import Task

class AFCDailyCats(Task):
    """A task to create daily categories for [[WP:AFC]]."""
    # Registered task name and number used by the bot's task scheduler.
    name = "afc_dailycats"
    number = 3

    def setup(self):
        # No configuration to load; this task is currently a stub.
        pass

    def run(self, **kwargs):
        # Scheduler entry point. Category creation is not implemented yet.
        pass
diff --git a/tasks/afc_history.py b/tasks/afc_history.py
new file mode 100644
index 0000000..d623e31
--- /dev/null
+++ b/tasks/afc_history.py
@@ -0,0 +1,227 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from collections import OrderedDict
from datetime import datetime, timedelta
from itertools import count
from os.path import expanduser
from threading import Lock
from time import sleep

from matplotlib import pyplot as plt
from numpy import arange
import oursql

from earwigbot import wiki
from earwigbot.tasks import Task

# NOTE: this module uses Python 2 idioms throughout (xrange, generator.next(),
# dict.itervalues()); it targets a Python 2 runtime.

class AFCHistory(Task):
    """A task to generate charts about AfC submissions over time.

    The main function of the task is to work through the "AfC submissions by
    date" categories (e.g. [[Category:AfC submissions by date/12 July 2011]])
    and determine the number of declined, accepted, and currently pending
    submissions every day.

    This information is saved to a MySQL database ("u_earwig_afc_history") and
    used to generate a graph showing the number of AfC submissions by date
    with matplotlib and numpy. The chart is saved as a PNG to
    config.tasks["afc_history"]["graph"]["dest"], which defaults to
    "afc_history.png".
    """
    name = "afc_history"

    # Valid submission statuses:
    STATUS_NONE = 0
    STATUS_PEND = 1
    STATUS_DECLINE = 2
    STATUS_ACCEPT = 3

    def setup(self):
        # Load task configuration; every key falls back to a sane default so
        # the task also runs with an empty config section.
        cfg = self.config.tasks.get(self.name, {})
        self.num_days = cfg.get("days", 90)
        self.categories = cfg.get("categories", {})

        # Graph stuff:
        self.graph = cfg.get("graph", {})
        self.destination = self.graph.get("dest", "afc_history.png")

        # Connection data for our SQL database:
        kwargs = cfg.get("sql", {})
        kwargs["read_default_file"] = expanduser("~/.my.cnf")
        self.conn_data = kwargs
        self.db_access_lock = Lock()

    def run(self, **kwargs):
        """Entry point: dispatch to update() or generate() per kwargs["action"].

        "days" in kwargs overrides the configured self.num_days. The SQL
        connection is opened under the access lock and always closed on exit.
        """
        self.site = self.bot.wiki.get_site()
        with self.db_access_lock:
            self.conn = oursql.connect(**self.conn_data)

            action = kwargs.get("action")
            try:
                num_days = int(kwargs.get("days", self.num_days))
                if action == "update":
                    self.update(num_days)
                elif action == "generate":
                    self.generate(num_days)
            finally:
                self.conn.close()

    def update(self, num_days):
        """Refresh stored statuses for the last *num_days* daily categories."""
        self.logger.info("Updating past {0} days".format(num_days))
        generator = self.backwards_cat_iterator()
        for i in xrange(num_days):
            category = generator.next()
            # Category titles end in ".../<day month year>"; use that as key.
            date = category.title.split("/")[-1]
            self.update_date(date, category)
            # Pause between categories — presumably to throttle API/DB load;
            # TODO confirm the intent of the 10-second figure.
            sleep(10)
        self.logger.info("Update complete")

    def generate(self, num_days):
        """Build the chart for the last *num_days* days and save it as a PNG."""
        self.logger.info("Generating chart for past {0} days".format(num_days))
        data = OrderedDict()
        generator = self.backwards_cat_iterator()
        for i in xrange(num_days):
            category = generator.next()
            date = category.title.split("/")[-1]
            data[date] = self.get_date_counts(date)

        data = OrderedDict(reversed(data.items()))  # Oldest to most recent
        self.generate_chart(data)
        dest = expanduser(self.destination)
        plt.savefig(dest)
        self.logger.info("Chart saved to {0}".format(dest))

    def backwards_cat_iterator(self):
        """Yield daily subcategories of categories["dateBase"], one day at a
        time, starting today and stepping backwards. Infinite generator."""
        date_base = self.categories["dateBase"]
        current = datetime.utcnow()
        while 1:
            subcat = current.strftime("%d %B %Y")
            title = "/".join((date_base, subcat))
            yield self.site.get_category(title)
            current -= timedelta(1)  # Subtract one day from date

    def update_date(self, date, category):
        """Synchronize DB rows for one day with the members of its category.

        For each member page: delete its row if its status is now NONE,
        update the row if date/status changed, or insert a new row.
        """
        msg = "Updating {0} ([[{1}]])".format(date, category.title)
        self.logger.debug(msg)

        q_select = "SELECT page_date, page_status FROM page WHERE page_id = ?"
        q_delete = "DELETE FROM page WHERE page_id = ?"
        q_update = "UPDATE page SET page_date = ?, page_status = ? WHERE page_id = ?"
        q_insert = "INSERT INTO page VALUES (?, ?, ?)"
        members = category.get_members()

        with self.conn.cursor() as cursor:
            for title, pageid in members:
                cursor.execute(q_select, (pageid,))
                stored = cursor.fetchall()
                status = self.get_status(title, pageid)

                if status == self.STATUS_NONE:
                    # Page no longer counts as a submission; drop any old row.
                    if stored:
                        cursor.execute(q_delete, (pageid,))
                    continue

                if stored:
                    stored_date, stored_status = list(stored)[0]
                    if date != stored_date or status != stored_status:
                        cursor.execute(q_update, (date, status, pageid))

                else:
                    cursor.execute(q_insert, (pageid, date, status))

    def get_status(self, title, pageid):
        """Return the STATUS_* constant for the submission at *title*.

        Namespace rules first (file-talk ignored; talk pages count as
        accepted unless their subject page is a redirect), then category
        membership (pending/unsubmitted/declined) checked via SQL.
        """
        page = self.site.get_page(title)
        ns = page.namespace

        if ns == wiki.NS_FILE_TALK:  # Ignore accepted FFU requests
            return self.STATUS_NONE

        if ns == wiki.NS_TALK:
            new_page = page.toggle_talk()
            # Brief pause before the extra page fetch — presumably API
            # throttling; TODO confirm.
            sleep(2)
            if new_page.is_redirect:
                return self.STATUS_NONE  # Ignore accepted AFC/R requests
            return self.STATUS_ACCEPT

        cats = self.categories
        sq = self.site.sql_query
        query = "SELECT 1 FROM categorylinks WHERE cl_to = ? AND cl_from = ?"
        # categorylinks stores titles with underscores, hence the replace().
        match = lambda cat: list(sq(query, (cat.replace(" ", "_"), pageid)))

        if match(cats["pending"]):
            return self.STATUS_PEND
        elif match(cats["unsubmitted"]):
            return self.STATUS_NONE
        elif match(cats["declined"]):
            return self.STATUS_DECLINE
        return self.STATUS_NONE

    def get_date_counts(self, date):
        """Return {status: count} for pending/declined/accepted on *date*."""
        query = "SELECT COUNT(*) FROM page WHERE page_date = ? AND page_status = ?"
        statuses = [self.STATUS_PEND, self.STATUS_DECLINE, self.STATUS_ACCEPT]
        counts = {}
        with self.conn.cursor() as cursor:
            for status in statuses:
                cursor.execute(query, (date, status))
                count = cursor.fetchall()[0][0]
                counts[status] = count
        return counts

    def generate_chart(self, data):
        """Render *data* (date -> status counts) as a stacked bar chart onto
        matplotlib's current (global) figure; caller saves it afterwards."""
        plt.title(self.graph.get("title", "AfC submissions by date"))
        plt.xlabel(self.graph.get("xaxis", "Date"))
        plt.ylabel(self.graph.get("yaxis", "Submissions"))

        pends = [d[self.STATUS_PEND] for d in data.itervalues()]
        declines = [d[self.STATUS_DECLINE] for d in data.itervalues()]
        accepts = [d[self.STATUS_ACCEPT] for d in data.itervalues()]
        # Running totals so the accepted bars stack on pending + declined.
        pends_declines = [p + d for p, d in zip(pends, declines)]
        ind = arange(len(data))
        xsize = self.graph.get("xsize", 1200)
        ysize = self.graph.get("ysize", 900)
        width = self.graph.get("width", 1)
        xstep = self.graph.get("xAxisStep", 6)
        pcolor = self.graph.get("pendingColor", "#f0e460")
        dcolor = self.graph.get("declinedColor", "#f291a6")
        acolor = self.graph.get("acceptedColor", "#81fc4c")

        # Stacked bars: pending at the bottom, declined above, accepted on top.
        p1 = plt.bar(ind, pends, width, color=pcolor)
        p2 = plt.bar(ind, declines, width, color=dcolor, bottom=pends)
        p3 = plt.bar(ind, accepts, width, color=acolor, bottom=pends_declines)

        # Label every xstep-th bar, centering ticks on the bars.
        xticks = arange(xstep-1, ind.size+xstep-1, xstep) + width/2.0
        xlabels = [d for c, d in zip(count(1), data.keys()) if not c % xstep]
        plt.xticks(xticks, xlabels)
        plt.yticks(arange(0, plt.ylim()[1], 10))
        plt.tick_params(direction="out")

        leg = plt.legend((p1[0], p2[0], p3[0]), ("Pending", "Declined",
                         "Accepted"), loc="upper left", fancybox=True)
        leg.get_frame().set_alpha(0.5)

        fig = plt.gcf()
        # Size is configured in pixels; matplotlib wants inches (100 dpi here).
        fig.set_size_inches(xsize/100, ysize/100)
        fig.autofmt_xdate()

        ax = plt.gca()
        ax.yaxis.grid(True)
diff --git a/tasks/afc_statistics.py b/tasks/afc_statistics.py
new file mode 100644
index 0000000..55444d6
--- /dev/null
+++ b/tasks/afc_statistics.py
@@ -0,0 +1,739 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben
Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from datetime import datetime +import re +from os.path import expanduser +from threading import Lock +from time import sleep + +import oursql + +from earwigbot import exceptions +from earwigbot import wiki +from earwigbot.tasks import Task + +class AFCStatistics(Task): + """A task to generate statistics for WikiProject Articles for Creation. + + Statistics are stored in a MySQL database ("u_earwig_afc_statistics") + accessed with oursql. Statistics are synchronied with the live database + every four minutes and saved once an hour, on the hour, to self.pagename. + In the live bot, this is "Template:AFC statistics". 
+ """ + name = "afc_statistics" + number = 2 + + # Chart status number constants: + CHART_NONE = 0 + CHART_PEND = 1 + CHART_DRAFT = 2 + CHART_REVIEW = 3 + CHART_ACCEPT = 4 + CHART_DECLINE = 5 + CHART_MISPLACE = 6 + + def setup(self): + self.cfg = cfg = self.config.tasks.get(self.name, {}) + + # Set some wiki-related attributes: + self.pagename = cfg.get("page", "Template:AFC statistics") + self.pending_cat = cfg.get("pending", "Pending AfC submissions") + self.ignore_list = cfg.get("ignoreList", []) + default_summary = "Updating statistics for [[WP:WPAFC|WikiProject Articles for creation]]." + self.summary = self.make_summary(cfg.get("summary", default_summary)) + + # Templates used in chart generation: + templates = cfg.get("templates", {}) + self.tl_header = templates.get("header", "AFC statistics/header") + self.tl_row = templates.get("row", "AFC statistics/row") + self.tl_footer = templates.get("footer", "AFC statistics/footer") + + # Connection data for our SQL database: + kwargs = cfg.get("sql", {}) + kwargs["read_default_file"] = expanduser("~/.my.cnf") + self.conn_data = kwargs + self.db_access_lock = Lock() + + def run(self, **kwargs): + """Entry point for a task event. + + Depending on the kwargs passed, we will either synchronize our local + statistics database with the site (self.sync()) or save it to the wiki + (self.save()). We will additionally create an SQL connection with our + local database. 
+ """ + action = kwargs.get("action") + if not self.db_access_lock.acquire(False): # Non-blocking + if action == "sync": + self.logger.info("A sync is already ongoing; aborting") + return + self.logger.info("Waiting for database access lock") + self.db_access_lock.acquire() + + try: + self.site = self.bot.wiki.get_site() + self.conn = oursql.connect(**self.conn_data) + try: + if action == "save": + self.save(kwargs) + elif action == "sync": + self.sync(kwargs) + elif action == "update": + self.update(kwargs) + finally: + self.conn.close() + finally: + self.db_access_lock.release() + + def save(self, kwargs): + """Save our local statistics to the wiki. + + After checking for emergency shutoff, the statistics chart is compiled, + and then saved to self.pagename using self.summary iff it has changed + since last save. + """ + self.logger.info("Saving chart") + if kwargs.get("fromIRC"): + summary = self.summary + " (!earwigbot)" + else: + if self.shutoff_enabled(): + return + summary = self.summary + + statistics = self.compile_charts() + + page = self.site.get_page(self.pagename) + text = page.get() + newtext = re.sub(u"(.*?)", + "\n" + statistics + "\n", + text, flags=re.DOTALL) + if newtext == text: + self.logger.info("Chart unchanged; not saving") + return # Don't edit the page if we're not adding anything + + newtext = re.sub("(.*?)", + "~~~ at ~~~~~", + newtext) + page.edit(newtext, summary, minor=True, bot=True) + self.logger.info(u"Chart saved to [[{0}]]".format(page.title)) + + def compile_charts(self): + """Compile and return all statistics information from our local db.""" + stats = "" + with self.conn.cursor() as cursor: + cursor.execute("SELECT * FROM chart") + for chart in cursor: + stats += self.compile_chart(chart) + "\n" + return stats[:-1] # Drop the last newline + + def compile_chart(self, chart_info): + """Compile and return a single statistics chart.""" + chart_id, chart_title, special_title = chart_info + + chart = self.tl_header + "|" + 
chart_title + if special_title: + chart += "|" + special_title + chart = "{{" + chart + "}}" + + query = "SELECT * FROM page JOIN row ON page_id = row_id WHERE row_chart = ?" + with self.conn.cursor(oursql.DictCursor) as cursor: + cursor.execute(query, (chart_id,)) + for page in cursor: + chart += "\n" + self.compile_chart_row(page) + + chart += "\n{{" + self.tl_footer + "}}" + return chart + + def compile_chart_row(self, page): + """Compile and return a single chart row. + + 'page' is a dict of page information, taken as a row from the page + table, where keys are column names and values are their cell contents. + """ + row = u"{0}|s={page_status}|t={page_title}|h={page_short}|z={page_size}|" + if page["page_special_oldid"]: + row += "sr={page_special_user}|sd={page_special_time}|si={page_special_oldid}|" + row += "mr={page_modify_user}|md={page_modify_time}|mi={page_modify_oldid}" + + page["page_special_time"] = self.format_time(page["page_special_time"]) + page["page_modify_time"] = self.format_time(page["page_modify_time"]) + + if page["page_notes"]: + row += "|n=1{page_notes}" + + return "{{" + row.format(self.tl_row, **page) + "}}" + + def format_time(self, dt): + """Format a datetime into the standard MediaWiki timestamp format.""" + return dt.strftime("%H:%M, %d %b %Y") + + def sync(self, kwargs): + """Synchronize our local statistics database with the site. + + Syncing involves, in order, updating tracked submissions that have + been changed since last sync (self.update_tracked()), adding pending + submissions that are not tracked (self.add_untracked()), and removing + old submissions from the database (self.delete_old()). + + The sync will be canceled if SQL replication lag is greater than 600 + seconds, because this will lead to potential problems and outdated + data, not to mention putting demand on an already overloaded server. + Giving sync the kwarg "ignore_replag" will go around this restriction. 
+ """ + self.logger.info("Starting sync") + + replag = self.site.get_replag() + self.logger.debug("Server replag is {0}".format(replag)) + if replag > 600 and not kwargs.get("ignore_replag"): + msg = "Sync canceled as replag ({0} secs) is greater than ten minutes" + self.logger.warn(msg.format(replag)) + return + + with self.conn.cursor() as cursor: + self.update_tracked(cursor) + self.add_untracked(cursor) + self.delete_old(cursor) + + self.logger.info("Sync completed") + + def update_tracked(self, cursor): + """Update tracked submissions that have been changed since last sync. + + This is done by iterating through every page in our database and + comparing our stored latest revision ID with the actual latest revision + ID from an SQL query. If they differ, we will update our information + about the page (self.update_page()). + + If the page does not exist, we will remove it from our database with + self.untrack_page(). + """ + self.logger.debug("Updating tracked submissions") + query1 = "SELECT page_id, page_title, page_modify_oldid FROM page" + query2 = """SELECT page_latest, page_title, page_namespace FROM page + WHERE page_id = ?""" + cursor.execute(query1) + + for pageid, title, oldid in cursor: + result = list(self.site.sql_query(query2, (pageid,))) + if not result: + self.untrack_page(cursor, pageid) + continue + + real_oldid = result[0][0] + if oldid != real_oldid: + msg = u"Updating page [[{0}]] (id: {1}) @ {2}" + self.logger.debug(msg.format(title, pageid, oldid)) + self.logger.debug(" {0} -> {1}".format(oldid, real_oldid)) + base = result[0][1].decode("utf8").replace("_", " ") + ns = self.site.namespace_id_to_name(result[0][2]) + if ns: + real_title = u":".join((ns, base)) + else: + real_title = base + try: + self.update_page(cursor, pageid, real_title) + except Exception: + e = u"Error updating page [[{0}]] (id: {1})" + self.logger.exception(e.format(real_title, pageid)) + + def add_untracked(self, cursor): + """Add pending submissions that are not yet 
tracked. + + This is done by compiling a list of all currently tracked submissions + and iterating through all members of self.pending_cat via SQL. If a + page in the pending category is not tracked and is not in + self.ignore_list, we will track it with self.track_page(). + """ + self.logger.debug("Adding untracked pending submissions") + cursor.execute("SELECT page_id FROM page") + tracked = [i[0] for i in cursor.fetchall()] + + category = self.site.get_category(self.pending_cat) + for page in category.get_members(): + title, pageid = page.title, page.pageid + if title in self.ignore_list: + continue + if pageid not in tracked: + msg = u"Tracking page [[{0}]] (id: {1})".format(title, pageid) + self.logger.debug(msg) + try: + self.track_page(cursor, pageid, title) + except Exception: + e = u"Error tracking page [[{0}]] (id: {1})" + self.logger.exception(e.format(title, pageid)) + + def delete_old(self, cursor): + """Remove old submissions from the database. + + "Old" is defined as a submission that has been declined or accepted + more than 36 hours ago. Pending submissions cannot be "old". + """ + self.logger.debug("Removing old submissions from chart") + query = """DELETE FROM page, row USING page JOIN row + ON page_id = row_id WHERE row_chart IN (?, ?) + AND ADDTIME(page_special_time, '36:00:00') < NOW()""" + cursor.execute(query, (self.CHART_ACCEPT, self.CHART_DECLINE)) + + def update(self, kwargs): + """Update a page by name, regardless of whether anything has changed. + + Mainly intended as a command to be used via IRC, e.g.: + !tasks start afc_statistics action=update page=Foobar + """ + title = kwargs.get("page") + if not title: + return + + title = title.replace("_", " ").decode("utf8") + query = "SELECT page_id, page_modify_oldid FROM page WHERE page_title = ?" 
+ with self.conn.cursor() as cursor: + cursor.execute(query, (title,)) + try: + pageid, oldid = cursor.fetchall()[0] + except IndexError: + msg = u"Page [[{0}]] not found in database".format(title) + self.logger.error(msg) + + msg = u"Updating page [[{0}]] (id: {1}) @ {2}" + self.logger.info(msg.format(title, pageid, oldid)) + self.update_page(cursor, pageid, title) + + def untrack_page(self, cursor, pageid): + """Remove a page, given by ID, from our database.""" + self.logger.debug("Untracking page (id: {0})".format(pageid)) + query = """DELETE FROM page, row USING page JOIN row + ON page_id = row_id WHERE page_id = ?""" + cursor.execute(query, (pageid,)) + + def track_page(self, cursor, pageid, title): + """Update hook for when page is not in our database. + + A variety of SQL queries are used to gather information about the page, + which is then saved to our database. + """ + content = self.get_content(title) + if content is None: + msg = u"Could not get page content for [[{0}]]".format(title) + self.logger.error(msg) + return + + namespace = self.site.get_page(title).namespace + status, chart = self.get_status_and_chart(content, namespace) + if chart == self.CHART_NONE: + msg = u"Could not find a status for [[{0}]]".format(title) + self.logger.warn(msg) + return + + short = self.get_short_title(title) + size = self.get_size(content) + m_user, m_time, m_id = self.get_modify(pageid) + s_user, s_time, s_id = self.get_special(pageid, chart) + notes = self.get_notes(chart, content, m_time, s_user) + + query1 = "INSERT INTO row VALUES (?, ?)" + query2 = "INSERT INTO page VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" + cursor.execute(query1, (pageid, chart)) + cursor.execute(query2, (pageid, status, title, short, size, notes, + m_user, m_time, m_id, s_user, s_time, s_id)) + + def update_page(self, cursor, pageid, title): + """Update hook for when page is already in our database. 
+ + A variety of SQL queries are used to gather information about the page, + which is compared against our stored information. Differing information + is then updated. + """ + content = self.get_content(title) + if content is None: + msg = u"Could not get page content for [[{0}]]".format(title) + self.logger.error(msg) + return + + namespace = self.site.get_page(title).namespace + status, chart = self.get_status_and_chart(content, namespace) + if chart == self.CHART_NONE: + self.untrack_page(cursor, pageid) + return + + query = "SELECT * FROM page JOIN row ON page_id = row_id WHERE page_id = ?" + with self.conn.cursor(oursql.DictCursor) as dict_cursor: + dict_cursor.execute(query, (pageid,)) + result = dict_cursor.fetchall()[0] + + size = self.get_size(content) + m_user, m_time, m_id = self.get_modify(pageid) + + if title != result["page_title"]: + self.update_page_title(cursor, result, pageid, title) + + if m_id != result["page_modify_oldid"]: + self.update_page_modify(cursor, result, pageid, size, m_user, + m_time, m_id) + + if status != result["page_status"]: + special = self.update_page_status(cursor, result, pageid, status, + chart) + s_user = special[0] + else: + s_user = result["page_special_user"] + + notes = self.get_notes(chart, content, m_time, s_user) + if notes != result["page_notes"]: + self.update_page_notes(cursor, result, pageid, notes) + + def update_page_title(self, cursor, result, pageid, title): + """Update the title and short_title of a page in our database.""" + query = "UPDATE page SET page_title = ?, page_short = ? WHERE page_id = ?" 
+ short = self.get_short_title(title) + cursor.execute(query, (title, short, pageid)) + + msg = u" {0}: title: {1} -> {2}" + self.logger.debug(msg.format(pageid, result["page_title"], title)) + + def update_page_modify(self, cursor, result, pageid, size, m_user, m_time, m_id): + """Update the last modified information of a page in our database.""" + query = """UPDATE page SET page_size = ?, page_modify_user = ?, + page_modify_time = ?, page_modify_oldid = ? + WHERE page_id = ?""" + cursor.execute(query, (size, m_user, m_time, m_id, pageid)) + + msg = u" {0}: modify: {1} / {2} / {3} -> {4} / {5} / {6}" + msg = msg.format(pageid, result["page_modify_user"], + result["page_modify_time"], + result["page_modify_oldid"], m_user, m_time, m_id) + self.logger.debug(msg) + + def update_page_status(self, cursor, result, pageid, status, chart): + """Update the status and "specialed" information of a page.""" + query1 = """UPDATE page JOIN row ON page_id = row_id + SET page_status = ?, row_chart = ? WHERE page_id = ?""" + query2 = """UPDATE page SET page_special_user = ?, + page_special_time = ?, page_special_oldid = ? + WHERE page_id = ?""" + cursor.execute(query1, (status, chart, pageid)) + + msg = " {0}: status: {1} ({2}) -> {3} ({4})" + self.logger.debug(msg.format(pageid, result["page_status"], + result["row_chart"], status, chart)) + + s_user, s_time, s_id = self.get_special(pageid, chart) + if s_id != result["page_special_oldid"]: + cursor.execute(query2, (s_user, s_time, s_id, pageid)) + msg = u"{0}: special: {1} / {2} / {3} -> {4} / {5} / {6}" + msg = msg.format(pageid, result["page_special_user"], + result["page_special_time"], + result["page_special_oldid"], s_user, s_time, s_id) + self.logger.debug(msg) + + return s_user, s_time, s_id + + def update_page_notes(self, cursor, result, pageid, notes): + """Update the notes (or warnings) of a page in our database.""" + query = "UPDATE page SET page_notes = ? WHERE page_id = ?" 
+ cursor.execute(query, (notes, pageid)) + msg = " {0}: notes: {1} -> {2}" + self.logger.debug(msg.format(pageid, result["page_notes"], notes)) + + def get_content(self, title): + """Get the current content of a page by title from the API. + + The page's current revision ID is retrieved from SQL, and then + an API query is made to get its content. This is the only API query + used in the task's code. + """ + query = "SELECT page_latest FROM page WHERE page_title = ? AND page_namespace = ?" + try: + namespace, base = title.split(":", 1) + except ValueError: + base = title + ns = wiki.NS_MAIN + else: + try: + ns = self.site.namespace_name_to_id(namespace) + except exceptions.NamespaceNotFoundError: + base = title + ns = wiki.NS_MAIN + + result = self.site.sql_query(query, (base.replace(" ", "_"), ns)) + try: + revid = int(list(result)[0][0]) + except IndexError: + return None + return self.get_revision_content(revid) + + def get_revision_content(self, revid, tries=1): + """Get the content of a revision by ID from the API.""" + res = self.site.api_query(action="query", prop="revisions", + revids=revid, rvprop="content") + try: + return res["query"]["pages"].values()[0]["revisions"][0]["*"] + except KeyError: + if tries > 0: + sleep(5) + return self.get_revision_content(revid, tries=tries - 1) + + def get_status_and_chart(self, content, namespace): + """Determine the status and chart number of an AFC submission. + + The methodology used here is the same one I've been using for years + (see also commands.afc_report), but with the new draft system taken + into account. The order here is important: if there is more than one + {{AFC submission}} template on a page, we need to know which one to + use (revision history search to find the most recent isn't a viable + idea :P). 
+ """ + statuses = self.get_statuses(content) + + if "R" in statuses: + status, chart = "r", self.CHART_REVIEW + elif "H" in statuses: + status, chart = "p", self.CHART_DRAFT + elif "P" in statuses: + status, chart = "p", self.CHART_PEND + elif "T" in statuses: + status, chart = None, self.CHART_NONE + elif "D" in statuses: + status, chart = "d", self.CHART_DECLINE + else: + status, chart = None, self.CHART_NONE + + if namespace == wiki.NS_MAIN: + if not statuses: + status, chart = "a", self.CHART_ACCEPT + else: + status, chart = None, self.CHART_MISPLACE + + return status, chart + + def get_statuses(self, content): + """Return a list of all AFC submission statuses in a page's text.""" + re_has_templates = "\{\{[aA][fF][cC] submission\s*(\}\}|\||/)" + re_template = "\{\{[aA][fF][cC] submission\s*(.*?)\}\}" + re_remove_embed = "(\{\{[aA][fF][cC] submission\s*(.*?))\{\{(.*?)\}\}(.*?)\}\}" + valid = ["R", "H", "P", "T", "D"] + subtemps = { + "/reviewing": "R", + "/onhold": "H", + "/pending": "P", + "/draft": "T", + "/declined": "D" + } + statuses = [] + + while re.search(re_has_templates, content): + status = "P" + match = re.search(re_template, content, re.S) + if not match: + return statuses + temp = match.group(1) + limit = 0 + while "{{" in temp and limit < 50: + content = re.sub(re_remove_embed, "\\1\\4}}", content, 1, re.S) + match = re.search(re_template, content, re.S) + temp = match.group(1) + limit += 1 + params = temp.split("|") + try: + subtemp, params = params[0].strip(), params[1:] + except IndexError: + status = "P" + params = [] + else: + if subtemp: + status = subtemps.get(subtemp) + params = [] + for param in params: + param = param.strip().upper() + if "=" in param: + key, value = param.split("=", 1) + if key.strip() == "1": + status = value if value in valid else "P" + break + else: + status = param if param in valid else "P" + break + statuses.append(status) + content = re.sub(re_template, "", content, 1, re.S) + + return statuses + + def 
get_short_title(self, title): + """Shorten a title so we can display it in a chart using less space. + + Basically, this just means removing the "Wikipedia talk:Articles for + creation" part from the beginning. If it is longer than 50 characters, + we'll shorten it down to 47 and add an poor-man's ellipsis at the end. + """ + short = re.sub("Wikipedia(\s*talk)?\:Articles\sfor\screation\/", "", title) + if len(short) > 50: + short = short[:47] + "..." + return short + + def get_size(self, content): + """Return a page's size in a short, pretty format.""" + return "{0} kB".format(round(len(content) / 1000.0, 1)) + + def get_modify(self, pageid): + """Return information about a page's last edit ("modification"). + + This consists of the most recent editor, modification time, and the + lastest revision ID. + """ + query = """SELECT rev_user_text, rev_timestamp, rev_id FROM revision + JOIN page ON rev_id = page_latest WHERE page_id = ?""" + result = self.site.sql_query(query, (pageid,)) + m_user, m_time, m_id = list(result)[0] + timestamp = datetime.strptime(m_time, "%Y%m%d%H%M%S") + return m_user.decode("utf8"), timestamp, m_id + + def get_special(self, pageid, chart): + """Return information about a page's "special" edit. + + I tend to use the term "special" as a verb a lot, which is bound to + cause confusion. It is merely a short way of saying "the edit in which + a declined submission was declined, an accepted submission was + accepted, a submission in review was set as such, a pending submission + was submitted, and a "misplaced" submission was created." + + This "information" consists of the special edit's editor, its time, and + its revision ID. If the page's status is not something that involves + "special"-ing, we will return None for all three. The same will be + returned if we cannot determine when the page was "special"-ed, or if + it was "special"-ed more than 100 edits ago. 
+ """ + if chart == self.CHART_NONE: + return None, None, None + elif chart == self.CHART_MISPLACE: + return self.get_create(pageid) + elif chart == self.CHART_ACCEPT: + search_for = None + search_not = ["R", "H", "P", "T", "D"] + elif chart == self.CHART_DRAFT: + search_for = "H" + search_not = [] + elif chart == self.CHART_PEND: + search_for = "P" + search_not = [] + elif chart == self.CHART_REVIEW: + search_for = "R" + search_not = [] + elif chart == self.CHART_DECLINE: + search_for = "D" + search_not = ["R", "H", "P", "T"] + + query = """SELECT rev_user_text, rev_timestamp, rev_id + FROM revision WHERE rev_page = ? ORDER BY rev_id DESC""" + result = self.site.sql_query(query, (pageid,)) + + counter = 0 + last = (None, None, None) + for user, ts, revid in result: + counter += 1 + if counter > 50: + msg = "Exceeded 50 content lookups while determining special for page (id: {0}, chart: {1})" + self.logger.warn(msg.format(pageid, chart)) + return None, None, None + try: + content = self.get_revision_content(revid) + except exceptions.APIError: + msg = "API error interrupted SQL query in get_special() for page (id: {0}, chart: {1})" + self.logger.exception(msg.format(pageid, chart)) + return None, None, None + statuses = self.get_statuses(content) + matches = [s in statuses for s in search_not] + if search_for: + if search_for not in statuses or any(matches): + return last + else: + if any(matches): + return last + timestamp = datetime.strptime(ts, "%Y%m%d%H%M%S") + last = (user.decode("utf8"), timestamp, revid) + + return last + + def get_create(self, pageid): + """Return information about a page's first edit ("creation"). + + This consists of the page creator, creation time, and the earliest + revision ID. 
+ """ + query = """SELECT rev_user_text, rev_timestamp, rev_id + FROM revision WHERE rev_id = + (SELECT MIN(rev_id) FROM revision WHERE rev_page = ?)""" + result = self.site.sql_query(query, (pageid,)) + c_user, c_time, c_id = list(result)[0] + timestamp = datetime.strptime(c_time, "%Y%m%d%H%M%S") + return c_user.decode("utf8"), timestamp, c_id + + def get_notes(self, chart, content, m_time, s_user): + """Return any special notes or warnings about this page. + + copyvio: submission is a suspected copyright violation + unsourced: submission lacks references completely + no-inline: submission has no inline citations + short: submission is less than a kilobyte in length + resubmit: submission was resubmitted after a previous decline + old: submission has not been touched in > 4 days + blocked: submitter is currently blocked + """ + notes = "" + + ignored_charts = [self.CHART_NONE, self.CHART_ACCEPT, self.CHART_DECLINE] + if chart in ignored_charts: + return notes + + copyvios = self.config.tasks.get("afc_copyvios", {}) + regex = "\{\{\s*" + copyvios.get("template", "AfC suspected copyvio") + if re.search(regex, content): + notes += "|nc=1" # Submission is a suspected copyvio + + if not re.search("\(.*?)\", content, re.I | re.S): + regex = "(https?:)|\[//(?!{0})([^ \]\\t\\n\\r\\f\\v]+?)" + sitedomain = re.escape(self.site.domain) + if re.search(regex.format(sitedomain), content, re.I | re.S): + notes += "|ni=1" # Submission has no inline citations + else: + notes += "|nu=1" # Submission is completely unsourced + + if len(content) < 1000: + notes += "|ns=1" # Submission is short + + statuses = self.get_statuses(content) + if "D" in statuses and chart != self.CHART_MISPLACE: + notes += "|nr=1" # Submission was resubmitted + + time_since_modify = (datetime.utcnow() - m_time).total_seconds() + max_time = 4 * 24 * 60 * 60 + if time_since_modify > max_time: + notes += "|no=1" # Submission hasn't been touched in over 4 days + + if chart in [self.CHART_PEND, self.CHART_DRAFT] 
and s_user: + submitter = self.site.get_user(s_user) + try: + if submitter.blockinfo: + notes += "|nb=1" # Submitter is blocked + except exceptions.UserNotFoundError: # Likely an IP + pass + + return notes diff --git a/tasks/afc_undated.py b/tasks/afc_undated.py new file mode 100644 index 0000000..e897bd4 --- /dev/null +++ b/tasks/afc_undated.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2009-2012 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from earwigbot.tasks import Task + +class AFCUndated(Task): + """A task to clear [[Category:Undated AfC submissions]].""" + name = "afc_undated" + + def setup(self): + pass + + def run(self, **kwargs): + pass diff --git a/tasks/blp_tag.py b/tasks/blp_tag.py new file mode 100644 index 0000000..3c5d6d1 --- /dev/null +++ b/tasks/blp_tag.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2009-2012 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from earwigbot.tasks import Task + +class BLPTag(Task): + """A task to add |blp=yes to ``{{WPB}}`` or ``{{WPBS}}`` when it is used + along with ``{{WP Biography}}``.""" + name = "blp_tag" + + def setup(self): + pass + + def run(self, **kwargs): + pass diff --git a/tasks/drn_clerkbot.py b/tasks/drn_clerkbot.py new file mode 100644 index 0000000..d2a4204 --- /dev/null +++ b/tasks/drn_clerkbot.py @@ -0,0 +1,787 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2009-2012 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from datetime import datetime +from os.path import expanduser +import re +from threading import RLock +from time import mktime, sleep, time + +import oursql + +from earwigbot import exceptions +from earwigbot.tasks import Task +from earwigbot.wiki import constants + +class DRNClerkBot(Task): + """A task to clerk for [[WP:DRN]].""" + name = "drn_clerkbot" + number = 19 + + # Case status: + STATUS_UNKNOWN = 0 + STATUS_NEW = 1 + STATUS_OPEN = 2 + STATUS_STALE = 3 + STATUS_NEEDASSIST = 4 + STATUS_REVIEW = 5 + STATUS_RESOLVED = 6 + STATUS_CLOSED = 7 + + ALIASES = { + STATUS_NEW: ("",), + STATUS_OPEN: ("open", "active", "inprogress"), + STATUS_STALE: ("stale",), + STATUS_NEEDASSIST: ("needassist", "relist", "relisted"), + STATUS_REVIEW: ("review",), + STATUS_RESOLVED: ("resolved", "resolve"), + STATUS_CLOSED: ("closed", "close"), + } + + def setup(self): + """Hook called immediately after the task is loaded.""" + cfg = self.config.tasks.get(self.name, {}) + + # Set some wiki-related attributes: + self.title = cfg.get("title", + "Wikipedia:Dispute resolution noticeboard") + self.chart_title = cfg.get("chartTitle", "Template:DRN case status") + self.volunteer_title = cfg.get("volunteers", + "Wikipedia:Dispute resolution noticeboard/Volunteering") + self.very_old_title = cfg.get("veryOldTitle", "User talk:Szhang (WMF)") + + clerk_summary = "Updating $3 case$4." + notify_summary = "Notifying user regarding [[WP:DRN|dispute resolution noticeboard]] case." + chart_summary = "Updating statistics for the [[WP:DRN|dispute resolution noticeboard]]." 
+ self.clerk_summary = self.make_summary(cfg.get("clerkSummary", clerk_summary)) + self.notify_summary = self.make_summary(cfg.get("notifySummary", notify_summary)) + self.chart_summary = self.make_summary(cfg.get("chartSummary", chart_summary)) + + # Templates used: + templates = cfg.get("templates", {}) + self.tl_status = templates.get("status", "DR case status") + self.tl_notify_party = templates.get("notifyParty", "DRN-notice") + self.tl_notify_stale = templates.get("notifyStale", "DRN stale notice") + self.tl_archive_top = templates.get("archiveTop", "DRN archive top") + self.tl_archive_bottom = templates.get("archiveBottom", + "DRN archive bottom") + self.tl_chart_header = templates.get("chartHeader", + "DRN case status/header") + self.tl_chart_row = templates.get("chartRow", "DRN case status/row") + self.tl_chart_footer = templates.get("chartFooter", + "DRN case status/footer") + + # Connection data for our SQL database: + kwargs = cfg.get("sql", {}) + kwargs["read_default_file"] = expanduser("~/.my.cnf") + self.conn_data = kwargs + self.db_access_lock = RLock() + + # Minimum size a MySQL TIMESTAMP field can hold: + self.min_ts = datetime(1970, 1, 1, 0, 0, 1) + + def run(self, **kwargs): + """Entry point for a task event.""" + if not self.db_access_lock.acquire(False): # Non-blocking + self.logger.info("A job is already ongoing; aborting") + return + action = kwargs.get("action", "all") + try: + start = time() + conn = oursql.connect(**self.conn_data) + site = self.bot.wiki.get_site() + if action in ["all", "update_volunteers"]: + self.update_volunteers(conn, site) + if action in ["all", "clerk"]: + log = u"Starting update to [[{0}]]".format(self.title) + self.logger.info(log) + cases = self.read_database(conn) + page = site.get_page(self.title) + text = page.get() + self.read_page(conn, cases, text) + notices = self.clerk(conn, cases) + if self.shutoff_enabled(): + return + if not self.save(page, cases, kwargs, start): + return + self.send_notices(site, 
notices) + if action in ["all", "update_chart"]: + if self.shutoff_enabled(): + return + self.update_chart(conn, site) + if action in ["all", "purge"]: + self.purge_old_data(conn) + finally: + self.db_access_lock.release() + + def update_volunteers(self, conn, site): + """Updates and stores the list of dispute resolution volunteers.""" + log = u"Updating volunteer list from [[{0}]]" + self.logger.info(log.format(self.volunteer_title)) + page = site.get_page(self.volunteer_title) + try: + text = page.get() + except exceptions.PageNotFoundError: + text = "" + marker = "" + if marker not in text: + log = u"The marker ({0}) wasn't found in the volunteer list at [[{1}]]!" + self.logger.error(log.format(marker, page.title)) + return + text = text.split(marker)[1] + additions = set() + for line in text.splitlines(): + user = re.search("\# \{\{User\|(.+?)\}\}", line) + if user: + uname = user.group(1).replace("_", " ").strip() + additions.add((uname[0].upper() + uname[1:],)) + + removals = set() + query1 = "SELECT volunteer_username FROM volunteers" + query2 = "DELETE FROM volunteers WHERE volunteer_username = ?" 
+ query3 = "INSERT INTO volunteers (volunteer_username) VALUES (?)" + with conn.cursor() as cursor: + cursor.execute(query1) + for row in cursor: + if row in additions: + additions.remove(row) + else: + removals.add(row) + if removals: + cursor.executemany(query2, removals) + if additions: + cursor.executemany(query3, additions) + + def read_database(self, conn): + """Return a list of _Cases from the database.""" + cases = [] + query = "SELECT * FROM cases" + with conn.cursor() as cursor: + cursor.execute(query) + for row in cursor: + case = _Case(*row) + cases.append(case) + log = "Read {0} cases from the database" + self.logger.debug(log.format(len(cases))) + return cases + + def read_page(self, conn, cases, text): + """Read the noticeboard content and update the list of _Cases.""" + nextid = self.select_next_id(conn) + tl_status_esc = re.escape(self.tl_status) + split = re.split("(^==\s*[^=]+?\s*==$)", text, flags=re.M|re.U) + for i in xrange(len(split)): + if i + 1 == len(split): + break + if not split[i].startswith("=="): + continue + title = split[i][2:-2].strip() + body = old = split[i + 1] + if not re.search("\s*\{\{" + tl_status_esc, body, re.U): + continue + status = self.read_status(body) + re_id = "" + try: + id_ = int(re.search(re_id, body).group(1)) + case = [case for case in cases if case.id == id_][0] + except (AttributeError, IndexError, ValueError): + id_ = nextid + nextid += 1 + re_id2 = "(\{\{" + tl_status_esc + re_id2 += r"(.*?)\}\})()?" 
+ repl = ur"\1 " + body = re.sub(re_id2, repl.format(id_), body) + re_f = r"\{\{drn filing editor\|(.*?)\|" + re_f += r"(\d{2}:\d{2},\s\d{1,2}\s\w+\s\d{4}\s\(UTC\))\}\}" + match = re.search(re_f, body, re.U) + if match: + f_user = match.group(1).split("/", 1)[0].replace("_", " ") + f_user = f_user[0].upper() + f_user[1:] + strp = "%H:%M, %d %B %Y (UTC)" + f_time = datetime.strptime(match.group(2), strp) + else: + f_user, f_time = None, datetime.utcnow() + case = _Case(id_, title, status, self.STATUS_UNKNOWN, f_user, + f_time, f_user, f_time, "", self.min_ts, + self.min_ts, False, False, False, len(body), + new=True) + cases.append(case) + log = u"Added new case {0} ('{1}', status={2}, by {3})" + self.logger.debug(log.format(id_, title, status, f_user)) + else: + case.status = status + log = u"Read active case {0} ('{1}')".format(id_, title) + self.logger.debug(log) + if case.title != title: + self.update_case_title(conn, id_, title) + case.title = title + case.body, case.old = body, old + + for case in cases[:]: + if case.body is None: + if case.original_status == self.STATUS_UNKNOWN: + cases.remove(case) # Ignore archived case + else: + case.status = self.STATUS_UNKNOWN + log = u"Dropped case {0} because it is no longer on the page ('{1}')" + self.logger.debug(log.format(case.id, case.title)) + + self.logger.debug("Done reading cases from the noticeboard page") + + def select_next_id(self, conn): + """Return the next incremental ID for a case.""" + query = "SELECT MAX(case_id) FROM cases" + with conn.cursor() as cursor: + cursor.execute(query) + current = cursor.fetchone()[0] + if current: + return int(current) + 1 + return 1 + + def read_status(self, body): + """Parse the current status from a case body.""" + templ = re.escape(self.tl_status) + status = re.search("\{\{" + templ + "\|?(.*?)\}\}", body, re.S|re.U) + if not status: + return self.STATUS_NEW + for option, names in self.ALIASES.iteritems(): + if status.group(1).lower() in names: + return option + 
return self.STATUS_NEW + + def update_case_title(self, conn, id_, title): + """Update a case title in the database.""" + query = "UPDATE cases SET case_title = ? WHERE case_id = ?" + with conn.cursor() as cursor: + cursor.execute(query, (title, id_)) + log = u"Updated title of case {0} to '{1}'".format(id_, title) + self.logger.debug(log) + + def clerk(self, conn, cases): + """Actually go through cases and modify those to be updated.""" + query = "SELECT volunteer_username FROM volunteers" + with conn.cursor() as cursor: + cursor.execute(query) + volunteers = [name for (name,) in cursor.fetchall()] + notices = [] + for case in cases: + log = u"Clerking case {0} ('{1}')".format(case.id, case.title) + self.logger.debug(log) + if case.status == self.STATUS_UNKNOWN: + self.save_existing_case(conn, case) + else: + notices += self.clerk_case(conn, case, volunteers) + self.logger.debug("Done clerking cases") + return notices + + def clerk_case(self, conn, case, volunteers): + """Clerk a particular case and return a list of any notices to send.""" + notices = [] + signatures = self.read_signatures(case.body) + storedsigs = self.get_signatures_from_db(conn, case) + newsigs = set(signatures) - set(storedsigs) + if any([editor in volunteers for (editor, timestamp) in newsigs]): + case.last_volunteer_size = len(case.body) + + if case.status == self.STATUS_NEW: + notices = self.clerk_new_case(case, volunteers, signatures) + elif case.status == self.STATUS_OPEN: + notices = self.clerk_open_case(case, signatures) + elif case.status == self.STATUS_NEEDASSIST: + notices = self.clerk_needassist_case(case, volunteers, newsigs) + elif case.status == self.STATUS_STALE: + notices = self.clerk_stale_case(case, newsigs) + elif case.status == self.STATUS_REVIEW: + notices = self.clerk_review_case(case) + elif case.status in [self.STATUS_RESOLVED, self.STATUS_CLOSED]: + self.clerk_closed_case(case, signatures) + self.save_case_updates(conn, case, volunteers, signatures, storedsigs) + return 
notices + + def clerk_new_case(self, case, volunteers, signatures): + """Clerk a case in the "brand new" state. + + The case will be set to "open" if a volunteer edits it, or "needassist" + if it increases by over 15,000 bytes or goes by without any volunteer + edits for two days. + """ + notices = self.notify_parties(case) + if any([editor in volunteers for (editor, timestamp) in signatures]): + self.update_status(case, self.STATUS_OPEN) + else: + age = (datetime.utcnow() - case.file_time).total_seconds() + if age > 60 * 60 * 24 * 2: + self.update_status(case, self.STATUS_NEEDASSIST) + elif len(case.body) - case.last_volunteer_size > 15000: + self.update_status(case, self.STATUS_NEEDASSIST) + return notices + + def clerk_open_case(self, case, signatures): + """Clerk an open case (has been edited by a reviewer). + + The case will be set to "needassist" if 15,000 bytes have been added + since a volunteer last edited, "stale" if no edits have occured in two + days, or "review" if it has been open for over four days. + """ + if self.check_for_review(case): + return [] + if len(case.body) - case.last_volunteer_size > 15000: + self.update_status(case, self.STATUS_NEEDASSIST) + timestamps = [timestamp for (editor, timestamp) in signatures] + if timestamps: + age = (datetime.utcnow() - max(timestamps)).total_seconds() + if age > 60 * 60 * 24 * 2: + self.update_status(case, self.STATUS_STALE) + return [] + + def clerk_needassist_case(self, case, volunteers, newsigs): + """Clerk a "needassist" case (no volunteer edits in 15,000 bytes). + + The case will be set to "open" if a volunteer edits, or "review" if it + has been open for over four days. + """ + if self.check_for_review(case): + return [] + if any([editor in volunteers for (editor, timestamp) in newsigs]): + self.update_status(case, self.STATUS_OPEN) + return [] + + def clerk_stale_case(self, case, newsigs): + """Clerk a stale case (no edits in two days). 
+ + The case will be set to "open" if anyone edits, or "review" if it has + been open for over four days. + """ + if self.check_for_review(case): + return [] + if newsigs: + self.update_status(case, self.STATUS_OPEN) + return [] + + def clerk_review_case(self, case): + """Clerk a "review" case (open for more than four days). + + A message will be set to the "very old notifiee", which is generally + [[User talk:Szhang (WMF)]], if the case has been open for more than + five days. + """ + age = (datetime.utcnow() - case.file_time).total_seconds() + if age > 60 * 60 * 24 * 5: + if not case.very_old_notified: + tmpl = self.tl_notify_stale + title = case.title.replace("|", "|") + template = "{{subst:" + tmpl + "|" + title + "}}" + miss = "".format(title) + notice = _Notice(self.very_old_title, template, miss) + case.very_old_notified = True + msg = u" {0}: will notify [[{1}]] with '{2}'" + log = msg.format(case.id, self.very_old_title, template) + self.logger.debug(log) + return [notice] + return [] + + def clerk_closed_case(self, case, signatures): + """Clerk a closed or resolved case. + + The case will be archived if it has been closed/resolved for more than + one day and no edits have been made in the meantime. "Archiving" is + the process of adding {{DRN archive top}}, {{DRN archive bottom}}, and + removing the [[User:DoNotArchiveUntil]] comment. + """ + if case.close_time == self.min_ts: + case.close_time = datetime.utcnow() + if case.archived: + return + timestamps = [timestamp for (editor, timestamp) in signatures] + closed_age = (datetime.utcnow() - case.close_time).total_seconds() + if timestamps: + modify_age = (datetime.utcnow() - max(timestamps)).total_seconds() + else: + modify_age = 0 + if closed_age > 60 * 60 * 24 and modify_age > 60 * 60 * 24: + arch_top = self.tl_archive_top + arch_bottom = self.tl_archive_bottom + reg = "()?" 
+ if re.search(reg, case.body): + case.body = re.sub("\{\{" + arch_top + "\}\}", "", case.body) + case.body = re.sub(reg, "{{" + arch_top + "}}", case.body) + if not re.search(arch_bottom + "\s*\}\}\s*\Z", case.body): + case.body += "\n{{" + arch_bottom + "}}" + case.archived = True + self.logger.debug(u" {0}: archived case".format(case.id)) + + def check_for_review(self, case): + """Check whether a case is old enough to be set to "review".""" + age = (datetime.utcnow() - case.file_time).total_seconds() + if age > 60 * 60 * 24 * 4: + self.update_status(case, self.STATUS_REVIEW) + return True + return False + + def update_status(self, case, new): + """Safely update the status of a case, so we don't edit war.""" + old_n = self.ALIASES[case.status][0].upper() + new_n = self.ALIASES[new][0].upper() + old_n = "NEW" if not old_n else old_n + new_n = "NEW" if not new_n else new_n + if case.last_action != new: + case.status = new + log = u" {0}: {1} -> {2}" + self.logger.debug(log.format(case.id, old_n, new_n)) + return + log = u"Avoiding {0} {1} -> {2} because we already did this ('{3}')" + self.logger.info(log.format(case.id, old_n, new_n, case.title)) + + def read_signatures(self, text): + """Return a list of all parseable signatures in the body of a case. + + Signatures are returned as tuples of (editor, timestamp as datetime). 
+ """ + regex = r"\[\[(?:User(?:\stalk)?\:|Special\:Contributions\/)" + regex += r"([^\n\[\]|]{,256}?)(?:\||\]\])" + regex += r"(?!.*?(?:User(?:\stalk)?\:|Special\:Contributions\/).*?)" + regex += r".{,256}?(\d{2}:\d{2},\s\d{1,2}\s\w+\s\d{4}\s\(UTC\))" + matches = re.findall(regex, text, re.U|re.I) + signatures = [] + for userlink, stamp in matches: + username = userlink.split("/", 1)[0].replace("_", " ").strip() + username = username[0].upper() + username[1:] + if username == "DoNotArchiveUntil": + continue + stamp = stamp.strip() + timestamp = datetime.strptime(stamp, "%H:%M, %d %B %Y (UTC)") + signatures.append((username, timestamp)) + return signatures + + def get_signatures_from_db(self, conn, case): + """Return a list of signatures in a case from the database. + + The return type is the same as read_signatures(). + """ + query = "SELECT signature_username, signature_timestamp FROM signatures WHERE signature_case = ?" + with conn.cursor() as cursor: + cursor.execute(query, (case.id,)) + return cursor.fetchall() + + def notify_parties(self, case): + """Schedule notices to be sent to all parties of a case.""" + if case.parties_notified: + return [] + + notices = [] + template = "{{subst:" + self.tl_notify_party + template += "|thread=" + case.title + "}} ~~~~" + too_late = "" + + re_parties = "'''Users involved'''(.*?)" + text = re.search(re_parties, case.body, re.S|re.U) + for line in text.group(1).splitlines(): + user = re.search("[:*#]{,5} \{\{User\|(.*?)\}\}", line) + if user: + party = user.group(1).replace("_", " ").strip() + if party: + party = party[0].upper() + party[1:] + if party == case.file_user: + continue + notice = _Notice("User talk:" + party, template, too_late) + notices.append(notice) + + case.parties_notified = True + log = u" {0}: will try to notify {1} parties with '{2}'" + self.logger.debug(log.format(case.id, len(notices), template)) + return notices + + def save_case_updates(self, conn, case, volunteers, sigs, storedsigs): + """Save any 
updates made to a case and signatures in the database.""" + if case.status != case.original_status: + case.last_action = case.status + new = self.ALIASES[case.status][0] + tl_status_esc = re.escape(self.tl_status) + search = "\{\{" + tl_status_esc + "(\|?.*?)\}\}" + repl = "{{" + self.tl_status + "|" + new + "}}" + case.body = re.sub(search, repl, case.body) + + if sigs: + newest_ts = max([stamp for (user, stamp) in sigs]) + newest_user = [usr for (usr, stamp) in sigs if stamp == newest_ts][0] + case.modify_time = newest_ts + case.modify_user = newest_user + + if any([usr in volunteers for (usr, stamp) in sigs]): + newest_vts = max([stamp for (usr, stamp) in sigs if usr in volunteers]) + newest_vuser = [usr for (usr, stamp) in sigs if stamp == newest_vts][0] + case.volunteer_time = newest_vts + case.volunteer_user = newest_vuser + + if case.new: + self.save_new_case(conn, case) + else: + self.save_existing_case(conn, case) + + with conn.cursor() as cursor: + query1 = "DELETE FROM signatures WHERE signature_case = ? AND signature_username = ? AND signature_timestamp = ?" 
+ query2 = "INSERT INTO signatures (signature_case, signature_username, signature_timestamp) VALUES (?, ?, ?)" + removals = set(storedsigs) - set(sigs) + additions = set(sigs) - set(storedsigs) + if removals: + args = [(case.id, name, stamp) for (name, stamp) in removals] + cursor.executemany(query1, args) + if additions: + args = [] + for name, stamp in additions: + args.append((case.id, name, stamp)) + cursor.executemany(query2, args) + msg = u" {0}: added {1} signatures and removed {2}" + log = msg.format(case.id, len(additions), len(removals)) + self.logger.debug(log) + + def save_new_case(self, conn, case): + """Save a brand new case to the database.""" + args = (case.id, case.title, case.status, case.last_action, + case.file_user, case.file_time, case.modify_user, + case.modify_time, case.volunteer_user, case.volunteer_time, + case.close_time, case.parties_notified, + case.very_old_notified, case.archived, + case.last_volunteer_size) + with conn.cursor() as cursor: + query = "INSERT INTO cases VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" + cursor.execute(query, args) + log = u" {0}: inserted new case into database".format(case.id) + self.logger.debug(log) + + def save_existing_case(self, conn, case): + """Save an existing case to the database, updating as necessary.""" + with conn.cursor(oursql.DictCursor) as cursor: + query = "SELECT * FROM cases WHERE case_id = ?" 
+ cursor.execute(query, (case.id,)) + stored = cursor.fetchone() + + with conn.cursor() as cursor: + changes, args = [], [] + fields_to_check = [ + ("case_status", case.status), + ("case_last_action", case.last_action), + ("case_file_user", case.file_user), + ("case_file_time", case.file_time), + ("case_modify_user", case.modify_user), + ("case_modify_time", case.modify_time), + ("case_volunteer_user", case.volunteer_user), + ("case_volunteer_time", case.volunteer_time), + ("case_close_time", case.close_time), + ("case_parties_notified", case.parties_notified), + ("case_very_old_notified", case.very_old_notified), + ("case_archived", case.archived), + ("case_last_volunteer_size", case.last_volunteer_size) + ] + for column, data in fields_to_check: + if data != stored[column]: + changes.append(column + " = ?") + args.append(data) + msg = u" {0}: will alter {1} ('{2}' -> '{3}')" + log = msg.format(case.id, column, stored[column], data) + self.logger.debug(log) + if changes: + changes = ", ".join(changes) + args.append(case.id) + query = "UPDATE cases SET {0} WHERE case_id = ?".format(changes) + cursor.execute(query, args) + else: + log = u" {0}: no changes to commit".format(case.id) + self.logger.debug(log) + + def save(self, page, cases, kwargs, start): + """Save any changes to the noticeboard.""" + newtext = text = page.get() + counter = 0 + for case in cases: + if case.old != case.body: + newtext = newtext.replace(case.old, case.body) + counter += 1 + if newtext == text: + self.logger.info(u"Nothing to edit on [[{0}]]".format(page.title)) + return True + + worktime = time() - start + if worktime < 60: + log = "Waiting {0} seconds to avoid edit conflicts" + self.logger.debug(log.format(int(60 - worktime))) + sleep(60 - worktime) + page.reload() + if page.get() != text: + log = "Someone has edited the page while we were working; restarting" + self.logger.warn(log) + self.run(**kwargs) + return False + summary = self.clerk_summary.replace("$3", str(counter)) + 
summary = summary.replace("$4", "" if counter == 1 else "s") + page.edit(newtext, summary, minor=True, bot=True) + log = u"Saved page [[{0}]] ({1} updates)" + self.logger.info(log.format(page.title, counter)) + return True + + def send_notices(self, site, notices): + """Send out any templated notices to users or pages.""" + if not notices: + self.logger.info("No notices to send") + return + for notice in notices: + target, template = notice.target, notice.template + log = u"Trying to notify [[{0}]] with '{1}'" + self.logger.debug(log.format(target, template)) + page = site.get_page(target) + if page.namespace == constants.NS_USER_TALK: + user = site.get_user(target.split(":", 1)[1]) + if not user.exists and not user.is_ip: + log = u"Skipping [[{0}]]; user does not exist and is not an IP" + self.logger.info(log.format(target)) + continue + try: + text = page.get() + except exceptions.PageNotFoundError: + text = "" + if notice.too_late and notice.too_late in text: + log = u"Skipping [[{0}]]; was already notified with '{1}'" + self.logger.info(log.format(page.title, template)) + continue + text += ("\n" if text else "") + template + try: + page.edit(text, self.notify_summary, minor=False, bot=True) + except exceptions.EditError as error: + name, msg = type(error).name, error.message + log = u"Couldn't leave notice on [[{0}]] because of {1}: {2}" + self.logger.error(log.format(page.title, name, msg)) + else: + log = u"Notified [[{0}]] with '{1}'" + self.logger.info(log.format(page.title, template)) + + self.logger.debug("Done sending notices") + + def update_chart(self, conn, site): + """Update the chart of open or recently closed cases.""" + page = site.get_page(self.chart_title) + self.logger.info(u"Updating case status at [[{0}]]".format(page.title)) + statuses = self.compile_chart(conn) + text = page.get() + newtext = re.sub(u"(.*?)", + "\n" + statuses + "\n", + text, flags=re.DOTALL) + if newtext == text: + self.logger.info("Chart unchanged; not saving") + return 
+ + newtext = re.sub("(.*?)", + "~~~ at ~~~~~", + newtext) + page.edit(newtext, self.chart_summary, minor=True, bot=True) + self.logger.info(u"Chart saved to [[{0}]]".format(page.title)) + + def compile_chart(self, conn): + """Actually generate the chart from the database.""" + chart = "{{" + self.tl_chart_header + "|small={{{small|}}}}}\n" + query = "SELECT * FROM cases WHERE case_status != ?" + with conn.cursor(oursql.DictCursor) as cursor: + cursor.execute(query, (self.STATUS_UNKNOWN,)) + for case in cursor: + chart += self.compile_row(case) + chart += "{{" + self.tl_chart_footer + "|small={{{small|}}}}}" + return chart + + def compile_row(self, case): + """Generate a single row of the chart from a dict via the database.""" + data = u"|t={case_title}|d={title}|s={case_status}" + data += "|cu={case_file_user}|cs={file_sortkey}|ct={file_time}" + if case["case_volunteer_user"]: + data += "|vu={case_volunteer_user}|vs={volunteer_sortkey}|vt={volunteer_time}" + case["volunteer_time"] = self.format_time(case["case_volunteer_time"]) + case["volunteer_sortkey"] = int(mktime(case["case_volunteer_time"].timetuple())) + data += "|mu={case_modify_user}|ms={modify_sortkey}|mt={modify_time}" + + title = case["case_title"].replace("_", " ").replace("|", "|") + case["title"] = title[:47] + "..." 
            if len(title) > 50 else title
        # Display strings and numeric sortkeys for the filed/modified stamps.
        case["file_time"] = self.format_time(case["case_file_time"])
        case["file_sortkey"] = int(mktime(case["case_file_time"].timetuple()))
        case["modify_time"] = self.format_time(case["case_modify_time"])
        case["modify_sortkey"] = int(mktime(case["case_modify_time"].timetuple()))
        row = "{{" + self.tl_chart_row + data.format(**case)
        return row + "|sm={{{small|}}}}}\n"

    def format_time(self, dt):
        """Return a string telling the time since datetime occurred."""
        # Buckets are truncating: 1 year = 365 days; months are not reported.
        parts = [("year", 31536000), ("day", 86400), ("hour", 3600)]
        seconds = int((datetime.utcnow() - dt).total_seconds())
        msg = []
        for name, size in parts:
            num = seconds // size
            seconds -= num * size
            if num:
                chunk = "{0} {1}".format(num, name if num == 1 else name + "s")
                msg.append(chunk)
        # Precedence note: this parses as (join + " ago") if msg else default.
        return ", ".join(msg) + " ago" if msg else "0 hours ago"

    def purge_old_data(self, conn):
        """Delete old cases (> six months) from the database."""
        log = "Purging closed cases older than six months from the database"
        self.logger.info(log)
        query = """DELETE cases, signatures
                   FROM cases JOIN signatures ON case_id = signature_case
                   WHERE case_status = ?
                   AND case_file_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 180 DAY)
                   AND case_modify_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 180 DAY)
                   """
        with conn.cursor() as cursor:
            cursor.execute(query, (self.STATUS_UNKNOWN,))


class _Case(object):
    """An object representing a dispute resolution case.

    Plain value holder mirroring the `cases` DB table, plus bookkeeping
    fields (original_status, body, old) used while clerking.

    NOTE(review): the parameter order here is (..., parties_notified,
    archived, very_old_notified, ...) while the DB columns and
    save_new_case's argument tuple use (..., parties_notified,
    very_old_notified, archived, ...). The assignments below are consistent
    with the parameter names, but positional callers must take care —
    confirm call sites pass these two in this order.
    """
    def __init__(self, id_, title, status, last_action, file_user, file_time,
                 modify_user, modify_time, volunteer_user, volunteer_time,
                 close_time, parties_notified, archived, very_old_notified,
                 last_volunteer_size, new=False):
        self.id = id_
        self.title = title
        self.status = status
        self.last_action = last_action
        self.file_user = file_user
        self.file_time = file_time
        self.modify_user = modify_user
        self.modify_time = modify_time
        self.volunteer_user = volunteer_user
        self.volunteer_time = volunteer_time
        self.close_time = close_time
        self.parties_notified = parties_notified
        self.very_old_notified = very_old_notified
        self.archived = archived
        self.last_volunteer_size = last_volunteer_size
        self.new = new

        # Clerking state, filled in later: status at load time, rebuilt
        # wikitext body, and the original wikitext it replaces.
        self.original_status = status
        self.body = None
        self.old = None


class _Notice(object):
    """An object representing a notice to be sent to a user or a page."""
    def __init__(self, target, template, too_late=None):
        self.target = target      # page title to notify
        self.template = template  # wikitext appended to the target
        self.too_late = too_late  # marker text; skip if already present
diff --git a/tasks/image_display_resize.py b/tasks/image_display_resize.py
new file mode 100644
index 0000000..18b622c
--- /dev/null
+++ b/tasks/image_display_resize.py
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2009-2012 Ben Kurtovic
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from earwigbot.tasks import Task
+
+class ImageDisplayResize(Task):
+    """A task to resize upscaled portraits in infoboxes."""
+    name = "image_display_resize"
+
+    def setup(self):
+        pass  # placeholder: task not yet implemented
+
+    def run(self, **kwargs):
+        pass  # placeholder: task not yet implemented
diff --git a/tasks/schema/afc_copyvios.sql b/tasks/schema/afc_copyvios.sql
new file mode 100644
index 0000000..37a6729
--- /dev/null
+++ b/tasks/schema/afc_copyvios.sql
+-- MySQL dump 10.13  Distrib 5.5.12, for solaris10 (i386)
+--
+-- Host: sql    Database: u_earwig_afc_copyvios
+-- ------------------------------------------------------
+-- Server version 5.1.59
+
+CREATE DATABASE `u_earwig_afc_copyvios`
+    DEFAULT CHARACTER SET utf8
+    DEFAULT COLLATE utf8_unicode_ci;
+
+--
+-- Table structure for table `cache`
+--
+
+DROP TABLE IF EXISTS `cache`;
+CREATE TABLE `cache` (
+  `cache_id` int(10) unsigned NOT NULL,
+  `cache_hash` char(64) COLLATE utf8_unicode_ci DEFAULT NULL,
+  `cache_url` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
+  `cache_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
+  `cache_queries` int(4) DEFAULT NULL,
+  `cache_process_time` float DEFAULT NULL,
+  PRIMARY KEY (`cache_id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
+
+--
+-- Table structure for table `processed`
+--
+
+DROP TABLE IF EXISTS `processed`;
+CREATE
TABLE `processed` (
+  `page_id` int(10) unsigned NOT NULL,
+  PRIMARY KEY (`page_id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
+
+-- Dump completed on 2012-07-20 20:21:00
diff --git a/tasks/schema/afc_history.sql b/tasks/schema/afc_history.sql
new file mode 100644
index 0000000..4fa20fd
--- /dev/null
+++ b/tasks/schema/afc_history.sql
+-- MySQL dump 10.13  Distrib 5.5.12, for solaris10 (i386)
+--
+-- Host: sql    Database: u_earwig_afc_history
+-- ------------------------------------------------------
+-- Server version 5.1.59
+
+CREATE DATABASE `u_earwig_afc_history`
+    DEFAULT CHARACTER SET utf8
+    DEFAULT COLLATE utf8_unicode_ci;
+
+--
+-- Table structure for table `page`
+--
+
+DROP TABLE IF EXISTS `page`;
+CREATE TABLE `page` (
+  `page_id` int(10) unsigned NOT NULL,
+  `page_date` varchar(50) COLLATE utf8_unicode_ci DEFAULT NULL,
+  `page_status` tinyint(3) unsigned DEFAULT NULL,
+  PRIMARY KEY (`page_id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
+
+-- Dump completed on 2012-07-20 20:20:39
diff --git a/tasks/schema/afc_statistics.sql b/tasks/schema/afc_statistics.sql
new file mode 100644
index 0000000..6e6e2dd
--- /dev/null
+++ b/tasks/schema/afc_statistics.sql
+-- MySQL dump 10.13  Distrib 5.5.12, for solaris10 (i386)
+--
+-- Host: sql    Database: u_earwig_afc_statistics
+-- ------------------------------------------------------
+-- Server version 5.1.59
+
+CREATE DATABASE `u_earwig_afc_statistics`
+    DEFAULT CHARACTER SET utf8
+    DEFAULT COLLATE utf8_unicode_ci;
+
+--
+-- Table structure for table `chart`
+--
+
+DROP TABLE IF EXISTS `chart`;
+CREATE TABLE `chart` (
+  `chart_id` tinyint(3) unsigned NOT NULL AUTO_INCREMENT,
+  `chart_title` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
+  `chart_special_title` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
+  PRIMARY KEY (`chart_id`)
+) ENGINE=InnoDB AUTO_INCREMENT=7 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
+
+--
+-- Dumping data for table `chart`
+--
+
+LOCK TABLES `chart` WRITE;
+-- NOTE(review): chart_id 2 is absent from this dump — confirm intentional.
+INSERT INTO `chart` VALUES
+(1,'Pending submissions','Submitted'),
+(3,'Being reviewed','Reviewer'),
+(4,'Recently accepted','Accepted'),
+(5,'Recently declined','Declined'),
+(6,'Misplaced submissions','Created');
+UNLOCK TABLES;
+
+--
+-- Table structure for table `row`
+--
+
+DROP TABLE IF EXISTS `row`;
+CREATE TABLE `row` (
+  `row_id` int(10) unsigned NOT NULL,
+  `row_chart` tinyint(3) unsigned DEFAULT NULL,
+  PRIMARY KEY (`row_id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
+
+--
+-- Table structure for table `page`
+--
+
+DROP TABLE IF EXISTS `page`;
+CREATE TABLE `page` (
+  `page_id` int(10) unsigned NOT NULL,
+  `page_status` varchar(16) COLLATE utf8_unicode_ci DEFAULT NULL,
+  `page_title` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
+  `page_short` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
+  `page_size` varchar(16) COLLATE utf8_unicode_ci DEFAULT NULL,
+  `page_notes` tinytext COLLATE utf8_unicode_ci,
+  `page_modify_user` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
+  `page_modify_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
+  `page_modify_oldid` int(10) unsigned DEFAULT NULL,
+  `page_special_user` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
+  `page_special_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
+  `page_special_oldid` int(10) unsigned DEFAULT NULL,
+  PRIMARY KEY (`page_id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
+
+-- Dump completed on 2012-07-20 20:25:10
diff --git a/tasks/schema/drn_clerkbot.sql b/tasks/schema/drn_clerkbot.sql
new file mode 100644
index 0000000..c71c49b
--- /dev/null
+++ b/tasks/schema/drn_clerkbot.sql
+-- MySQL dump 10.13  Distrib 5.5.12, for solaris10 (i386)
+--
+-- Host: sql    Database: u_earwig_drn_clerkbot
+-- ------------------------------------------------------
+-- Server version 5.1.59
+
+CREATE DATABASE `u_earwig_drn_clerkbot`
+    DEFAULT CHARACTER SET utf8
+    DEFAULT COLLATE utf8_unicode_ci;
+
+--
+-- Table structure for table `cases` (header previously said `case`)
+--
+
+DROP TABLE IF EXISTS `cases`;
+CREATE TABLE `cases` (
+  `case_id` int(10) unsigned NOT NULL,
+  `case_title` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
+  `case_status` int(2) unsigned DEFAULT NULL,
+  `case_last_action` int(2) unsigned DEFAULT NULL,
+  `case_file_user` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
+  `case_file_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
+  `case_modify_user` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
+  `case_modify_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
+  `case_volunteer_user` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
+  `case_volunteer_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
+  `case_close_time` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
+  `case_parties_notified` tinyint(1) unsigned DEFAULT NULL,
+  `case_very_old_notified` tinyint(1) unsigned DEFAULT NULL,
+  `case_archived` tinyint(1) unsigned DEFAULT NULL,
+  `case_last_volunteer_size` int(9) unsigned DEFAULT NULL,
+  PRIMARY KEY (`case_id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
+
+--
+-- Table structure for table `signatures` (header previously said `signature`)
+--
+
+DROP TABLE IF EXISTS `signatures`;
+CREATE TABLE `signatures` (
+  `signature_id` int(10) unsigned NOT NULL AUTO_INCREMENT,
+  `signature_case` int(10) unsigned NOT NULL,
+  `signature_username` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
+  `signature_timestamp` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
+  PRIMARY KEY (`signature_id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
+
+--
+-- Table structure for table `volunteers` (header previously said `volunteer`)
+--
+
+DROP TABLE IF EXISTS `volunteers`;
+CREATE TABLE `volunteers` (
+  `volunteer_id` int(10) unsigned NOT NULL AUTO_INCREMENT,
+  `volunteer_username` varchar(512) COLLATE utf8_unicode_ci DEFAULT NULL,
+  PRIMARY KEY (`volunteer_id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
+
+-- Dump completed on 2012-07-31  1:34:28