Browse Source

Code cleanup with ruff + add pre-commit hook

main
Ben Kurtovic 3 weeks ago
parent
commit
49a463a40a
27 changed files with 895 additions and 599 deletions
  1. +7
    -0
      .pre-commit-config.yaml
  2. +2
    -2
      commands/afc_pending.py
  3. +16
    -11
      commands/afc_report.py
  4. +34
    -28
      commands/afc_status.py
  5. +3
    -3
      commands/afc_submissions.py
  6. +22
    -11
      commands/block_monitor.py
  7. +12
    -13
      commands/geolocate.py
  8. +36
    -30
      commands/git_command.py
  9. +42
    -22
      commands/partwhen.py
  10. +2
    -2
      commands/praise.py
  11. +65
    -43
      commands/rc_monitor.py
  12. +17
    -13
      commands/stars.py
  13. +10
    -10
      commands/urbandictionary.py
  14. +61
    -48
      commands/weather.py
  15. +22
    -14
      commands/welcome.py
  16. +8
    -0
      pyproject.toml
  17. +2
    -2
      tasks/afc_catdelink.py
  18. +28
    -24
      tasks/afc_copyvios.py
  19. +7
    -5
      tasks/afc_dailycats.py
  20. +2
    -2
      tasks/afc_history.py
  21. +106
    -62
      tasks/afc_statistics.py
  22. +64
    -30
      tasks/afc_undated.py
  23. +21
    -11
      tasks/banner_untag.py
  24. +2
    -2
      tasks/blp_tag.py
  25. +149
    -84
      tasks/drn_clerkbot.py
  26. +31
    -23
      tasks/infobox_station.py
  27. +124
    -104
      tasks/synonym_authorities.py

+ 7
- 0
.pre-commit-config.yaml View File

@@ -0,0 +1,7 @@
repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.3.5
    hooks:
      - id: ruff
        args: [--fix]
      - id: ruff-format

+ 2
- 2
commands/afc_pending.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -22,8 +20,10 @@

from earwigbot.commands import Command


class AfCPending(Command):
"""Link the user to the pending AfC submissions page and category."""

name = "pending"
commands = ["pending", "pend"]



+ 16
- 11
commands/afc_report.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -23,8 +21,10 @@
from earwigbot import wiki
from earwigbot.commands import Command


class AfCReport(Command):
"""Get information about an AfC submission by name."""

name = "report"

def process(self, data):
@@ -45,19 +45,24 @@ class AfCReport(Command):
self.reply(data, msg)
return

title = " ".join(data.args).replace("http://en.wikipedia.org/wiki/",
"").replace("http://enwp.org/", "").strip()
title = (
" ".join(data.args)
.replace("http://en.wikipedia.org/wiki/", "")
.replace("http://enwp.org/", "")
.strip()
)
titles = [
title, "Draft:" + title,
title,
"Draft:" + title,
"Wikipedia:Articles for creation/" + title,
"Wikipedia talk:Articles for creation/" + title
"Wikipedia talk:Articles for creation/" + title,
]
for attempt in titles:
page = self.site.get_page(attempt, follow_redirects=False)
if page.exists == page.PAGE_EXISTS:
return self.report(page)

self.reply(data, "Submission \x0302{0}\x0F not found.".format(title))
self.reply(data, f"Submission \x0302{title}\x0f not found.")

def report(self, page):
url = page.url.encode("utf8")
@@ -67,11 +72,11 @@ class AfCReport(Command):
user_name = user.name
user_url = user.get_talkpage().url.encode("utf8")

msg1 = "AfC submission report for \x0302{0}\x0F ({1}):"
msg2 = "Status: \x0303{0}\x0F"
msg3 = "Submitted by \x0302{0}\x0F ({1})"
msg1 = "AfC submission report for \x0302{0}\x0f ({1}):"
msg2 = "Status: \x0303{0}\x0f"
msg3 = "Submitted by \x0302{0}\x0f ({1})"
if status == "accepted":
msg3 = "Reviewed by \x0302{0}\x0F ({1})"
msg3 = "Reviewed by \x0302{0}\x0f ({1})"

self.reply(self.data, msg1.format(page.title.encode("utf8"), url))
self.say(self.data.chan, msg2.format(status))


+ 34
- 28
commands/afc_status.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -24,9 +22,11 @@ import re

from earwigbot.commands import Command


class AfCStatus(Command):
"""Get the number of pending AfC submissions, open redirect requests, and
open file upload requests."""

name = "status"
commands = ["status", "count", "num", "number"]
hooks = ["join", "msg"]
@@ -56,7 +56,7 @@ class AfCStatus(Command):
self.site = self.bot.wiki.get_site()

if data.line[1] == "JOIN":
status = " ".join(("\x02Current status:\x0F", self.get_status()))
status = " ".join(("\x02Current status:\x0f", self.get_status()))
self.notice(data.nick, status)
return

@@ -64,51 +64,56 @@ class AfCStatus(Command):
action = data.args[0].lower()
if action.startswith("sub") or action == "s":
subs = self.count_submissions()
msg = "There are \x0305{0}\x0F pending AfC submissions (\x0302WP:AFC\x0F)."
msg = "There are \x0305{0}\x0f pending AfC submissions (\x0302WP:AFC\x0f)."
self.reply(data, msg.format(subs))

elif action.startswith("redir") or action == "r":
redirs = self.count_redirects()
msg = "There are \x0305{0}\x0F open redirect requests (\x0302WP:AFC/R\x0F)."
msg = "There are \x0305{0}\x0f open redirect requests (\x0302WP:AFC/R\x0f)."
self.reply(data, msg.format(redirs))

elif action.startswith("file") or action == "f":
files = self.count_redirects()
msg = "There are \x0305{0}\x0F open file upload requests (\x0302WP:FFU\x0F)."
msg = "There are \x0305{0}\x0f open file upload requests (\x0302WP:FFU\x0f)."
self.reply(data, msg.format(files))

elif action.startswith("agg") or action == "a":
try:
agg_num = int(data.args[1])
except IndexError:
agg_data = (self.count_submissions(),
self.count_redirects(), self.count_files())
agg_data = (
self.count_submissions(),
self.count_redirects(),
self.count_files(),
)
agg_num = self.get_aggregate_number(agg_data)
except ValueError:
msg = "\x0303{0}\x0F isn't a number!"
msg = "\x0303{0}\x0f isn't a number!"
self.reply(data, msg.format(data.args[1]))
return
aggregate = self.get_aggregate(agg_num)
msg = "Aggregate is \x0305{0}\x0F (AfC {1})."
msg = "Aggregate is \x0305{0}\x0f (AfC {1})."
self.reply(data, msg.format(agg_num, aggregate))

elif action.startswith("g13_e") or action.startswith("g13e"):
g13_eli = self.count_g13_eligible()
msg = "There are \x0305{0}\x0F CSD:G13-eligible pages."
msg = "There are \x0305{0}\x0f CSD:G13-eligible pages."
self.reply(data, msg.format(g13_eli))

elif action.startswith("g13_a") or action.startswith("g13a"):
g13_noms = self.count_g13_active()
msg = "There are \x0305{0}\x0F active CSD:G13 nominations."
msg = "There are \x0305{0}\x0f active CSD:G13 nominations."
self.reply(data, msg.format(g13_noms))

elif action.startswith("nocolor") or action == "n":
self.reply(data, self.get_status(color=False))

else:
msg = "Unknown argument: \x0303{0}\x0F. Valid args are " +\
"'subs', 'redirs', 'files', 'agg', 'nocolor', " +\
"'g13_eligible', 'g13_active'."
msg = (
"Unknown argument: \x0303{0}\x0f. Valid args are "
+ "'subs', 'redirs', 'files', 'agg', 'nocolor', "
+ "'g13_eligible', 'g13_active'."
)
self.reply(data, msg.format(data.args[0]))

else:
@@ -122,22 +127,22 @@ class AfCStatus(Command):
aggregate = self.get_aggregate(agg_num)

if color:
msg = "Articles for creation {0} (\x0302AFC\x0F: \x0305{1}\x0F; \x0302AFC/R\x0F: \x0305{2}\x0F; \x0302FFU\x0F: \x0305{3}\x0F)."
msg = "Articles for creation {0} (\x0302AFC\x0f: \x0305{1}\x0f; \x0302AFC/R\x0f: \x0305{2}\x0f; \x0302FFU\x0f: \x0305{3}\x0f)."
else:
msg = "Articles for creation {0} (AFC: {1}; AFC/R: {2}; FFU: {3})."
return msg.format(aggregate, subs, redirs, files)

def count_g13_eligible(self):
"""
Returns the number of G13 Eligible AfC Submissions (count of
Category:G13 eligible AfC submissions)
Returns the number of G13 Eligible AfC Submissions (count of
Category:G13 eligible AfC submissions)
"""
return self.site.get_category("G13 eligible AfC submissions").pages

def count_g13_active(self):
"""
Returns the number of active CSD:G13 nominations ( count of
Category:Candidates for speedy deletion as abandoned AfC submissions)
Returns the number of active CSD:G13 nominations ( count of
Category:Candidates for speedy deletion as abandoned AfC submissions)
"""
catname = "Candidates for speedy deletion as abandoned AfC submissions"
return self.site.get_category(catname).pages
@@ -176,23 +181,24 @@ class AfCStatus(Command):
FFU (for example) indicates that our work is *not* done and the
project-wide backlog is most certainly *not* clear."""
if num == 0:
return "is \x02\x0303clear\x0F"
return "is \x02\x0303clear\x0f"
elif num <= 200:
return "is \x0303almost clear\x0F"
return "is \x0303almost clear\x0f"
elif num <= 400:
return "is \x0312normal\x0F"
return "is \x0312normal\x0f"
elif num <= 600:
return "is \x0307lightly backlogged\x0F"
return "is \x0307lightly backlogged\x0f"
elif num <= 900:
return "is \x0304backlogged\x0F"
return "is \x0304backlogged\x0f"
elif num <= 1200:
return "is \x02\x0304heavily backlogged\x0F"
return "is \x02\x0304heavily backlogged\x0f"
else:
return "is \x02\x1F\x0304severely backlogged\x0F"
return "is \x02\x1f\x0304severely backlogged\x0f"

def get_aggregate_number(self, (subs, redirs, files)):
def get_aggregate_number(self, arg):
"""Returns an 'aggregate number' based on the real number of pending
submissions in CAT:PEND (subs), open redirect submissions in WP:AFC/R
(redirs), and open files-for-upload requests in WP:FFU (files)."""
(subs, redirs, files) = arg
num = subs + (redirs / 2) + (files / 2)
return num

+ 3
- 3
commands/afc_submissions.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -23,8 +21,10 @@
from earwigbot import wiki
from earwigbot.commands import Command


class AfCSubmissions(Command):
"""Link the user directly to some pending AfC submissions."""

name = "submissions"
commands = ["submissions", "subs"]

@@ -63,4 +63,4 @@ class AfCSubmissions(Command):
continue
urls.append(member.url.encode("utf8"))
pages = ", ".join(urls[:number])
self.reply(data, "{0} pending AfC subs: {1}".format(number, pages))
self.reply(data, f"{number} pending AfC subs: {pages}")

+ 22
- 11
commands/block_monitor.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2016 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -27,8 +25,10 @@ from time import time
from earwigbot.commands import Command
from earwigbot.exceptions import APIError


class BlockMonitor(Command):
"""Monitors for on-wiki blocked users joining a particular channel."""

name = "block_monitor"
hooks = ["join"]

@@ -43,8 +43,9 @@ class BlockMonitor(Command):
self._last = None

def check(self, data):
return (self._monitor_chan and self._report_chan and
data.chan == self._monitor_chan)
return (
self._monitor_chan and self._report_chan and data.chan == self._monitor_chan
)

def process(self, data):
ip = self._get_ip(data.host)
@@ -61,12 +62,16 @@ class BlockMonitor(Command):
if not block:
return

msg = ("\x02[{note}]\x0F Joined user \x02{nick}\x0F is {type}blocked "
"on-wiki ([[User:{user}]]) because: {reason}")
msg = (
"\x02[{note}]\x0f Joined user \x02{nick}\x0f is {type}blocked "
"on-wiki ([[User:{user}]]) because: {reason}"
)
self.say(self._report_chan, msg.format(nick=data.nick, **block))

log = ("Reporting block ({note}): {nick} is [[User:{user}]], "
"{type}blocked because: {reason}")
log = (
"Reporting block ({note}): {nick} is [[User:{user}]], "
"{type}blocked because: {reason}"
)
self.logger.info(log.format(nick=data.nick, **block))

def _get_ip(self, host):
@@ -84,9 +89,15 @@ class BlockMonitor(Command):
site = self.bot.wiki.get_site()
try:
result = site.api_query(
action="query", list="blocks|globalblocks", bkip=ip, bgip=ip,
bklimit=1, bglimit=1, bkprop="user|reason|range",
bgprop="address|reason|range")
action="query",
list="blocks|globalblocks",
bkip=ip,
bgip=ip,
bklimit=1,
bglimit=1,
bkprop="user|reason|range",
bgprop="address|reason|range",
)
except APIError:
return
lblocks = result["query"]["blocks"]


+ 12
- 13
commands/geolocate.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -21,14 +19,15 @@
# SOFTWARE.

from json import loads
from socket import (AF_INET, AF_INET6, error as socket_error, gethostbyname,
inet_pton)
from urllib2 import urlopen
from socket import AF_INET, AF_INET6, gethostbyname, inet_pton
from urllib.request import urlopen

from earwigbot.commands import Command


class Geolocate(Command):
"""Geolocate an IP address (via http://ipinfodb.com/)."""

name = "geolocate"
commands = ["geolocate", "locate", "geo", "ip"]

@@ -43,7 +42,7 @@ class Geolocate(Command):

def process(self, data):
if not self.key:
msg = 'I need an API key for http://ipinfodb.com/ stored as \x0303config.commands["{0}"]["apiKey"]\x0F.'
msg = 'I need an API key for http://ipinfodb.com/ stored as \x0303config.commands["{0}"]["apiKey"]\x0f.'
log = 'Need an API key for http://ipinfodb.com/ stored as config.commands["{0}"]["apiKey"]'
self.reply(data, msg.format(self.name))
self.logger.error(log.format(self.name))
@@ -54,12 +53,12 @@ class Geolocate(Command):
else:
try:
address = gethostbyname(data.host)
except socket_error:
msg = "Your hostname, \x0302{0}\x0F, is not an IP address!"
except OSError:
msg = "Your hostname, \x0302{0}\x0f, is not an IP address!"
self.reply(data, msg.format(data.host))
return
if not self.is_ip(address):
msg = "\x0302{0}\x0F is not an IP address!"
msg = "\x0302{0}\x0f is not an IP address!"
self.reply(data, msg.format(address))
return

@@ -74,10 +73,10 @@ class Geolocate(Command):
longitude = res["longitude"]
utcoffset = res["timeZone"]
if not country and not region and not city:
self.reply(data, "IP \x0302{0}\x0F not found.".format(address))
self.reply(data, f"IP \x0302{address}\x0f not found.")
return
if country == "-" and region == "-" and city == "-":
self.reply(data, "IP \x0302{0}\x0F is reserved.".format(address))
self.reply(data, f"IP \x0302{address}\x0f is reserved.")
return

msg = "{0}, {1}, {2} ({3}, {4}), UTC {5}"
@@ -91,9 +90,9 @@ class Geolocate(Command):
"""
try:
inet_pton(AF_INET, address)
except socket_error:
except OSError:
try:
inet_pton(AF_INET6, address)
except socket_error:
except OSError:
return False
return True

+ 36
- 30
commands/git_command.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -26,9 +24,11 @@ import git

from earwigbot.commands import Command


class Git(Command):
"""Commands to interface with configurable git repositories; use '!git' for
a sub-command list."""

name = "git"

def setup(self):
@@ -78,12 +78,12 @@ class Git(Command):
elif command == "status":
self.do_status()
else: # They asked us to do something we don't know
msg = "Unknown argument: \x0303{0}\x0F.".format(data.args[0])
msg = f"Unknown argument: \x0303{data.args[0]}\x0f."
self.reply(data, msg)

def get_repos(self):
data = self.repos.iteritems()
repos = ["\x0302{0}\x0F ({1})".format(k, v) for k, v in data]
repos = [f"\x0302{k}\x0f ({v})" for k, v in data]
return ", ".join(repos)

def get_remote(self):
@@ -94,18 +94,18 @@ class Git(Command):
try:
return getattr(self.repo.remotes, remote_name)
except AttributeError:
msg = "Unknown remote: \x0302{0}\x0F.".format(remote_name)
msg = f"Unknown remote: \x0302{remote_name}\x0f."
self.reply(self.data, msg)

def get_time_since(self, date):
diff = time.mktime(time.gmtime()) - date
if diff < 60:
return "{0} seconds".format(int(diff))
return f"{int(diff)} seconds"
if diff < 60 * 60:
return "{0} minutes".format(int(diff / 60))
return f"{int(diff / 60)} minutes"
if diff < 60 * 60 * 24:
return "{0} hours".format(int(diff / 60 / 60))
return "{0} days".format(int(diff / 60 / 60 / 24))
return f"{int(diff / 60 / 60)} hours"
return f"{int(diff / 60 / 60 / 24)} days"

def do_help(self):
"""Display all commands."""
@@ -119,21 +119,21 @@ class Git(Command):
}
subcommands = ""
for key in sorted(help.keys()):
subcommands += "\x0303{0}\x0F ({1}), ".format(key, help[key])
subcommands += f"\x0303{key}\x0f ({help[key]}), "
subcommands = subcommands[:-2] # Trim last comma and space
msg = "Sub-commands are: {0}; repos are: {1}. Syntax: !git \x0303subcommand\x0F \x0302repo\x0F."
msg = "Sub-commands are: {0}; repos are: {1}. Syntax: !git \x0303subcommand\x0f \x0302repo\x0f."
self.reply(self.data, msg.format(subcommands, self.get_repos()))

def do_branch(self):
"""Get our current branch."""
branch = self.repo.active_branch.name
msg = "Currently on branch \x0302{0}\x0F.".format(branch)
msg = f"Currently on branch \x0302{branch}\x0f."
self.reply(self.data, msg)

def do_branches(self):
"""Get a list of branches."""
branches = [branch.name for branch in self.repo.branches]
msg = "Branches: \x0302{0}\x0F.".format(", ".join(branches))
msg = "Branches: \x0302{}\x0f.".format(", ".join(branches))
self.reply(self.data, msg)

def do_checkout(self):
@@ -146,18 +146,18 @@ class Git(Command):

current_branch = self.repo.active_branch.name
if target == current_branch:
msg = "Already on \x0302{0}\x0F!".format(target)
msg = f"Already on \x0302{target}\x0f!"
self.reply(self.data, msg)
return

try:
ref = getattr(self.repo.branches, target)
except AttributeError:
msg = "Branch \x0302{0}\x0F doesn't exist!".format(target)
msg = f"Branch \x0302{target}\x0f doesn't exist!"
self.reply(self.data, msg)
else:
ref.checkout()
ms = "Switched from branch \x0302{0}\x0F to \x0302{1}\x0F."
ms = "Switched from branch \x0302{0}\x0f to \x0302{1}\x0f."
msg = ms.format(current_branch, target)
self.reply(self.data, msg)
log = "{0} checked out branch {1} of {2}"
@@ -181,11 +181,11 @@ class Git(Command):
try:
ref = getattr(self.repo.branches, target)
except AttributeError:
msg = "Branch \x0302{0}\x0F doesn't exist!".format(target)
msg = f"Branch \x0302{target}\x0f doesn't exist!"
self.reply(self.data, msg)
else:
self.repo.git.branch("-d", ref)
msg = "Branch \x0302{0}\x0F has been deleted locally."
msg = "Branch \x0302{0}\x0f has been deleted locally."
self.reply(self.data, msg.format(target))
log = "{0} deleted branch {1} of {2}"
logmsg = log.format(self.data.nick, target, self.repo.working_dir)
@@ -194,7 +194,7 @@ class Git(Command):
def do_pull(self):
"""Pull from our remote repository."""
branch = self.repo.active_branch.name
msg = "Pulling from remote (currently on \x0302{0}\x0F)..."
msg = "Pulling from remote (currently on \x0302{0}\x0f)..."
self.reply(self.data, msg.format(branch))

remote = self.get_remote()
@@ -205,16 +205,18 @@ class Git(Command):

if updated:
branches = ", ".join([info.ref.remote_head for info in updated])
msg = "Done; updates to \x0302{0}\x0F (from {1})."
msg = "Done; updates to \x0302{0}\x0f (from {1})."
self.reply(self.data, msg.format(branches, remote.url))
log = "{0} pulled {1} of {2} (updates to {3})"
self.logger.info(log.format(self.data.nick, remote.name,
self.repo.working_dir, branches))
self.logger.info(
log.format(self.data.nick, remote.name, self.repo.working_dir, branches)
)
else:
self.reply(self.data, "Done; no new changes.")
log = "{0} pulled {1} of {2} (no updates)"
self.logger.info(log.format(self.data.nick, remote.name,
self.repo.working_dir))
self.logger.info(
log.format(self.data.nick, remote.name, self.repo.working_dir)
)

def do_status(self):
"""Check if we have anything to pull."""
@@ -227,14 +229,18 @@ class Git(Command):

if updated:
branches = ", ".join([info.ref.remote_head for info in updated])
msg = "Last local commit was \x02{0}\x0F ago; updates to \x0302{1}\x0F."
msg = "Last local commit was \x02{0}\x0f ago; updates to \x0302{1}\x0f."
self.reply(self.data, msg.format(since, branches))
log = "{0} got status of {1} of {2} (updates to {3})"
self.logger.info(log.format(self.data.nick, remote.name,
self.repo.working_dir, branches))
self.logger.info(
log.format(self.data.nick, remote.name, self.repo.working_dir, branches)
)
else:
msg = "Last commit was \x02{0}\x0F ago. Local copy is up-to-date with remote."
msg = (
"Last commit was \x02{0}\x0f ago. Local copy is up-to-date with remote."
)
self.reply(self.data, msg.format(since))
log = "{0} pulled {1} of {2} (no updates)"
self.logger.info(log.format(self.data.nick, remote.name,
self.repo.working_dir))
self.logger.info(
log.format(self.data.nick, remote.name, self.repo.working_dir)
)

+ 42
- 22
commands/partwhen.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2021 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -21,14 +19,16 @@
# SOFTWARE.

import base64
import cPickle as pickle
import pickle
import re

from earwigbot.commands import Command
from earwigbot.config.permissions import User


class PartWhen(Command):
"""Ask the bot to part the channel when a condition is met."""

name = "partwhen"
commands = ["partwhen", "unpartwhen"]
hooks = ["join", "msg"]
@@ -67,54 +67,74 @@ class PartWhen(Command):
if self._conds.get(channel):
del self._conds[channel]
self._save_conditions()
self.reply(data, "Cleared part conditions for {0}.".format(
"this channel" if channel == data.chan else channel))
self.reply(
data,
"Cleared part conditions for {}.".format(
"this channel" if channel == data.chan else channel
),
)
else:
self.reply(data, "No part conditions set.")
return

if not args:
conds = self._conds.get(channel, {})
existing = "; ".join("{0} {1}".format(cond, ", ".join(str(user) for user in users))
for cond, users in conds.iteritems())
existing = "; ".join(
"{} {}".format(cond, ", ".join(str(user) for user in users))
for cond, users in conds.iteritems()
)
if existing:
status = "Current part conditions: {0}.".format(existing)
status = f"Current part conditions: {existing}."
else:
status = "No part conditions set for {0}.".format(
"this channel" if channel == data.chan else channel)
self.reply(data, "{0} Usage: !{1} [<channel>] <event> <args>...".format(
status, data.command))
status = "No part conditions set for {}.".format(
"this channel" if channel == data.chan else channel
)
self.reply(
data,
f"{status} Usage: !{data.command} [<channel>] <event> <args>...",
)
return

event = args[0]
args = args[1:]
if event == "join":
if not args:
self.reply(data, "Join event requires an argument for the user joining, "
"in nick!ident@host syntax.")
self.reply(
data,
"Join event requires an argument for the user joining, "
"in nick!ident@host syntax.",
)
return
cond = args[0]
match = re.match(r"(.*?)!(.*?)@(.*?)$", cond)
if not match:
self.reply(data, "User join pattern is invalid; should use "
"nick!ident@host syntax.")
self.reply(
data,
"User join pattern is invalid; should use "
"nick!ident@host syntax.",
)
return
conds = self._conds.setdefault(channel, {}).setdefault("join", [])
conds.append(User(match.group(1), match.group(2), match.group(3)))
self._save_conditions()
self.reply(data, "Okay, I will leave {0} when {1} joins.".format(
"the channel" if channel == data.chan else channel, cond))
self.reply(
data,
"Okay, I will leave {} when {} joins.".format(
"the channel" if channel == data.chan else channel, cond
),
)
else:
self.reply(data, "Unknown event: {0} (valid events: join).".format(event))
self.reply(data, f"Unknown event: {event} (valid events: join).")

def _handle_join(self, data):
user = User(data.nick, data.ident, data.host)
conds = self._conds.get(data.chan, {}).get("join", {})
for cond in conds:
if user in cond:
self.logger.info("Parting {0} because {1} met join condition {2}".format(
data.chan, str(user), str(cond)))
self.part(data.chan, "Requested to leave when {0} joined".format(data.nick))
self.logger.info(
f"Parting {data.chan} because {str(user)} met join condition {str(cond)}"
)
self.part(data.chan, f"Requested to leave when {data.nick} joined")
break

def _load_conditions(self):


+ 2
- 2
commands/praise.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -22,8 +20,10 @@

from earwigbot.commands import Command


class Praise(Command):
"""Praise people!"""

name = "praise"

def setup(self):


+ 65
- 43
commands/rc_monitor.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2016 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -20,11 +18,11 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import re
from collections import namedtuple
from datetime import datetime
from difflib import ndiff
from Queue import Queue
import re
from queue import Queue
from threading import Thread

from earwigbot.commands import Command
@@ -34,9 +32,11 @@ from earwigbot.wiki import constants

_Diff = namedtuple("_Diff", ["added", "removed"])


class RCMonitor(Command):
"""Monitors the recent changes feed for certain edits and reports them to a
dedicated channel."""

name = "rc_monitor"
commands = ["rc_monitor", "rcm"]
hooks = ["msg", "rc"]
@@ -46,8 +46,10 @@ class RCMonitor(Command):
self._channel = self.config.commands[self.name]["channel"]
except KeyError:
self._channel = None
log = ('Cannot use without a report channel set as '
'config.commands["{0}"]["channel"]')
log = (
"Cannot use without a report channel set as "
'config.commands["{0}"]["channel"]'
)
self.logger.warn(log.format(self.name))
return

@@ -55,7 +57,7 @@ class RCMonitor(Command):
"start": datetime.utcnow(),
"edits": 0,
"hits": 0,
"max_backlog": 0
"max_backlog": 0,
}
self._levels = {}
self._issues = {}
@@ -73,7 +75,8 @@ class RCMonitor(Command):
if not self._channel:
return
return isinstance(data, RC) or (
data.is_command and data.command in self.commands)
data.is_command and data.command in self.commands
)

def process(self, data):
if isinstance(data, RC):
@@ -90,11 +93,17 @@ class RCMonitor(Command):
since = self._stats["start"].strftime("%H:%M:%S, %d %B %Y")
seconds = (datetime.utcnow() - self._stats["start"]).total_seconds()
rate = self._stats["edits"] / seconds
msg = ("\x02{edits:,}\x0F edits checked since {since} "
"(\x02{rate:.2f}\x0F edits/sec); \x02{hits:,}\x0F hits; "
"\x02{qsize:,}\x0F-edit backlog (\x02{max_backlog:,}\x0F max).")
self.reply(data, msg.format(
since=since, rate=rate, qsize=self._queue.qsize(), **self._stats))
msg = (
"\x02{edits:,}\x0f edits checked since {since} "
"(\x02{rate:.2f}\x0f edits/sec); \x02{hits:,}\x0f hits; "
"\x02{qsize:,}\x0f-edit backlog (\x02{max_backlog:,}\x0f max)."
)
self.reply(
data,
msg.format(
since=since, rate=rate, qsize=self._queue.qsize(), **self._stats
),
)

def unload(self):
self._thread.running = False
@@ -106,17 +115,9 @@ class RCMonitor(Command):
alert = 2
urgent = 3

self._levels = {
routine: "routine",
alert: "alert",
urgent: "URGENT"
}
self._issues = {
"g10": alert
}
self._descriptions = {
"g10": "CSD G10 nomination"
}
self._levels = {routine: "routine", alert: "alert", urgent: "URGENT"}
self._issues = {"g10": alert}
self._descriptions = {"g10": "CSD G10 nomination"}

def _get_diff(self, oldrev, newrev):
"""Return the difference between two revisions.
@@ -126,9 +127,12 @@ class RCMonitor(Command):
site = self.bot.wiki.get_site()
try:
result = site.api_query(
action="query", prop="revisions", rvprop="ids|content",
action="query",
prop="revisions",
rvprop="ids|content",
rvslots="main",
revids=(oldrev + "|" + newrev) if oldrev else newrev)
revids=(oldrev + "|" + newrev) if oldrev else newrev,
)
except APIError:
return None

@@ -148,10 +152,12 @@ class RCMonitor(Command):
return _Diff(text.splitlines(), [])

try:
oldtext = [rv["slots"]["main"]["*"] for rv in revs
if rv["revid"] == int(oldrev)][0]
newtext = [rv["slots"]["main"]["*"] for rv in revs
if rv["revid"] == int(newrev)][0]
oldtext = [
rv["slots"]["main"]["*"] for rv in revs if rv["revid"] == int(oldrev)
][0]
newtext = [
rv["slots"]["main"]["*"] for rv in revs if rv["revid"] == int(newrev)
][0]
except (IndexError, KeyError):
return None

@@ -165,14 +171,20 @@ class RCMonitor(Command):
site = self.bot.wiki.get_site()
try:
result = site.api_query(
action="query", list="backlinks", blfilterredir="redirects",
blnamespace=constants.NS_TEMPLATE, bllimit=50,
bltitle="Template:" + template)
action="query",
list="backlinks",
blfilterredir="redirects",
blnamespace=constants.NS_TEMPLATE,
bllimit=50,
bltitle="Template:" + template,
)
except APIError:
return []

redirs = {link["title"].split(":", 1)[1].lower()
for link in result["query"]["backlinks"]}
redirs = {
link["title"].split(":", 1)[1].lower()
for link in result["query"]["backlinks"]
}
redirs.add(template)
return redirs

@@ -184,9 +196,11 @@ class RCMonitor(Command):
return None
self._redirects[template] = redirects

search = "|".join(r"(template:)?" + re.escape(tmpl).replace(r"\ ", r"[ _]")
for tmpl in self._redirects[template])
return re.compile(r"\{\{\s*(" + search + r")\s*(\||\}\})", re.U|re.I)
search = "|".join(
r"(template:)?" + re.escape(tmpl).replace(r"\ ", r"[ _]")
for tmpl in self._redirects[template]
)
return re.compile(r"\{\{\s*(" + search + r")\s*(\||\}\})", re.U | re.I)

def _evaluate_csd(self, diff):
"""Evaluate a diff for CSD tagging."""
@@ -223,12 +237,20 @@ class RCMonitor(Command):
notify = " ".join("!rcm-" + issue for issue in report)
cmnt = rc.comment if len(rc.comment) <= 50 else rc.comment[:47] + "..."

msg = ("[\x02{level}\x0F] ({descr}) [\x02{notify}\x0F]\x0306 * "
"\x0314[[\x0307{title}\x0314]]\x0306 * \x0303{user}\x0306 * "
"\x0302{url}\x0306 * \x0310{comment}")
msg = (
"[\x02{level}\x0f] ({descr}) [\x02{notify}\x0f]\x0306 * "
"\x0314[[\x0307{title}\x0314]]\x0306 * \x0303{user}\x0306 * "
"\x0302{url}\x0306 * \x0310{comment}"
)
return msg.format(
level=level, descr=descr, notify=notify, title=rc.page,
user=rc.user, url=rc.url, comment=cmnt)
level=level,
descr=descr,
notify=notify,
title=rc.page,
user=rc.user,
url=rc.url,
comment=cmnt,
)

def _handle_event(self, event):
"""Process a recent change event."""


+ 17
- 13
commands/stars.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2015 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -21,12 +19,15 @@
# SOFTWARE.

from json import loads
from urllib2 import urlopen, HTTPError
from urllib.error import HTTPError
from urllib.request import urlopen

from earwigbot.commands import Command


class Stars(Command):
"""Get the number of stargazers for a given GitHub repository."""

name = "stars"
commands = ["stars", "stargazers"]
API_REPOS = "https://api.github.com/repos/{repo}"
@@ -35,7 +36,7 @@ class Stars(Command):

def process(self, data):
if not data.args:
msg = "Which GitHub repository or user should I look up? Example: \x0306{0}\x0F."
msg = "Which GitHub repository or user should I look up? Example: \x0306{0}\x0f."
self.reply(data, msg.format(self.EXAMPLE))
return

@@ -55,9 +56,8 @@ class Stars(Command):
count = int(repo["stargazers_count"])
plural = "" if count == 1 else "s"

msg = "\x0303{0}\x0F has \x02{1}\x0F stargazer{2}: {3}"
self.reply(data, msg.format(
repo["full_name"], count, plural, repo["html_url"]))
msg = "\x0303{0}\x0f has \x02{1}\x0f stargazer{2}: {3}"
self.reply(data, msg.format(repo["full_name"], count, plural, repo["html_url"]))

def handle_user(self, data, arg):
"""Handle !stars <user>."""
@@ -71,18 +71,22 @@ class Stars(Command):
star_plural = "" if star_count == 1 else "s"
repo_plural = "" if len(repos) == 1 else "s"
if len(repos) == 100:
star_count = "{0}+".format(star_count)
repo_count = "{0}+".format(repo_count)
star_count = f"{star_count}+"
repo_count = f"{repo_count}+"
if len(repos) > 0:
name = repos[0]["owner"]["login"]
url = repos[0]["owner"]["html_url"]
else:
name = arg
url = "https://github.com/{0}".format(name)
url = f"https://github.com/{name}"

msg = "\x0303{0}\x0F has \x02{1}\x0F stargazer{2} across \x02{3}\x0F repo{4}: {5}"
self.reply(data, msg.format(
name, star_count, star_plural, repo_count, repo_plural, url))
msg = (
"\x0303{0}\x0f has \x02{1}\x0f stargazer{2} across \x02{3}\x0f repo{4}: {5}"
)
self.reply(
data,
msg.format(name, star_count, star_plural, repo_count, repo_plural, url),
)

def get_repo(self, repo):
"""Return the API JSON dump for a given repository.


+ 10
- 10
commands/urbandictionary.py View File

@@ -1,17 +1,16 @@
# -*- coding: utf-8 -*-
#
# Public domain, 2013 Legoktm; 2013, 2018 Ben Kurtovic
#

from json import loads
import re
from urllib import quote
from urllib2 import urlopen
from json import loads
from urllib.parse import quote
from urllib.request import urlopen

from earwigbot.commands import Command


class UrbanDictionary(Command):
"""Get the definition of a word or phrase using Urban Dictionary."""

name = "urban"
commands = ["urban", "urbandictionary", "dct", "ud"]

@@ -34,7 +33,7 @@ class UrbanDictionary(Command):
res = loads(query)
results = res.get("list")
if not results:
self.reply(data, 'Sorry, no results found.')
self.reply(data, "Sorry, no results found.")
return

result = results[0]
@@ -44,9 +43,10 @@ class UrbanDictionary(Command):
if definition and definition[-1] not in (".", "!", "?"):
definition += "."

msg = "{0} \x02Example\x0F: {1} {2}".format(
definition.encode("utf8"), example.encode("utf8"), url)
msg = "{} \x02Example\x0f: {} {}".format(
definition.encode("utf8"), example.encode("utf8"), url
)
if self._normalize_term(result["word"]) != self._normalize_term(arg):
msg = "\x02{0}\x0F: {1}".format(result["word"].encode("utf8"), msg)
msg = "\x02{}\x0f: {}".format(result["word"].encode("utf8"), msg)

self.reply(data, msg)

+ 61
- 48
commands/weather.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -22,13 +20,15 @@

from datetime import datetime
from json import loads
from urllib import quote
from urllib2 import urlopen
from urllib.parse import quote
from urllib.request import urlopen

from earwigbot.commands import Command


class Weather(Command):
"""Get a weather forecast (via http://www.wunderground.com/)."""

name = "weather"
commands = ["weather", "weat", "forecast", "temperature", "temp"]

@@ -39,15 +39,15 @@ class Weather(Command):
except KeyError:
self.key = None
addr = "http://wunderground.com/weather/api/"
config = 'config.commands["{0}"]["apiKey"]'.format(self.name)
config = f'config.commands["{self.name}"]["apiKey"]'
log = "Cannot use without an API key from {0} stored as {1}"
self.logger.warn(log.format(addr, config))

def process(self, data):
if not self.key:
addr = "http://wunderground.com/weather/api/"
config = 'config.commands["{0}"]["apiKey"]'.format(self.name)
msg = "I need an API key from {0} stored as \x0303{1}\x0F."
config = f'config.commands["{self.name}"]["apiKey"]'
msg = "I need an API key from {0} stored as \x0303{1}\x0f."
log = "Need an API key from {0} stored as {1}"
self.reply(data, msg.format(addr, config))
self.logger.error(log.format(addr, config))
@@ -58,21 +58,25 @@ class Weather(Command):
if permdb.has_attr(data.host, "weather"):
location = permdb.get_attr(data.host, "weather")
else:
msg = " ".join(("Where do you want the weather of? You can",
"set a default with '!{0} default City,",
"State' (or 'City, Country' if non-US)."))
msg = " ".join(
(
"Where do you want the weather of? You can",
"set a default with '!{0} default City,",
"State' (or 'City, Country' if non-US).",
)
)
self.reply(data, msg.format(data.command))
return
elif data.args[0] == "default":
if data.args[1:]:
value = " ".join(data.args[1:])
permdb.set_attr(data.host, "weather", value)
msg = "\x0302{0}\x0F's default set to \x02{1}\x0F."
msg = "\x0302{0}\x0f's default set to \x02{1}\x0f."
self.reply(data, msg.format(data.host, value))
else:
if permdb.has_attr(data.host, "weather"):
value = permdb.get_attr(data.host, "weather")
msg = "\x0302{0}\x0F's default is \x02{1}\x0F."
msg = "\x0302{0}\x0f's default is \x02{1}\x0f."
self.reply(data, msg.format(data.host, value))
else:
self.reply(data, "I need a value to set as your default.")
@@ -107,73 +111,82 @@ class Weather(Command):
"""Format the weather (as dict *data*) to be sent through IRC."""
data = res["current_observation"]
place = data["display_location"]["full"]
icon = self.get_icon(data["icon"], data["local_time_rfc822"],
res["sun_phase"]).encode("utf8")
icon = self.get_icon(
data["icon"], data["local_time_rfc822"], res["sun_phase"]
).encode("utf8")
weather = data["weather"]
temp_f, temp_c = data["temp_f"], data["temp_c"]
humidity = data["relative_humidity"]
wind_dir = data["wind_dir"]
if wind_dir in ("North", "South", "East", "West"):
wind_dir = wind_dir.lower()
wind = "{0} {1} mph".format(wind_dir, data["wind_mph"])
wind = "{} {} mph".format(wind_dir, data["wind_mph"])
if float(data["wind_gust_mph"]) > float(data["wind_mph"]):
wind += " ({0} mph gusts)".format(data["wind_gust_mph"])
wind += " ({} mph gusts)".format(data["wind_gust_mph"])

msg = "\x02{0}\x0F: {1} {2}; {3}°F ({4}°C); {5} humidity; wind {6}"
msg = "\x02{0}\x0f: {1} {2}; {3}°F ({4}°C); {5} humidity; wind {6}"
msg = msg.format(place, icon, weather, temp_f, temp_c, humidity, wind)
if data["precip_today_in"] and float(data["precip_today_in"]) > 0:
msg += "; {0}″ precipitation today".format(data["precip_today_in"])
msg += "; {}″ precipitation today".format(data["precip_today_in"])
if data["precip_1hr_in"] and float(data["precip_1hr_in"]) > 0:
msg += " ({0}″ past hour)".format(data["precip_1hr_in"])
msg += " ({}″ past hour)".format(data["precip_1hr_in"])
return msg

def get_icon(self, condition, local_time, sun_phase):
    """Return a unicode icon to describe the given weather condition.

    *condition* is the icon key to look up (e.g. ``"rain"``).
    *local_time* is a timestamp string whose final space-separated token
    is dropped before parsing with ``"%a, %d %b %Y %H:%M:%S"``.
    *sun_phase* is a mapping holding ``"sunrise"`` and ``"sunset"`` entries,
    each with ``"hour"`` and ``"minute"`` values.

    Two-character table entries hold a (night, day) pair; the day glyph is
    chosen when the local time falls strictly between sunrise and sunset.
    Returns ``"?"`` when the condition is unknown or the time/sun-phase
    data is missing or cannot be parsed.
    """
    icons = {
        "chanceflurries": "☃",
        "chancerain": "☂",
        "chancesleet": "☃",
        "chancesnow": "☃",
        "chancetstorms": "☂",
        "clear": "☽☀",
        "cloudy": "☁",
        "flurries": "☃",
        "fog": "☁",
        "hazy": "☁",
        "mostlycloudy": "☁",
        "mostlysunny": "☽☀",
        "partlycloudy": "☁",
        "partlysunny": "☽☀",
        "rain": "☂",
        "sleet": "☃",
        "snow": "☃",
        "sunny": "☽☀",
        "tstorms": "☂",
    }
    try:
        icon = icons[condition]
        if len(icon) != 2:
            return icon

        # Strip the trailing timezone token; the result is compared as a
        # naive datetime against sunrise/sunset on the same calendar day.
        lt_no_tz = local_time.rsplit(" ", 1)[0]
        dt = datetime.strptime(lt_no_tz, "%a, %d %b %Y %H:%M:%S")
        srise = dt.replace(
            hour=int(sun_phase["sunrise"]["hour"]),
            minute=int(sun_phase["sunrise"]["minute"]),
            second=0,
            microsecond=0,
        )
        sset = dt.replace(
            hour=int(sun_phase["sunset"]["hour"]),
            minute=int(sun_phase["sunset"]["minute"]),
            second=0,
            microsecond=0,
        )
        # Index 0 is the night glyph, index 1 the day glyph.
        return icon[int(srise < dt < sset)]
    except (KeyError, ValueError):
        # KeyError: unknown condition or missing sun-phase field.
        # ValueError: unparsable timestamp or non-numeric hour/minute.
        return "?"

def format_ambiguous_result(self, res):
    """Format a message when there are multiple possible results.

    *res* is the decoded API response; the candidate places are read from
    ``res["response"]["results"]``. Each place is rendered as
    ``"City, State"``, falling back to the country when the state is empty
    or absent. Lists of up to 21 places are shown in full; longer lists
    show the first 20 followed by a count of the remainder.
    """
    places = [
        "{}, {}".format(place["city"], place.get("state") or place["country"])
        for place in res["response"]["results"]
    ]
    if len(places) > 21:
        remaining = len(places) - 20
        shown = "; ".join(places[:20])
        return f"Did you mean: {shown}... ({remaining} others)?"
    return "Did you mean: {}?".format("; ".join(places))

+ 22
- 14
commands/welcome.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -25,8 +23,10 @@ from time import sleep, time

from earwigbot.commands import Command


class Welcome(Command):
"""Welcome people who enter certain channels."""

name = "welcome"
commands = ["welcome", "greet"]
hooks = ["join", "part", "msg"]
@@ -76,7 +76,7 @@ class Welcome(Command):
if not data.host.startswith("gateway/web/"):
return

t_id = "welcome-{0}-{1}".format(data.chan.replace("#", ""), data.nick)
t_id = "welcome-{}-{}".format(data.chan.replace("#", ""), data.nick)
thread = Thread(target=self._callback, name=t_id, args=(data,))
thread.daemon = True
thread.start()
@@ -107,32 +107,40 @@ class Welcome(Command):
if len(data.args) < 2:
self.reply(data, "Which channel should I disable?")
elif data.args[1] in self.disabled:
msg = "Welcoming in \x02{0}\x0F is already disabled."
msg = "Welcoming in \x02{0}\x0f is already disabled."
self.reply(data, msg.format(data.args[1]))
elif data.args[1] not in self.channels:
msg = ("I'm not welcoming people in \x02{0}\x0F. "
"Only the bot owner can add new channels.")
msg = (
"I'm not welcoming people in \x02{0}\x0f. "
"Only the bot owner can add new channels."
)
self.reply(data, msg.format(data.args[1]))
else:
self.disabled.append(data.args[1])
msg = ("Disabled welcoming in \x02{0}\x0F. Re-enable with "
"\x0306!welcome enable {0}\x0F.")
msg = (
"Disabled welcoming in \x02{0}\x0f. Re-enable with "
"\x0306!welcome enable {0}\x0f."
)
self.reply(data, msg.format(data.args[1]))
elif data.args[0] == "enable":
if len(data.args) < 2:
self.reply(data, "Which channel should I enable?")
elif data.args[1] not in self.disabled:
msg = ("I don't have welcoming disabled in \x02{0}\x0F. "
"Only the bot owner can add new channels.")
msg = (
"I don't have welcoming disabled in \x02{0}\x0f. "
"Only the bot owner can add new channels."
)
self.reply(data, msg.format(data.args[1]))
else:
self.disabled.remove(data.args[1])
msg = "Enabled welcoming in \x02{0}\x0F."
msg = "Enabled welcoming in \x02{0}\x0f."
self.reply(data, msg.format(data.args[1]))
else:
self.reply(data, "I don't understand that command.")
else:
msg = ("This command welcomes people who enter certain channels. "
"I am welcoming people in: {0}. A bot admin can disable me "
"with \x0306!welcome disable [channel]\x0F.")
msg = (
"This command welcomes people who enter certain channels. "
"I am welcoming people in: {0}. A bot admin can disable me "
"with \x0306!welcome disable [channel]\x0f."
)
self.reply(data, msg.format(", ".join(self.channels.keys())))

+ 8
- 0
pyproject.toml View File

@@ -0,0 +1,8 @@
[tool.ruff]
target-version = "py311"

[tool.ruff.lint]
select = ["E4", "E7", "E9", "F", "I", "UP"]

[tool.ruff.lint.isort]
known-first-party = ["earwigbot"]

+ 2
- 2
tasks/afc_catdelink.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -22,9 +20,11 @@

from earwigbot.tasks import Task


class AfCCatDelink(Task):
"""A task to delink mainspace categories in declined [[WP:AFC]]
submissions."""

name = "afc_catdelink"
number = 8



+ 28
- 24
tasks/afc_copyvios.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -30,9 +28,11 @@ import oursql

from earwigbot.tasks import Task


class AfCCopyvios(Task):
"""A task to check newly-edited [[WP:AFC]] submissions for copyright
violations."""

name = "afc_copyvios"
number = 1

@@ -44,10 +44,11 @@ class AfCCopyvios(Task):
self.max_queries = cfg.get("maxQueries", 10)
self.max_time = cfg.get("maxTime", 150)
self.cache_results = cfg.get("cacheResults", False)
default_summary = "Tagging suspected [[WP:COPYVIO|copyright violation]] of {url}."
default_summary = (
"Tagging suspected [[WP:COPYVIO|copyright violation]] of {url}."
)
self.summary = self.make_summary(cfg.get("summary", default_summary))
default_tags = [
"Db-g12", "Db-copyvio", "Copyvio", "Copyviocore" "Copypaste"]
default_tags = ["Db-g12", "Db-copyvio", "Copyvio", "Copyviocore" "Copypaste"]
self.tags = default_tags + cfg.get("tags", [])

# Connection data for our SQL database:
@@ -76,38 +77,39 @@ class AfCCopyvios(Task):
"""Detect copyvios in 'page' and add a note if any are found."""
title = page.title
if title in self.ignore_list:
msg = u"Skipping [[{0}]], in ignore list"
msg = "Skipping [[{0}]], in ignore list"
self.logger.info(msg.format(title))
return

pageid = page.pageid
if self.has_been_processed(pageid):
msg = u"Skipping [[{0}]], already processed"
msg = "Skipping [[{0}]], already processed"
self.logger.info(msg.format(title))
return
code = mwparserfromhell.parse(page.get())
if not self.is_pending(code):
msg = u"Skipping [[{0}]], not a pending submission"
msg = "Skipping [[{0}]], not a pending submission"
self.logger.info(msg.format(title))
return
tag = self.is_tagged(code)
if tag:
msg = u"Skipping [[{0}]], already tagged with '{1}'"
msg = "Skipping [[{0}]], already tagged with '{1}'"
self.logger.info(msg.format(title, tag))
return

self.logger.info(u"Checking [[{0}]]".format(title))
result = page.copyvio_check(self.min_confidence, self.max_queries,
self.max_time)
self.logger.info(f"Checking [[{title}]]")
result = page.copyvio_check(
self.min_confidence, self.max_queries, self.max_time
)
url = result.url
orig_conf = "{0}%".format(round(result.confidence * 100, 2))
orig_conf = f"{round(result.confidence * 100, 2)}%"

if result.violation:
if self.handle_violation(title, page, url, orig_conf):
self.log_processed(pageid)
return
else:
msg = u"No violations detected in [[{0}]] (best: {1} at {2} confidence)"
msg = "No violations detected in [[{0}]] (best: {1} at {2} confidence)"
self.logger.info(msg.format(title, url, orig_conf))

self.log_processed(pageid)
@@ -122,22 +124,22 @@ class AfCCopyvios(Task):
content = page.get()
tag = self.is_tagged(mwparserfromhell.parse(content))
if tag:
msg = u"A violation was detected in [[{0}]], but it was tagged"
msg += u" in the mean time with '{1}' (best: {2} at {3} confidence)"
msg = "A violation was detected in [[{0}]], but it was tagged"
msg += " in the mean time with '{1}' (best: {2} at {3} confidence)"
self.logger.info(msg.format(title, tag, url, orig_conf))
return True
confirm = page.copyvio_compare(url, self.min_confidence)
new_conf = "{0}%".format(round(confirm.confidence * 100, 2))
new_conf = f"{round(confirm.confidence * 100, 2)}%"
if not confirm.violation:
msg = u"A violation was detected in [[{0}]], but couldn't be confirmed."
msg += u" It may have just been edited (best: {1} at {2} -> {3} confidence)"
msg = "A violation was detected in [[{0}]], but couldn't be confirmed."
msg += " It may have just been edited (best: {1} at {2} -> {3} confidence)"
self.logger.info(msg.format(title, url, orig_conf, new_conf))
return True

msg = u"Found violation: [[{0}]] -> {1} ({2} confidence)"
msg = "Found violation: [[{0}]] -> {1} ({2} confidence)"
self.logger.info(msg.format(title, url, new_conf))
safeurl = quote(url.encode("utf8"), safe="/:").decode("utf8")
template = u"\{\{{0}|url={1}|confidence={2}\}\}\n"
template = "\{\{{0}|url={1}|confidence={2}\}\}\n"
template = template.format(self.template, safeurl, new_conf)
newtext = template + content
if "{url}" in self.summary:
@@ -206,9 +208,11 @@ class AfCCopyvios(Task):
query1 = "DELETE FROM cache WHERE cache_id = ?"
query2 = "INSERT INTO cache VALUES (?, DEFAULT, ?, ?)"
query3 = "INSERT INTO cache_data VALUES (DEFAULT, ?, ?, ?, ?)"
cache_id = buffer(sha256("1:1:" + page.get().encode("utf8")).digest())
data = [(cache_id, source.url, source.confidence, source.skipped)
for source in result.sources]
cache_id = sha256("1:1:" + page.get().encode("utf8")).digest()
data = [
(cache_id, source.url, source.confidence, source.skipped)
for source in result.sources
]
with self.conn.cursor() as cursor:
cursor.execute("START TRANSACTION")
cursor.execute(query1, (cache_id,))


+ 7
- 5
tasks/afc_dailycats.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -24,8 +22,10 @@ from datetime import datetime, timedelta

from earwigbot.tasks import Task


class AfCDailyCats(Task):
"""A task to create daily categories for [[WP:AFC]]."""

name = "afc_dailycats"
number = 3

@@ -33,7 +33,9 @@ class AfCDailyCats(Task):
cfg = self.config.tasks.get(self.name, {})
self.prefix = cfg.get("prefix", "Category:AfC submissions by date/")
self.content = cfg.get("content", "{{AfC submission category header}}")
default_summary = "Creating {0} category page for [[WP:AFC|Articles for creation]]."
default_summary = (
"Creating {0} category page for [[WP:AFC|Articles for creation]]."
)
self.summary = self.make_summary(cfg.get("summary", default_summary))

def run(self, **kwargs):
@@ -57,6 +59,6 @@ class AfCDailyCats(Task):
page = self.site.get_page(self.prefix + suffix)
if page.exists == page.PAGE_MISSING:
page.edit(self.content, self.summary.format(word))
self.logger.info(u"Creating [[{0}]]".format(page.title))
self.logger.info(f"Creating [[{page.title}]]")
else:
self.logger.debug(u"Skipping [[{0}]], exists".format(page.title))
self.logger.debug(f"Skipping [[{page.title}]], exists")

+ 2
- 2
tasks/afc_history.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -22,8 +20,10 @@

from earwigbot.tasks import Task


class AfCHistory(Task):
"""A task to generate information about AfC submissions over time."""

name = "afc_history"

def setup(self):


+ 106
- 62
tasks/afc_statistics.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -20,9 +18,9 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import re
from collections import OrderedDict
from datetime import datetime
import re
from os.path import expanduser
from threading import Lock
from time import sleep
@@ -30,8 +28,7 @@ from time import sleep
import mwparserfromhell
import oursql

from earwigbot import exceptions
from earwigbot import wiki
from earwigbot import exceptions, wiki
from earwigbot.tasks import Task

_DEFAULT_PAGE_TEXT = """<noinclude><!-- You can edit anything on this page \
@@ -47,6 +44,7 @@ templates it uses, documented in [[Template:AfC statistics/doc]]. -->

_PER_CHART_LIMIT = 1000


class AfCStatistics(Task):
"""A task to generate statistics for WikiProject Articles for Creation.

@@ -55,6 +53,7 @@ class AfCStatistics(Task):
every four minutes and saved once an hour, on the hour, to subpages of
self.pageroot. In the live bot, this is "Template:AfC statistics".
"""

name = "afc_statistics"
number = 2

@@ -75,7 +74,9 @@ class AfCStatistics(Task):
self.pageroot = cfg.get("page", "Template:AfC statistics")
self.pending_cat = cfg.get("pending", "Pending AfC submissions")
self.ignore_list = cfg.get("ignoreList", [])
default_summary = "Updating statistics for [[WP:WPAFC|WikiProject Articles for creation]]."
default_summary = (
"Updating statistics for [[WP:WPAFC|WikiProject Articles for creation]]."
)
self.summary = self.make_summary(cfg.get("summary", default_summary))

# Templates used in chart generation:
@@ -143,24 +144,29 @@ class AfCStatistics(Task):

def _save_page(self, name, chart, summary):
    """Save a statistics chart to a single page.

    *name* is the subpage name under ``self.pageroot``, *chart* is the
    wikitext to place between the ``stat begin``/``stat end`` markers, and
    *summary* is the edit summary. If the page does not exist, the default
    page text is used as the starting point. Nothing is saved when the
    chart section is unchanged.
    """
    page = self.site.get_page(f"{self.pageroot}/{name}")
    try:
        text = page.get()
    except exceptions.PageNotFoundError:
        text = _DEFAULT_PAGE_TEXT % {"pageroot": self.pageroot}

    # A callable replacement inserts the chart verbatim. Passing it as a
    # replacement *string* would make re.sub interpret backslashes and
    # group references (e.g. "\1", "\g<name>") found in the chart text.
    stat_section = "<!-- stat begin -->" + chart + "<!-- stat end -->"
    newtext = re.sub(
        "<!-- stat begin -->(.*?)<!-- stat end -->",
        lambda match: stat_section,
        text,
        flags=re.DOTALL,
    )
    if newtext == text:
        self.logger.info(f"Chart for {name} unchanged; not saving")
        return

    # Refresh the signature/timestamp only when the chart actually changed.
    newtext = re.sub(
        "<!-- sig begin -->(.*?)<!-- sig end -->",
        "<!-- sig begin -->~~~ at ~~~~~<!-- sig end -->",
        newtext,
    )
    page.edit(newtext, summary, minor=True, bot=True)
    self.logger.info(f"Chart for {name} saved to [[{page.title}]]")

def _compile_charts(self):
"""Compile and return all statistics information from our local db."""
@@ -168,20 +174,20 @@ class AfCStatistics(Task):
with self.conn.cursor(oursql.DictCursor) as cursor:
cursor.execute("SELECT * FROM chart")
for chart in cursor:
name = chart['chart_name']
name = chart["chart_name"]
stats[name] = self._compile_chart(chart)
return stats

def _compile_chart(self, chart_info):
"""Compile and return a single statistics chart."""
chart = self.tl_header + "|" + chart_info['chart_title']
if chart_info['chart_special_title']:
chart += "|" + chart_info['chart_special_title']
chart = self.tl_header + "|" + chart_info["chart_title"]
if chart_info["chart_special_title"]:
chart += "|" + chart_info["chart_special_title"]
chart = "{{" + chart + "}}"

query = "SELECT * FROM page JOIN row ON page_id = row_id WHERE row_chart = ?"
with self.conn.cursor(oursql.DictCursor) as cursor:
cursor.execute(query, (chart_info['chart_id'],))
cursor.execute(query, (chart_info["chart_id"],))
rows = cursor.fetchall()
skipped = max(0, len(rows) - _PER_CHART_LIMIT)
rows = rows[:_PER_CHART_LIMIT]
@@ -190,7 +196,7 @@ class AfCStatistics(Task):

footer = "{{" + self.tl_footer
if skipped:
footer += "|skip={}".format(skipped)
footer += f"|skip={skipped}"
footer += "}}"
chart += "\n" + footer + "\n"
return chart
@@ -201,9 +207,11 @@ class AfCStatistics(Task):
'page' is a dict of page information, taken as a row from the page
table, where keys are column names and values are their cell contents.
"""
row = u"{0}|s={page_status}|t={page_title}|z={page_size}|"
row = "{0}|s={page_status}|t={page_title}|z={page_size}|"
if page["page_special_oldid"]:
row += "sr={page_special_user}|sd={page_special_time}|si={page_special_oldid}|"
row += (
"sr={page_special_user}|sd={page_special_time}|si={page_special_oldid}|"
)
row += "mr={page_modify_user}|md={page_modify_time}|mi={page_modify_oldid}"

page["page_special_time"] = self._fmt_time(page["page_special_time"])
@@ -236,7 +244,7 @@ class AfCStatistics(Task):
self.logger.info("Starting sync")

replag = self.site.get_replag()
self.logger.debug("Server replag is {0}".format(replag))
self.logger.debug(f"Server replag is {replag}")
if replag > 600 and not kwargs.get("ignore_replag"):
msg = "Sync canceled as replag ({0} secs) is greater than ten minutes"
self.logger.warn(msg.format(replag))
@@ -277,18 +285,18 @@ class AfCStatistics(Task):
if oldid == real_oldid:
continue

msg = u"Updating page [[{0}]] (id: {1}) @ {2}"
msg = "Updating page [[{0}]] (id: {1}) @ {2}"
self.logger.debug(msg.format(title, pageid, oldid))
msg = u" {0}: oldid: {1} -> {2}"
msg = " {0}: oldid: {1} -> {2}"
self.logger.debug(msg.format(pageid, oldid, real_oldid))
real_title = real_title.decode("utf8").replace("_", " ")
ns = self.site.namespace_id_to_name(real_ns)
if ns:
real_title = u":".join((ns, real_title))
real_title = ":".join((ns, real_title))
try:
self._update_page(cursor, pageid, real_title)
except Exception:
e = u"Error updating page [[{0}]] (id: {1})"
e = "Error updating page [[{0}]] (id: {1})"
self.logger.exception(e.format(real_title, pageid))

def _add_untracked(self, cursor):
@@ -317,15 +325,15 @@ class AfCStatistics(Task):
title = title.decode("utf8").replace("_", " ")
ns_name = self.site.namespace_id_to_name(ns)
if ns_name:
title = u":".join((ns_name, title))
title = ":".join((ns_name, title))
if title in self.ignore_list or ns == wiki.NS_CATEGORY:
continue
msg = u"Tracking page [[{0}]] (id: {1})".format(title, pageid)
msg = f"Tracking page [[{title}]] (id: {pageid})"
self.logger.debug(msg)
try:
self._track_page(cursor, pageid, title)
except Exception:
e = u"Error tracking page [[{0}]] (id: {1})"
e = "Error tracking page [[{0}]] (id: {1})"
self.logger.exception(e.format(title, pageid))

def _update_stale(self, cursor):
@@ -345,12 +353,12 @@ class AfCStatistics(Task):
cursor.execute(query)

for pageid, title, oldid in cursor:
msg = u"Updating page [[{0}]] (id: {1}) @ {2}"
msg = "Updating page [[{0}]] (id: {1}) @ {2}"
self.logger.debug(msg.format(title, pageid, oldid))
try:
self._update_page(cursor, pageid, title)
except Exception:
e = u"Error updating page [[{0}]] (id: {1})"
e = "Error updating page [[{0}]] (id: {1})"
self.logger.exception(e.format(title, pageid))

def _delete_old(self, cursor):
@@ -370,7 +378,7 @@ class AfCStatistics(Task):

def _untrack_page(self, cursor, pageid):
"""Remove a page, given by ID, from our database."""
self.logger.debug("Untracking page (id: {0})".format(pageid))
self.logger.debug(f"Untracking page (id: {pageid})")
query = """DELETE FROM page, row, updatelog USING page JOIN row
ON page_id = row_id JOIN updatelog ON page_id = update_id
WHERE page_id = ?"""
@@ -384,14 +392,14 @@ class AfCStatistics(Task):
"""
content = self._get_content(pageid)
if content is None:
msg = u"Could not get page content for [[{0}]]".format(title)
msg = f"Could not get page content for [[{title}]]"
self.logger.error(msg)
return

namespace = self.site.get_page(title).namespace
status, chart = self._get_status_and_chart(content, namespace)
if chart == self.CHART_NONE:
msg = u"Could not find a status for [[{0}]]".format(title)
msg = f"Could not find a status for [[{title}]]"
self.logger.warn(msg)
return

@@ -403,8 +411,22 @@ class AfCStatistics(Task):
query2 = "INSERT INTO page VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
query3 = "INSERT INTO updatelog VALUES (?, ?)"
cursor.execute(query1, (pageid, chart))
cursor.execute(query2, (pageid, status, title, len(content), notes,
m_user, m_time, m_id, s_user, s_time, s_id))
cursor.execute(
query2,
(
pageid,
status,
title,
len(content),
notes,
m_user,
m_time,
m_id,
s_user,
s_time,
s_id,
),
)
cursor.execute(query3, (pageid, datetime.utcnow()))

def _update_page(self, cursor, pageid, title):
@@ -416,7 +438,7 @@ class AfCStatistics(Task):
"""
content = self._get_content(pageid)
if content is None:
msg = u"Could not get page content for [[{0}]]".format(title)
msg = f"Could not get page content for [[{title}]]"
self.logger.error(msg)
return

@@ -437,12 +459,14 @@ class AfCStatistics(Task):
self._update_page_title(cursor, result, pageid, title)

if m_id != result["page_modify_oldid"]:
self._update_page_modify(cursor, result, pageid, len(content),
m_user, m_time, m_id)
self._update_page_modify(
cursor, result, pageid, len(content), m_user, m_time, m_id
)

if status != result["page_status"]:
special = self._update_page_status(cursor, result, pageid, content,
status, chart)
special = self._update_page_status(
cursor, result, pageid, content, status, chart
)
s_user = special[0]
else:
s_user = result["page_special_user"]
@@ -461,7 +485,7 @@ class AfCStatistics(Task):
query = "UPDATE page SET page_title = ? WHERE page_id = ?"
cursor.execute(query, (title, pageid))

msg = u" {0}: title: {1} -> {2}"
msg = " {0}: title: {1} -> {2}"
self.logger.debug(msg.format(pageid, result["page_title"], title))

def _update_page_modify(self, cursor, result, pageid, size, m_user, m_time, m_id):
@@ -471,10 +495,16 @@ class AfCStatistics(Task):
WHERE page_id = ?"""
cursor.execute(query, (size, m_user, m_time, m_id, pageid))

msg = u" {0}: modify: {1} / {2} / {3} -> {4} / {5} / {6}"
msg = msg.format(pageid, result["page_modify_user"],
result["page_modify_time"],
result["page_modify_oldid"], m_user, m_time, m_id)
msg = " {0}: modify: {1} / {2} / {3} -> {4} / {5} / {6}"
msg = msg.format(
pageid,
result["page_modify_user"],
result["page_modify_time"],
result["page_modify_oldid"],
m_user,
m_time,
m_id,
)
self.logger.debug(msg)

def _update_page_status(self, cursor, result, pageid, content, status, chart):
@@ -487,16 +517,25 @@ class AfCStatistics(Task):
cursor.execute(query1, (status, chart, pageid))

msg = " {0}: status: {1} ({2}) -> {3} ({4})"
self.logger.debug(msg.format(pageid, result["page_status"],
result["row_chart"], status, chart))
self.logger.debug(
msg.format(
pageid, result["page_status"], result["row_chart"], status, chart
)
)

s_user, s_time, s_id = self._get_special(pageid, content, chart)
if s_id != result["page_special_oldid"]:
cursor.execute(query2, (s_user, s_time, s_id, pageid))
msg = u" {0}: special: {1} / {2} / {3} -> {4} / {5} / {6}"
msg = msg.format(pageid, result["page_special_user"],
result["page_special_time"],
result["page_special_oldid"], s_user, s_time, s_id)
msg = " {0}: special: {1} / {2} / {3} -> {4} / {5} / {6}"
msg = msg.format(
pageid,
result["page_special_user"],
result["page_special_time"],
result["page_special_oldid"],
s_user,
s_time,
s_id,
)
self.logger.debug(msg)

return s_user, s_time, s_id
@@ -529,9 +568,13 @@ class AfCStatistics(Task):
"""Get the content of a revision by ID from the API."""
if revid in self.revision_cache:
return self.revision_cache[revid]
res = self.site.api_query(action="query", prop="revisions",
rvprop="content", rvslots="main",
revids=revid)
res = self.site.api_query(
action="query",
prop="revisions",
rvprop="content",
rvslots="main",
revids=revid,
)
try:
revision = res["query"]["pages"].values()[0]["revisions"][0]
content = revision["slots"]["main"]["*"]
@@ -577,7 +620,7 @@ class AfCStatistics(Task):
"afc submission/reviewing": "R",
"afc submission/pending": "P",
"afc submission/draft": "T",
"afc submission/declined": "D"
"afc submission/declined": "D",
}
statuses = []
code = mwparserfromhell.parse(content)
@@ -629,7 +672,7 @@ class AfCStatistics(Task):
self.CHART_ACCEPT: self.get_accepted,
self.CHART_REVIEW: self.get_reviewing,
self.CHART_PEND: self.get_pending,
self.CHART_DECLINE: self.get_decline
self.CHART_DECLINE: self.get_decline,
}
return charts[chart](pageid, content)

@@ -675,7 +718,8 @@ class AfCStatistics(Task):
params = ("decliner", "declinets")
res = self._get_status_helper(pageid, content, ("D"), params)
return res or self._search_history(
pageid, self.CHART_DECLINE, ["D"], ["R", "P", "T"])
pageid, self.CHART_DECLINE, ["D"], ["R", "P", "T"]
)

def _get_status_helper(self, pageid, content, statuses, params):
"""Helper function for get_pending() and get_decline()."""
@@ -686,7 +730,7 @@ class AfCStatistics(Task):
if tmpl.name.strip().lower() == "afc submission":
if all([tmpl.has(par, ignore_empty=True) for par in params]):
if status in statuses:
data = [unicode(tmpl.get(par).value) for par in params]
data = [str(tmpl.get(par).value) for par in params]
submits.append(data)
if not submits:
return None
@@ -774,7 +818,7 @@ class AfCStatistics(Task):
if re.search(regex, content):
notes += "|nc=1" # Submission is a suspected copyvio

if not re.search(r"\<ref\s*(.*?)\>(.*?)\</ref\>", content, re.I|re.S):
if not re.search(r"\<ref\s*(.*?)\>(.*?)\</ref\>", content, re.I | re.S):
regex = r"(https?:)|\[//(?!{0})([^ \]\t\n\r\f\v]+?)"
sitedomain = re.escape(self.site.domain)
if re.search(regex.format(sitedomain), content, re.I | re.S):


+ 64
- 30
tasks/afc_undated.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -25,28 +23,49 @@ from datetime import datetime
import mwparserfromhell

from earwigbot.tasks import Task
from earwigbot.wiki.constants import *
from earwigbot.wiki.constants import (
NS_CATEGORY,
NS_CATEGORY_TALK,
NS_FILE,
NS_FILE_TALK,
NS_HELP_TALK,
NS_PROJECT,
NS_PROJECT_TALK,
NS_TALK,
NS_TEMPLATE,
NS_TEMPLATE_TALK,
NS_USER,
)

NS_DRAFT = 118


class AfCUndated(Task):
"""A task to clear [[Category:Undated AfC submissions]]."""

name = "afc_undated"
number = 5

def setup(self):
cfg = self.config.tasks.get(self.name, {})
self.category = cfg.get("category", "Undated AfC submissions")
default_summary = "Adding timestamp to undated [[WP:AFC|Articles for creation]] submission."
default_summary = (
"Adding timestamp to undated [[WP:AFC|Articles for creation]] submission."
)
self.summary = self.make_summary(cfg.get("summary", default_summary))
self.namespaces = {
"submission": [NS_USER, NS_PROJECT, NS_PROJECT_TALK, NS_DRAFT],
"talk": [NS_TALK, NS_FILE_TALK, NS_TEMPLATE_TALK, NS_HELP_TALK,
NS_CATEGORY_TALK]
"talk": [
NS_TALK,
NS_FILE_TALK,
NS_TEMPLATE_TALK,
NS_HELP_TALK,
NS_CATEGORY_TALK,
],
}
self.aliases = {
"submission": ["AfC submission"],
"talk": ["WikiProject Articles for creation"]
"talk": ["WikiProject Articles for creation"],
}

def run(self, **kwargs):
@@ -59,7 +78,7 @@ class AfCUndated(Task):

self.site = self.bot.wiki.get_site()
category = self.site.get_category(self.category)
logmsg = u"Undated category [[{0}]] has {1} members"
logmsg = "Undated category [[{0}]] has {1} members"
self.logger.info(logmsg.format(category.title, category.size))
if category.size:
self._build_aliases()
@@ -77,8 +96,12 @@ class AfCUndated(Task):
base = self.aliases[key][0]
aliases = [base, "Template:" + base]
result = self.site.api_query(
action="query", list="backlinks", bllimit=50,
blfilterredir="redirects", bltitle=aliases[1])
action="query",
list="backlinks",
bllimit=50,
blfilterredir="redirects",
bltitle=aliases[1],
)
for data in result["query"]["backlinks"]:
redir = self.site.get_page(data["title"])
aliases.append(redir.title)
@@ -89,7 +112,7 @@ class AfCUndated(Task):
def _process_page(self, page):
"""Date the necessary templates inside a page object."""
if not page.check_exclusion():
msg = u"Skipping [[{0}]]; bot excluded from editing"
msg = "Skipping [[{0}]]; bot excluded from editing"
self.logger.info(msg.format(page.title))
return

@@ -102,7 +125,7 @@ class AfCUndated(Task):
aliases = self.aliases["talk"]
timestamp, reviewer = self._get_talkdata(page)
else:
msg = u"[[{0}]] is undated, but in a namespace I don't know how to process"
msg = "[[{0}]] is undated, but in a namespace I don't know how to process"
self.logger.warn(msg.format(page.title))
return
if not timestamp:
@@ -120,22 +143,27 @@ class AfCUndated(Task):
changes += 1

if changes:
msg = u"Dating [[{0}]]: {1}x {2}"
msg = "Dating [[{0}]]: {1}x {2}"
self.logger.info(msg.format(page.title, changes, aliases[0]))
page.edit(unicode(code), self.summary)
page.edit(str(code), self.summary)
else:
msg = u"[[{0}]] is undated, but I can't figure out what to replace"
msg = "[[{0}]] is undated, but I can't figure out what to replace"
self.logger.warn(msg.format(page.title))

def _get_timestamp(self, page):
"""Get the timestamp associated with a particular submission."""
self.logger.debug(u"[[{0}]]: Getting timestamp".format(page.title))
self.logger.debug(f"[[{page.title}]]: Getting timestamp")
result = self.site.api_query(
action="query", prop="revisions", rvprop="timestamp", rvlimit=1,
rvdir="newer", titles=page.title)
action="query",
prop="revisions",
rvprop="timestamp",
rvlimit=1,
rvdir="newer",
titles=page.title,
)
data = result["query"]["pages"].values()[0]
if "revisions" not in data:
log = u"Couldn't get timestamp for [[{0}]]"
log = "Couldn't get timestamp for [[{0}]]"
self.logger.warn(log.format(page.title))
return None
raw = data["revisions"][0]["timestamp"]
@@ -150,32 +178,33 @@ class AfCUndated(Task):
"""
subject = page.toggle_talk()
if subject.exists == subject.PAGE_MISSING:
log = u"Couldn't process [[{0}]]: subject page doesn't exist"
log = "Couldn't process [[{0}]]: subject page doesn't exist"
self.logger.warn(log.format(page.title))
return None, None
if subject.namespace == NS_FILE:
self.logger.debug(u"[[{0}]]: Getting filedata".format(page.title))
self.logger.debug(f"[[{page.title}]]: Getting filedata")
return self._get_filedata(subject)

self.logger.debug(u"[[{0}]]: Getting talkdata".format(page.title))
self.logger.debug(f"[[{page.title}]]: Getting talkdata")
user, ts, revid = self.statistics.get_accepted(subject.pageid)
if not ts:
if subject.is_redirect or subject.namespace == NS_CATEGORY:
log = u"[[{0}]]: Couldn't get talkdata; trying redir/cat data"
log = "[[{0}]]: Couldn't get talkdata; trying redir/cat data"
self.logger.debug(log.format(page.title))
return self._get_redirdata(subject)
log = u"Couldn't get talkdata for [[{0}]]"
log = "Couldn't get talkdata for [[{0}]]"
self.logger.warn(log.format(page.title))
return None, None
return ts.strftime("%Y%m%d%H%M%S"), user

def _get_filedata(self, page):
"""Get the timestamp and reviewer associated with a file talkpage."""
result = self.site.api_query(action="query", prop="imageinfo",
titles=page.title)
result = self.site.api_query(
action="query", prop="imageinfo", titles=page.title
)
data = result["query"]["pages"].values()[0]
if "imageinfo" not in data:
log = u"Couldn't get filedata for [[{0}]]"
log = "Couldn't get filedata for [[{0}]]"
self.logger.warn(log.format(page.title))
return None, None
info = data["imageinfo"][0]
@@ -185,10 +214,15 @@ class AfCUndated(Task):
def _get_redirdata(self, page):
"""Get the timestamp and reviewer for a redirect/category talkpage."""
result = self.site.api_query(
action="query", prop="revisions", rvprop="timestamp|user",
rvlimit=1, rvdir="newer", titles=page.title)
action="query",
prop="revisions",
rvprop="timestamp|user",
rvlimit=1,
rvdir="newer",
titles=page.title,
)
if "batchcomplete" not in result:
log = u"Couldn't get redir/cat talkdata for [[{0}]]: has multiple revisions"
log = "Couldn't get redir/cat talkdata for [[{0}]]: has multiple revisions"
self.logger.warn(log.format(page.title))
return None, None
rev = result["query"]["pages"].values()[0]["revisions"][0]


+ 21
- 11
tasks/banner_untag.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2017 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -24,8 +22,10 @@ import time

from earwigbot.tasks import Task


class BannerUntag(Task):
"""A task to undo mistaken tagging edits made by wikiproject_tagger."""

name = "banner_untag"
number = 14

@@ -42,8 +42,9 @@ class BannerUntag(Task):
done = [int(line) for line in donefp.read().splitlines()]

with open(rev_file) as fp:
data = [[int(x) for x in line.split("\t")]
for line in fp.read().splitlines()]
data = [
[int(x) for x in line.split("\t")] for line in fp.read().splitlines()
]
data = [item for item in data if item[0] not in done]

with open(error_file, "a") as errfp:
@@ -53,7 +54,7 @@ class BannerUntag(Task):
def _process_data(self, data, errfile, donefile):
chunksize = 50
for chunkidx in range((len(data) + chunksize - 1) / chunksize):
chunk = data[chunkidx*chunksize:(chunkidx+1)*chunksize]
chunk = data[chunkidx * chunksize : (chunkidx + 1) * chunksize]
if self.shutoff_enabled():
return
self._process_chunk(chunk, errfile, donefile)
@@ -61,8 +62,12 @@ class BannerUntag(Task):
def _process_chunk(self, chunk, errfile, donefile):
pageids_to_revids = dict(chunk)
res = self.site.api_query(
action="query", prop="revisions", rvprop="ids",
pageids="|".join(str(item[0]) for item in chunk), formatversion=2)
action="query",
prop="revisions",
rvprop="ids",
pageids="|".join(str(item[0]) for item in chunk),
formatversion=2,
)

stage2 = []
for pagedata in res["query"]["pages"]:
@@ -78,7 +83,7 @@ class BannerUntag(Task):
if pageids_to_revids[pageid] == revid:
stage2.append(str(parentid))
else:
self.logger.info(u"Skipping [[%s]], not latest edit" % title)
self.logger.info("Skipping [[%s]], not latest edit" % title)
donefile.write("%d\n" % pageid)
errfile.write("%s\n" % title.encode("utf8"))

@@ -86,8 +91,13 @@ class BannerUntag(Task):
return

res2 = self.site.api_query(
action="query", prop="revisions", rvprop="content", rvslots="main",
revids="|".join(stage2), formatversion=2)
action="query",
prop="revisions",
rvprop="content",
rvslots="main",
revids="|".join(stage2),
formatversion=2,
)

for pagedata in res2["query"]["pages"]:
revision = pagedata["revisions"][0]["slots"]["main"]
@@ -97,7 +107,7 @@ class BannerUntag(Task):
title = pagedata["title"]
content = revision["content"]

self.logger.debug(u"Reverting one edit on [[%s]]" % title)
self.logger.debug("Reverting one edit on [[%s]]" % title)
page = self.site.get_page(title)
page.edit(content, self.make_summary(self.summary), minor=True)



+ 2
- 2
tasks/blp_tag.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -22,9 +20,11 @@

from earwigbot.tasks import Task


class BLPTag(Task):
"""A task to add |blp=yes to ``{{WPB}}`` or ``{{WPBS}}`` when it is used
along with ``{{WP Biography}}``."""

name = "blp_tag"
number = 12



+ 149
- 84
tasks/drn_clerkbot.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -20,21 +18,23 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import re
from datetime import datetime
from os.path import expanduser
import re
from threading import RLock
from time import mktime, sleep, time

from mwparserfromhell import parse as mw_parse
import oursql
from mwparserfromhell import parse as mw_parse

from earwigbot import exceptions
from earwigbot.tasks import Task
from earwigbot.wiki import constants


class DRNClerkBot(Task):
"""A task to clerk for [[WP:DRN]]."""

name = "drn_clerkbot"
number = 19

@@ -63,19 +63,25 @@ class DRNClerkBot(Task):
cfg = self.config.tasks.get(self.name, {})

# Set some wiki-related attributes:
self.title = cfg.get("title",
"Wikipedia:Dispute resolution noticeboard")
self.title = cfg.get("title", "Wikipedia:Dispute resolution noticeboard")
self.chart_title = cfg.get("chartTitle", "Template:DRN case status")
self.volunteer_title = cfg.get("volunteers",
"Wikipedia:Dispute resolution noticeboard/Volunteering")
self.volunteer_title = cfg.get(
"volunteers", "Wikipedia:Dispute resolution noticeboard/Volunteering"
)
self.very_old_title = cfg.get("veryOldTitle", "User talk:Szhang (WMF)")
self.notify_stale_cases = cfg.get("notifyStaleCases", False)

clerk_summary = "Updating $3 case$4."
notify_summary = "Notifying user regarding [[WP:DRN|dispute resolution noticeboard]] case."
chart_summary = "Updating statistics for the [[WP:DRN|dispute resolution noticeboard]]."
notify_summary = (
"Notifying user regarding [[WP:DRN|dispute resolution noticeboard]] case."
)
chart_summary = (
"Updating statistics for the [[WP:DRN|dispute resolution noticeboard]]."
)
self.clerk_summary = self.make_summary(cfg.get("clerkSummary", clerk_summary))
self.notify_summary = self.make_summary(cfg.get("notifySummary", notify_summary))
self.notify_summary = self.make_summary(
cfg.get("notifySummary", notify_summary)
)
self.chart_summary = self.make_summary(cfg.get("chartSummary", chart_summary))

# Templates used:
@@ -84,13 +90,10 @@ class DRNClerkBot(Task):
self.tl_notify_party = templates.get("notifyParty", "DRN-notice")
self.tl_notify_stale = templates.get("notifyStale", "DRN stale notice")
self.tl_archive_top = templates.get("archiveTop", "DRN archive top")
self.tl_archive_bottom = templates.get("archiveBottom",
"DRN archive bottom")
self.tl_chart_header = templates.get("chartHeader",
"DRN case status/header")
self.tl_archive_bottom = templates.get("archiveBottom", "DRN archive bottom")
self.tl_chart_header = templates.get("chartHeader", "DRN case status/header")
self.tl_chart_row = templates.get("chartRow", "DRN case status/row")
self.tl_chart_footer = templates.get("chartFooter",
"DRN case status/footer")
self.tl_chart_footer = templates.get("chartFooter", "DRN case status/footer")

# Connection data for our SQL database:
kwargs = cfg.get("sql", {})
@@ -114,7 +117,7 @@ class DRNClerkBot(Task):
if action in ["all", "update_volunteers"]:
self.update_volunteers(conn, site)
if action in ["all", "clerk"]:
log = u"Starting update to [[{0}]]".format(self.title)
log = f"Starting update to [[{self.title}]]"
self.logger.info(log)
cases = self.read_database(conn)
page = site.get_page(self.title)
@@ -137,7 +140,7 @@ class DRNClerkBot(Task):

def update_volunteers(self, conn, site):
"""Updates and stores the list of dispute resolution volunteers."""
log = u"Updating volunteer list from [[{0}]]"
log = "Updating volunteer list from [[{0}]]"
self.logger.info(log.format(self.volunteer_title))
page = site.get_page(self.volunteer_title)
try:
@@ -146,7 +149,7 @@ class DRNClerkBot(Task):
text = ""
marker = "<!-- please don't remove this comment (used by EarwigBot) -->"
if marker not in text:
log = u"The marker ({0}) wasn't found in the volunteer list at [[{1}]]!"
log = "The marker ({0}) wasn't found in the volunteer list at [[{1}]]!"
self.logger.error(log.format(marker, page.title))
return
text = text.split(marker)[1]
@@ -190,8 +193,8 @@ class DRNClerkBot(Task):
"""Read the noticeboard content and update the list of _Cases."""
nextid = self.select_next_id(conn)
tl_status_esc = re.escape(self.tl_status)
split = re.split("(^==\s*[^=]+?\s*==$)", text, flags=re.M|re.U)
for i in xrange(len(split)):
split = re.split("(^==\s*[^=]+?\s*==$)", text, flags=re.M | re.U)
for i in range(len(split)):
if i + 1 == len(split):
break
if not split[i].startswith("=="):
@@ -209,8 +212,10 @@ class DRNClerkBot(Task):
id_ = nextid
nextid += 1
re_id2 = "(\{\{" + tl_status_esc
re_id2 += r"(.*?)\}\})(<!-- Bot Case ID \(please don't modify\): .*? -->)?"
repl = ur"\1 <!-- Bot Case ID (please don't modify): {0} -->"
re_id2 += (
r"(.*?)\}\})(<!-- Bot Case ID \(please don't modify\): .*? -->)?"
)
repl = r"\1 <!-- Bot Case ID (please don't modify): {0} -->"
body = re.sub(re_id2, repl.format(id_), body)
re_f = r"\{\{drn filing editor\|(.*?)\|"
re_f += r"(\d{2}:\d{2},\s\d{1,2}\s\w+\s\d{4}\s\(UTC\))\}\}"
@@ -222,16 +227,30 @@ class DRNClerkBot(Task):
f_time = datetime.strptime(match.group(2), strp)
else:
f_user, f_time = None, datetime.utcnow()
case = _Case(id_, title, status, self.STATUS_UNKNOWN, f_user,
f_time, f_user, f_time, "", self.min_ts,
self.min_ts, False, False, False, len(body),
new=True)
case = _Case(
id_,
title,
status,
self.STATUS_UNKNOWN,
f_user,
f_time,
f_user,
f_time,
"",
self.min_ts,
self.min_ts,
False,
False,
False,
len(body),
new=True,
)
cases.append(case)
log = u"Added new case {0} ('{1}', status={2}, by {3})"
log = "Added new case {0} ('{1}', status={2}, by {3})"
self.logger.debug(log.format(id_, title, status, f_user))
else:
case.status = status
log = u"Read active case {0} ('{1}')".format(id_, title)
log = f"Read active case {id_} ('{title}')"
self.logger.debug(log)
if case.title != title:
self.update_case_title(conn, id_, title)
@@ -244,7 +263,7 @@ class DRNClerkBot(Task):
cases.remove(case) # Ignore archived case
else:
case.status = self.STATUS_UNKNOWN
log = u"Dropped case {0} because it is no longer on the page ('{1}')"
log = "Dropped case {0} because it is no longer on the page ('{1}')"
self.logger.debug(log.format(case.id, case.title))

self.logger.debug("Done reading cases from the noticeboard page")
@@ -262,7 +281,7 @@ class DRNClerkBot(Task):
def read_status(self, body):
"""Parse the current status from a case body."""
templ = re.escape(self.tl_status)
status = re.search("\{\{" + templ + "\|?(.*?)\}\}", body, re.S|re.U)
status = re.search("\{\{" + templ + "\|?(.*?)\}\}", body, re.S | re.U)
if not status:
return self.STATUS_NEW
for option, names in self.ALIASES.iteritems():
@@ -275,7 +294,7 @@ class DRNClerkBot(Task):
query = "UPDATE cases SET case_title = ? WHERE case_id = ?"
with conn.cursor() as cursor:
cursor.execute(query, (title, id_))
log = u"Updated title of case {0} to '{1}'".format(id_, title)
log = f"Updated title of case {id_} to '{title}'"
self.logger.debug(log)

def clerk(self, conn, cases):
@@ -286,7 +305,7 @@ class DRNClerkBot(Task):
volunteers = [name for (name,) in cursor.fetchall()]
notices = []
for case in cases:
log = u"Clerking case {0} ('{1}')".format(case.id, case.title)
log = f"Clerking case {case.id} ('{case.title}')"
self.logger.debug(log)
if case.status == self.STATUS_UNKNOWN:
self.save_existing_case(conn, case)
@@ -312,8 +331,11 @@ class DRNClerkBot(Task):
notices = self.clerk_needassist_case(case, volunteers, newsigs)
elif case.status == self.STATUS_STALE:
notices = self.clerk_stale_case(case, newsigs)
if case.status in [self.STATUS_RESOLVED, self.STATUS_CLOSED,
self.STATUS_FAILED]:
if case.status in [
self.STATUS_RESOLVED,
self.STATUS_CLOSED,
self.STATUS_FAILED,
]:
self.clerk_closed_case(case, signatures)
else:
self.add_missing_reflist(case)
@@ -374,10 +396,10 @@ class DRNClerkBot(Task):
tmpl = self.tl_notify_stale
title = case.title.replace("|", "&#124;")
template = "{{subst:" + tmpl + "|" + title + "}}"
miss = "<!-- Template:DRN stale notice | {0} -->".format(title)
miss = f"<!-- Template:DRN stale notice | {title} -->"
notice = _Notice(self.very_old_title, template, miss)
case.very_old_notified = True
msg = u" {0}: will notify [[{1}]] with '{2}'"
msg = " {0}: will notify [[{1}]] with '{2}'"
log = msg.format(case.id, self.very_old_title, template)
self.logger.debug(log)
return [notice]
@@ -428,7 +450,7 @@ class DRNClerkBot(Task):
if not re.search(arch_bottom + r"\s*\}\}\s*\Z", case.body):
case.body += "\n{{" + arch_bottom + "}}"
case.archived = True
self.logger.debug(u" {0}: archived case".format(case.id))
self.logger.debug(f" {case.id}: archived case")

def check_for_needassist(self, case):
"""Check whether a case is old enough to be set to "needassist"."""
@@ -446,10 +468,10 @@ class DRNClerkBot(Task):
new_n = "NEW" if not new_n else new_n
if case.last_action != new:
case.status = new
log = u" {0}: {1} -> {2}"
log = " {0}: {1} -> {2}"
self.logger.debug(log.format(case.id, old_n, new_n))
return
log = u"Avoiding {0} {1} -> {2} because we already did this ('{3}')"
log = "Avoiding {0} {1} -> {2} because we already did this ('{3}')"
self.logger.info(log.format(case.id, old_n, new_n, case.title))

def read_signatures(self, text):
@@ -461,7 +483,7 @@ class DRNClerkBot(Task):
regex += r"([^\n\[\]|]{,256}?)(?:\||\]\])"
regex += r"(?!.*?(?:User(?:\stalk)?\:|Special\:Contributions\/).*?)"
regex += r".{,256}?(\d{2}:\d{2},\s\d{1,2}\s\w+\s\d{4}\s\(UTC\))"
matches = re.findall(regex, text, re.U|re.I)
matches = re.findall(regex, text, re.U | re.I)
signatures = []
for userlink, stamp in matches:
username = userlink.split("/", 1)[0].replace("_", " ").strip()
@@ -494,13 +516,13 @@ class DRNClerkBot(Task):
too_late = "<!--Template:DRN-notice-->"

re_parties = "<span.*?>'''Users involved'''</span>(.*?)<span.*?>"
text = re.search(re_parties, case.body, re.S|re.U)
text = re.search(re_parties, case.body, re.S | re.U)
for line in text.group(1).splitlines():
user = re.search("[:*#]{,5} \{\{User\|(.*?)\}\}", line)
if user:
party = user.group(1).replace("_", " ").strip()
if party.startswith("User:"):
party = party[len("User:"):]
party = party[len("User:") :]
if party:
party = party[0].upper() + party[1:]
if party == case.file_user:
@@ -509,7 +531,7 @@ class DRNClerkBot(Task):
notices.append(notice)

case.parties_notified = True
log = u" {0}: will try to notify {1} parties with '{2}'"
log = " {0}: will try to notify {1} parties with '{2}'"
self.logger.debug(log.format(case.id, len(notices), template))
return notices

@@ -562,22 +584,35 @@ class DRNClerkBot(Task):
for name, stamp in additions:
args.append((case.id, name, stamp))
cursor.executemany(query2, args)
msg = u" {0}: added {1} signatures and removed {2}"
msg = " {0}: added {1} signatures and removed {2}"
log = msg.format(case.id, len(additions), len(removals))
self.logger.debug(log)

def save_new_case(self, conn, case):
"""Save a brand new case to the database."""
args = (case.id, case.title, case.status, case.last_action,
case.file_user, case.file_time, case.modify_user,
case.modify_time, case.volunteer_user, case.volunteer_time,
case.close_time, case.parties_notified,
case.very_old_notified, case.archived,
case.last_volunteer_size)
args = (
case.id,
case.title,
case.status,
case.last_action,
case.file_user,
case.file_time,
case.modify_user,
case.modify_time,
case.volunteer_user,
case.volunteer_time,
case.close_time,
case.parties_notified,
case.very_old_notified,
case.archived,
case.last_volunteer_size,
)
with conn.cursor() as cursor:
query = "INSERT INTO cases VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
query = (
"INSERT INTO cases VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
)
cursor.execute(query, args)
log = u" {0}: inserted new case into database".format(case.id)
log = f" {case.id}: inserted new case into database"
self.logger.debug(log)

def save_existing_case(self, conn, case):
@@ -602,22 +637,22 @@ class DRNClerkBot(Task):
("case_parties_notified", case.parties_notified),
("case_very_old_notified", case.very_old_notified),
("case_archived", case.archived),
("case_last_volunteer_size", case.last_volunteer_size)
("case_last_volunteer_size", case.last_volunteer_size),
]
for column, data in fields_to_check:
if data != stored[column]:
changes.append(column + " = ?")
args.append(data)
msg = u" {0}: will alter {1} ('{2}' -> '{3}')"
msg = " {0}: will alter {1} ('{2}' -> '{3}')"
log = msg.format(case.id, column, stored[column], data)
self.logger.debug(log)
if changes:
changes = ", ".join(changes)
args.append(case.id)
query = "UPDATE cases SET {0} WHERE case_id = ?".format(changes)
query = f"UPDATE cases SET {changes} WHERE case_id = ?"
cursor.execute(query, args)
else:
log = u" {0}: no changes to commit".format(case.id)
log = f" {case.id}: no changes to commit"
self.logger.debug(log)

def save(self, page, cases, kwargs, start):
@@ -629,7 +664,7 @@ class DRNClerkBot(Task):
newtext = newtext.replace(case.old, case.body)
counter += 1
if newtext == text:
self.logger.info(u"Nothing to edit on [[{0}]]".format(page.title))
self.logger.info(f"Nothing to edit on [[{page.title}]]")
return True

worktime = time() - start
@@ -646,7 +681,7 @@ class DRNClerkBot(Task):
summary = self.clerk_summary.replace("$3", str(counter))
summary = summary.replace("$4", "" if counter == 1 else "s")
page.edit(newtext, summary, minor=True, bot=True)
log = u"Saved page [[{0}]] ({1} updates)"
log = "Saved page [[{0}]] ({1} updates)"
self.logger.info(log.format(page.title, counter))
return True

@@ -657,13 +692,13 @@ class DRNClerkBot(Task):
return
for notice in notices:
target, template = notice.target, notice.template
log = u"Trying to notify [[{0}]] with '{1}'"
log = "Trying to notify [[{0}]] with '{1}'"
self.logger.debug(log.format(target, template))
page = site.get_page(target, follow_redirects=True)
if page.namespace == constants.NS_USER_TALK:
user = site.get_user(target.split(":", 1)[1])
if not user.exists and not user.is_ip:
log = u"Skipping [[{0}]]; user does not exist and is not an IP"
log = "Skipping [[{0}]]; user does not exist and is not an IP"
self.logger.info(log.format(target))
continue
try:
@@ -671,7 +706,7 @@ class DRNClerkBot(Task):
except exceptions.PageNotFoundError:
text = ""
if notice.too_late and notice.too_late in text:
log = u"Skipping [[{0}]]; was already notified with '{1}'"
log = "Skipping [[{0}]]; was already notified with '{1}'"
self.logger.info(log.format(page.title, template))
continue
text += ("\n" if text else "") + template
@@ -679,10 +714,10 @@ class DRNClerkBot(Task):
page.edit(text, self.notify_summary, minor=False, bot=True)
except exceptions.EditError as error:
name, msg = type(error).name, error.message
log = u"Couldn't leave notice on [[{0}]] because of {1}: {2}"
log = "Couldn't leave notice on [[{0}]] because of {1}: {2}"
self.logger.error(log.format(page.title, name, msg))
else:
log = u"Notified [[{0}]] with '{1}'"
log = "Notified [[{0}]] with '{1}'"
self.logger.info(log.format(page.title, template))

self.logger.debug("Done sending notices")
@@ -690,25 +725,34 @@ class DRNClerkBot(Task):
def update_chart(self, conn, site):
"""Update the chart of open or recently closed cases."""
page = site.get_page(self.chart_title)
self.logger.info(u"Updating case status at [[{0}]]".format(page.title))
self.logger.info(f"Updating case status at [[{page.title}]]")
statuses = self.compile_chart(conn)
text = page.get()
newtext = re.sub(u"<!-- status begin -->(.*?)<!-- status end -->",
"<!-- status begin -->\n" + statuses + "\n<!-- status end -->",
text, flags=re.DOTALL)
newtext = re.sub(
"<!-- status begin -->(.*?)<!-- status end -->",
"<!-- status begin -->\n" + statuses + "\n<!-- status end -->",
text,
flags=re.DOTALL,
)
if newtext == text:
self.logger.info("Chart unchanged; not saving")
return

newtext = re.sub("<!-- sig begin -->(.*?)<!-- sig end -->",
"<!-- sig begin -->~~~ at ~~~~~<!-- sig end -->",
newtext)
newtext = re.sub(
"<!-- sig begin -->(.*?)<!-- sig end -->",
"<!-- sig begin -->~~~ at ~~~~~<!-- sig end -->",
newtext,
)
page.edit(newtext, self.chart_summary, minor=True, bot=True)
self.logger.info(u"Chart saved to [[{0}]]".format(page.title))
self.logger.info(f"Chart saved to [[{page.title}]]")

def compile_chart(self, conn):
"""Actually generate the chart from the database."""
chart = "{{" + self.tl_chart_header + "|small={{{small|}}}|collapsed={{{collapsed|}}}}}\n"
chart = (
"{{"
+ self.tl_chart_header
+ "|small={{{small|}}}|collapsed={{{collapsed|}}}}}\n"
)
query = "SELECT * FROM cases WHERE case_status != ?"
with conn.cursor(oursql.DictCursor) as cursor:
cursor.execute(query, (self.STATUS_UNKNOWN,))
@@ -719,12 +763,16 @@ class DRNClerkBot(Task):

def compile_row(self, case):
"""Generate a single row of the chart from a dict via the database."""
data = u"|t={case_title}|d={title}|s={case_status}"
data = "|t={case_title}|d={title}|s={case_status}"
data += "|cu={case_file_user}|cs={file_sortkey}|ct={file_time}"
if case["case_volunteer_user"]:
data += "|vu={case_volunteer_user}|vs={volunteer_sortkey}|vt={volunteer_time}"
data += (
"|vu={case_volunteer_user}|vs={volunteer_sortkey}|vt={volunteer_time}"
)
case["volunteer_time"] = self.format_time(case["case_volunteer_time"])
case["volunteer_sortkey"] = int(mktime(case["case_volunteer_time"].timetuple()))
case["volunteer_sortkey"] = int(
mktime(case["case_volunteer_time"].timetuple())
)
data += "|mu={case_modify_user}|ms={modify_sortkey}|mt={modify_time}"

case["case_title"] = mw_parse(case["case_title"]).strip_code()
@@ -748,7 +796,7 @@ class DRNClerkBot(Task):
num = seconds // size
seconds -= num * size
if num:
chunk = "{0} {1}".format(num, name if num == 1 else name + "s")
chunk = "{} {}".format(num, name if num == 1 else name + "s")
msg.append(chunk)
return ", ".join(msg) + " ago" if msg else "0 hours ago"

@@ -766,12 +814,28 @@ class DRNClerkBot(Task):
cursor.execute(query, (self.STATUS_UNKNOWN,))


class _Case(object):
class _Case:
"""A object representing a dispute resolution case."""
def __init__(self, id_, title, status, last_action, file_user, file_time,
modify_user, modify_time, volunteer_user, volunteer_time,
close_time, parties_notified, archived, very_old_notified,
last_volunteer_size, new=False):

def __init__(
self,
id_,
title,
status,
last_action,
file_user,
file_time,
modify_user,
modify_time,
volunteer_user,
volunteer_time,
close_time,
parties_notified,
archived,
very_old_notified,
last_volunteer_size,
new=False,
):
self.id = id_
self.title = title
self.status = status
@@ -794,8 +858,9 @@ class _Case(object):
self.old = None


class _Notice(object):
class _Notice:
"""An object representing a notice to be sent to a user or a page."""

def __init__(self, target, template, too_late=None):
self.target = target
self.template = template


+ 31
- 23
tasks/infobox_station.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2015 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -20,19 +18,20 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import unicode_literals
from time import sleep

import mwparserfromhell

from earwigbot.tasks import Task
from earwigbot.wiki import constants

import mwparserfromhell

class InfoboxStation(Task):
"""
A task to replace ``{{Infobox China station}}`` and
``{{Infobox Japan station}}`` with ``{{Infobox station}}``.
"""

name = "infobox_station"
number = 20

@@ -43,19 +42,20 @@ class InfoboxStation(Task):
["Infobox China station", "Infobox china station"],
"Infobox China station/sandbox",
"Infobox China station/sandbox/cats",
"Wikipedia:Templates for discussion/Log/2015 February 8#Template:Infobox China station"
"Wikipedia:Templates for discussion/Log/2015 February 8#Template:Infobox China station",
),
"Japan": (
["Infobox Japan station", "Infobox japan station"],
"Infobox Japan station/sandbox",
"Infobox Japan station/sandbox/cats",
"Wikipedia:Templates for discussion/Log/2015 May 9#Template:Infobox Japan station"
"Wikipedia:Templates for discussion/Log/2015 May 9#Template:Infobox Japan station",
),
}
self._replacement = "{{Infobox station}}"
self._sleep_time = 2
self.summary = self.make_summary(
"Replacing {source} with {dest} per [[{discussion}|TfD]].")
"Replacing {source} with {dest} per [[{discussion}|TfD]]."
)

def run(self, **kwargs):
limit = int(kwargs.get("limit", kwargs.get("edits", 0)))
@@ -68,7 +68,7 @@ class InfoboxStation(Task):
"""
Replace a template in all pages that transclude it.
"""
self.logger.info("Replacing {0} infobox template".format(name))
self.logger.info(f"Replacing {name} infobox template")

count = 0
for title in self._get_transclusions(args[0][0]):
@@ -82,15 +82,15 @@ class InfoboxStation(Task):
page = self.site.get_page(title)
self._process_page(page, args)

self.logger.info("All {0} infoboxes updated".format(name))
self.logger.info(f"All {name} infoboxes updated")

def _process_page(self, page, args):
"""
Process a single page to replace a template.
"""
self.logger.debug("Processing [[{0}]]".format(page.title))
self.logger.debug(f"Processing [[{page.title}]]")
if not page.check_exclusion():
self.logger.warn("Bot excluded from [[{0}]]".format(page.title))
self.logger.warn(f"Bot excluded from [[{page.title}]]")
return

code = mwparserfromhell.parse(page.get(), skip_style_tags=True)
@@ -98,7 +98,7 @@ class InfoboxStation(Task):
for tmpl in code.filter_templates():
if tmpl.name.matches(args[0]):
tmpl.name = "subst:" + args[2]
cats.extend(self._get_cats(page, unicode(tmpl)))
cats.extend(self._get_cats(page, str(tmpl)))
tmpl.name = "subst:" + args[1]

self._add_cats(code, cats)
@@ -108,19 +108,25 @@ class InfoboxStation(Task):
return

summary = self.summary.format(
source="{{" + args[0][0] + "}}", dest=self._replacement,
discussion=args[3])
page.edit(unicode(code), summary, minor=True)
source="{{" + args[0][0] + "}}", dest=self._replacement, discussion=args[3]
)
page.edit(str(code), summary, minor=True)
sleep(self._sleep_time)

def _add_cats(self, code, cats):
"""Add category data (*cats*) to wikicode."""
current_cats = code.filter_wikilinks(
matches=lambda link: link.title.lower().startswith("category:"))
norm = lambda cat: cat.title.lower()[len("category:"):].strip()
matches=lambda link: link.title.lower().startswith("category:")
)

def norm(cat):
return cat.title.lower()[len("category:") :].strip()

catlist = [unicode(cat) for cat in cats if not any(
norm(cur) == norm(cat) for cur in current_cats)]
catlist = [
str(cat)
for cat in cats
if not any(norm(cur) == norm(cat) for cur in current_cats)
]
if not catlist:
return
text = "\n".join(catlist)
@@ -140,8 +146,9 @@ class InfoboxStation(Task):
"""
Return the categories that should be added to the page.
"""
result = self.site.api_query(action="parse", title=page.title,
prop="text", onlypst=1, text=tmpl)
result = self.site.api_query(
action="parse", title=page.title, prop="text", onlypst=1, text=tmpl
)
text = result["parse"]["text"]["*"]
return mwparserfromhell.parse(text).filter_wikilinks()

@@ -154,6 +161,7 @@ class InfoboxStation(Task):
LEFT JOIN page ON tl_from = page_id
WHERE tl_namespace = ? AND tl_title = ? AND tl_from_namespace = ?"""

results = self.site.sql_query(query, (
constants.NS_TEMPLATE, tmpl.replace(" ", "_"), constants.NS_MAIN))
results = self.site.sql_query(
query, (constants.NS_TEMPLATE, tmpl.replace(" ", "_"), constants.NS_MAIN)
)
return [title.decode("utf8").replace("_", " ") for (title,) in results]

+ 124
- 104
tasks/synonym_authorities.py View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2021 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -33,41 +31,43 @@ import unidecode

from earwigbot.tasks import Task


class SynonymAuthorities(Task):
"""
Correct mismatched synonym authorities in taxon articles created by Qbugbot.
"""
name = 'synonym_authorities'

name = "synonym_authorities"
number = 21
base_summary = (
'Fix {changes} mismatched synonym authorities per ITIS '
'([[Wikipedia:Bots/Requests for approval/EarwigBot 21|more info]])'
"Fix {changes} mismatched synonym authorities per ITIS "
"([[Wikipedia:Bots/Requests for approval/EarwigBot 21|more info]])"
)

def setup(self):
self.site = self.bot.wiki.get_site()
self.creator = 'Qbugbot'
self.pages_path = 'qbugbot_pages.json'
self.synonyms_path = 'qbugbot_synonyms.json'
self.edits_path = 'qbugbot_edits.json'
self.itis_path = 'itis.db'
self.creator = "Qbugbot"
self.pages_path = "qbugbot_pages.json"
self.synonyms_path = "qbugbot_synonyms.json"
self.edits_path = "qbugbot_edits.json"
self.itis_path = "itis.db"
self.summary = self.make_summary(self.base_summary)

def run(self, action=None):
if action == 'fetch_pages':
if action == "fetch_pages":
self.fetch_pages()
elif action == 'fetch_synonyms':
elif action == "fetch_synonyms":
self.fetch_synonyms()
elif action == 'prepare_edits':
elif action == "prepare_edits":
self.prepare_edits()
elif action == 'view_edits':
elif action == "view_edits":
self.view_edits()
elif action == 'save_edits':
elif action == "save_edits":
self.save_edits()
elif action is None:
raise RuntimeError(f'This task requires an action')
raise RuntimeError("This task requires an action")
else:
raise RuntimeError(f'No such action: {action}')
raise RuntimeError(f"No such action: {action}")

def fetch_pages(self):
"""
@@ -77,49 +77,49 @@ class SynonymAuthorities(Task):
for chunk in more_itertools.chunked(self._iter_creations(), 500):
pages.update(self._fetch_chunk(chunk))

self.logger.info(f'Fetched {len(pages)} pages')
with open(self.pages_path, 'w') as fp:
self.logger.info(f"Fetched {len(pages)} pages")
with open(self.pages_path, "w") as fp:
json.dump(pages, fp)

def _iter_creations(self):
# TODO: include converted redirects ([[Category:Articles created by Qbugbot]])
params = {
'action': 'query',
'list': 'usercontribs',
'ucuser': self.creator,
'uclimit': 5000,
'ucnamespace': 0,
'ucprop': 'ids',
'ucshow': 'new',
'formatversion': 2,
"action": "query",
"list": "usercontribs",
"ucuser": self.creator,
"uclimit": 5000,
"ucnamespace": 0,
"ucprop": "ids",
"ucshow": "new",
"formatversion": 2,
}

results = self.site.api_query(**params)
while contribs := results['query']['usercontribs']:
while contribs := results["query"]["usercontribs"]:
yield from contribs
if 'continue' not in results:
if "continue" not in results:
break
params.update(results['continue'])
params.update(results["continue"])
results = self.site.api_query(**params)

def _fetch_chunk(self, chunk):
result = self.site.api_query(
action='query',
prop='revisions',
rvprop='ids|content',
rvslots='main',
pageids='|'.join(str(page['pageid']) for page in chunk),
action="query",
prop="revisions",
rvprop="ids|content",
rvslots="main",
pageids="|".join(str(page["pageid"]) for page in chunk),
formatversion=2,
)

pages = result['query']['pages']
pages = result["query"]["pages"]
assert len(pages) == len(chunk)

return {
page['pageid']: {
'title': page['title'],
'content': page['revisions'][0]['slots']['main']['content'],
'revid': page['revisions'][0]['revid'],
page["pageid"]: {
"title": page["title"],
"content": page["revisions"][0]["slots"]["main"]["content"],
"revid": page["revisions"][0]["revid"],
}
for page in pages
}
@@ -130,37 +130,38 @@ class SynonymAuthorities(Task):
"""
with open(self.pages_path) as fp:
pages = json.load(fp)
wikidata = self.bot.wiki.get_site('wikidatawiki')
itis_property = 'P815'
wikidata = self.bot.wiki.get_site("wikidatawiki")
itis_property = "P815"
conn = sqlite3.connect(self.itis_path)
cur = conn.cursor()

synonyms = {}
for chunk in more_itertools.chunked(pages.items(), 50):
titles = {page['title']: pageid for pageid, page in chunk}
titles = {page["title"]: pageid for pageid, page in chunk}
result = wikidata.api_query(
action='wbgetentities',
sites='enwiki',
titles='|'.join(titles),
props='claims|sitelinks',
languages='en',
sitefilter='enwiki',
action="wbgetentities",
sites="enwiki",
titles="|".join(titles),
props="claims|sitelinks",
languages="en",
sitefilter="enwiki",
)

for item in result['entities'].values():
if 'sitelinks' not in item:
self.logger.warning(f'No sitelinks for item: {item}')
for item in result["entities"].values():
if "sitelinks" not in item:
self.logger.warning(f"No sitelinks for item: {item}")
continue
title = item['sitelinks']['enwiki']['title']
title = item["sitelinks"]["enwiki"]["title"]
pageid = titles[title]
if itis_property not in item['claims']:
self.logger.warning(f'No ITIS ID for [[{title}]]')
if itis_property not in item["claims"]:
self.logger.warning(f"No ITIS ID for [[{title}]]")
continue
claims = item['claims'][itis_property]
claims = item["claims"][itis_property]
assert len(claims) == 1, (title, claims)
itis_id = claims[0]['mainsnak']['datavalue']['value']
itis_id = claims[0]["mainsnak"]["datavalue"]["value"]

cur.execute("""
cur.execute(
"""
SELECT synonym.complete_name, authors.taxon_author
FROM synonym_links sl
INNER JOIN taxonomic_units accepted ON sl.tsn_accepted = accepted.tsn
@@ -172,11 +173,13 @@ class SynonymAuthorities(Task):
FROM taxonomic_units accepted
LEFT JOIN taxon_authors_lkp authors USING (taxon_author_id)
WHERE accepted.tsn = ?;
""", (itis_id, itis_id))
""",
(itis_id, itis_id),
)
synonyms[pageid] = cur.fetchall()

self.logger.info(f'Fetched {len(synonyms)} synonym lists')
with open(self.synonyms_path, 'w') as fp:
self.logger.info(f"Fetched {len(synonyms)} synonym lists")
with open(self.synonyms_path, "w") as fp:
json.dump(synonyms, fp)

def prepare_edits(self):
@@ -192,65 +195,73 @@ class SynonymAuthorities(Task):
for pageid, pageinfo in pages.items():
if pageid not in synonyms:
continue
wikitext = mwparserfromhell.parse(pageinfo['content'])
wikitext = mwparserfromhell.parse(pageinfo["content"])
try:
changes = self._update_synonyms(pageinfo['title'], wikitext, synonyms[pageid])
changes = self._update_synonyms(
pageinfo["title"], wikitext, synonyms[pageid]
)
if not changes:
continue
except Exception:
self.logger.error(f'Failed to update synonyms for [[{pageinfo["title"]}]]')
self.logger.error(
f'Failed to update synonyms for [[{pageinfo["title"]}]]'
)
raise
edits[pageid] = {
'title': pageinfo['title'],
'revid': pageinfo['revid'],
'original': pageinfo['content'],
'content': str(wikitext),
'changes': changes,
"title": pageinfo["title"],
"revid": pageinfo["revid"],
"original": pageinfo["content"],
"content": str(wikitext),
"changes": changes,
}

with open(self.edits_path, 'w') as fp:
with open(self.edits_path, "w") as fp:
json.dump(edits, fp)

def _update_synonyms(self, title, wikitext, synonyms):
if len(synonyms) <= 1:
return False
if wikitext.split('\n', 1)[0].upper().startswith('#REDIRECT'):
self.logger.debug(f'[[{title}]]: Skipping redirect')
if wikitext.split("\n", 1)[0].upper().startswith("#REDIRECT"):
self.logger.debug(f"[[{title}]]: Skipping redirect")
return False

taxoboxes = wikitext.filter_templates(
matches=lambda tmpl: tmpl.name.matches(('Speciesbox', 'Automatic taxobox')))
matches=lambda tmpl: tmpl.name.matches(("Speciesbox", "Automatic taxobox"))
)
if not taxoboxes:
self.logger.warning(f'[[{title}]]: No taxoboxes found')
self.logger.warning(f"[[{title}]]: No taxoboxes found")
return False
if len(taxoboxes) > 1:
self.logger.warning(f'[[{title}]]: Multiple taxoboxes found')
self.logger.warning(f"[[{title}]]: Multiple taxoboxes found")
return False

try:
syn_param = taxoboxes[0].get('synonyms')
syn_param = taxoboxes[0].get("synonyms")
except ValueError:
self.logger.debug(f'[[{title}]]: No synonyms parameter in taxobox')
self.logger.debug(f"[[{title}]]: No synonyms parameter in taxobox")
return False

tmpls = syn_param.value.filter_templates(
matches=lambda tmpl: tmpl.name.matches(('Species list', 'Taxon list')))
matches=lambda tmpl: tmpl.name.matches(("Species list", "Taxon list"))
)
if not tmpls:
# This means the bot's original work is no longer there. In most cases, this is
# an unrelated synonym list added by another editor and there is nothing to check,
# but it's possible someone converted the bot's list into a different format without
# checking the authorities. Those cases need to be manually checked.
self.logger.warning(f'[[{title}]]: Could not find a taxa list in taxobox')
self.logger.warning(f"[[{title}]]: Could not find a taxa list in taxobox")
return False
if len(tmpls) > 1:
self.logger.warning(f'[[{title}]]: Multiple taxa lists found in taxobox')
self.logger.warning(f"[[{title}]]: Multiple taxa lists found in taxobox")
return False

expected = {}
for taxon, author in synonyms:
if taxon in expected and expected[taxon] != author:
# These need to be manually reviewed
self.logger.warning(f'[[{title}]]: Expected synonym list has duplicates')
self.logger.warning(
f"[[{title}]]: Expected synonym list has duplicates"
)
return False
expected[self._normalize(taxon)] = self._normalize(author)

@@ -262,21 +273,27 @@ class SynonymAuthorities(Task):
taxon = self._normalize(taxon_param.value)
author = self._normalize(author_param.value)
if taxon not in expected:
self.logger.warning(f'[[{title}]]: Unknown synonym {taxon!r}')
self.logger.warning(f"[[{title}]]: Unknown synonym {taxon!r}")
return False
actual[taxon] = author
formatted_authors.setdefault(author, []).append(author_param.value.strip())

expected = {taxon: author for taxon, author in expected.items() if taxon in actual}
expected = {
taxon: author for taxon, author in expected.items() if taxon in actual
}
assert set(expected.keys()) == set(actual.keys())
if expected == actual:
self.logger.debug(f'[[{title}]]: Nothing to update')
self.logger.debug(f"[[{title}]]: Nothing to update")
return None
if list(expected.values()) != list(actual.values()):
if set(expected.values()) == set(actual.values()):
self.logger.warning(f'[[{title}]]: Actual authors are not in expected order')
self.logger.warning(
f"[[{title}]]: Actual authors are not in expected order"
)
else:
self.logger.warning(f'[[{title}]]: Actual authors do not match expected')
self.logger.warning(
f"[[{title}]]: Actual authors do not match expected"
)
return False

changes = []
@@ -285,15 +302,15 @@ class SynonymAuthorities(Task):
taxon = self._normalize(taxon_param.value)
if expected[taxon] != actual[taxon]:
author = formatted_authors[expected[taxon]].pop(0)
match = re.match(r'^(\s*).*?(\s*)$', str(author_param.value))
match = re.match(r"^(\s*).*?(\s*)$", str(author_param.value))
ws_before, ws_after = match.group(1), match.group(2)
author_param.value = f'{ws_before}{author}{ws_after}'
author_param.value = f"{ws_before}{author}{ws_after}"
changes.append((taxon, actual[taxon], expected[taxon]))

if changes:
self.logger.info(f'Will update {len(changes)} synonyms in [[{title}]]')
self.logger.info(f"Will update {len(changes)} synonyms in [[{title}]]")
else:
self.logger.debug(f'Nothing to update in [[{title}]]')
self.logger.debug(f"Nothing to update in [[{title}]]")
return changes

@staticmethod
@@ -305,7 +322,9 @@ class SynonymAuthorities(Task):
value = value.strip_code()
if not value or not value.strip():
return None
return unidecode.unidecode(value.strip().casefold().replace('&', 'and').replace(',', ''))
return unidecode.unidecode(
value.strip().casefold().replace("&", "and").replace(",", "")
)

def view_edits(self):
"""
@@ -314,15 +333,16 @@ class SynonymAuthorities(Task):
with open(self.edits_path) as fp:
edits = json.load(fp)

self.logger.info(f'{len(edits)} pages to edit')
self.logger.info(f"{len(edits)} pages to edit")
for pageid, edit in edits.items():
print(f'\n{pageid}: {edit["title"]}:')
old, new = edit['original'], edit['content']
old, new = edit["original"], edit["content"]

udiff = difflib.unified_diff(old.splitlines(), new.splitlines(), 'old', 'new')
udiff = difflib.unified_diff(
old.splitlines(), new.splitlines(), "old", "new"
)
subprocess.run(
['delta', '-s', '--paging', 'never'],
input='\n'.join(udiff), text=True
["delta", "-s", "--paging", "never"], input="\n".join(udiff), text=True
)

def save_edits(self):
@@ -332,21 +352,21 @@ class SynonymAuthorities(Task):
with open(self.edits_path) as fp:
edits = json.load(fp)

self.logger.info(f'{len(edits)} pages to edit')
self.logger.info(f"{len(edits)} pages to edit")
for pageid, edit in edits.items():
page = self.site.get_page(edit['title'])
self.logger.info(f'{pageid}: [[{page.title}]]')
page = self.site.get_page(edit["title"])
self.logger.info(f"{pageid}: [[{page.title}]]")

if self.shutoff_enabled():
raise RuntimeError('Shutoff enabled')
raise RuntimeError("Shutoff enabled")
if not page.check_exclusion():
self.logger.warning(f'[[{page.title}]]: Bot excluded from editing')
self.logger.warning(f"[[{page.title}]]: Bot excluded from editing")
continue

page.edit(
edit['content'],
summary=self.summary.format(changes=len(edit['changes'])),
baserevid=edit['revid'],
edit["content"],
summary=self.summary.format(changes=len(edit["changes"])),
baserevid=edit["revid"],
basetimestamp=None,
starttimestamp=None,
)


Loading…
Cancel
Save