From a73f618e0aec26efdc28b85ae824911e1b9536c1 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 12 Apr 2021 02:56:46 -0400 Subject: [PATCH] Initial conversion to Python 3 --- CHANGELOG | 14 +++- README.rst | 41 +++++------ docs/index.rst | 30 ++++---- docs/installation.rst | 10 +-- docs/setup.rst | 2 +- docs/tips.rst | 2 +- docs/toolset.rst | 12 +-- earwigbot/__init__.py | 2 +- earwigbot/bot.py | 4 +- earwigbot/commands/__init__.py | 7 +- earwigbot/commands/calc.py | 20 ++--- earwigbot/commands/cidr.py | 4 +- earwigbot/commands/crypt.py | 55 +++++++++----- earwigbot/commands/dictionary.py | 46 ++++++------ earwigbot/commands/editcount.py | 6 +- earwigbot/commands/help.py | 2 +- earwigbot/commands/langcode.py | 2 +- earwigbot/commands/link.py | 14 ++-- earwigbot/commands/notes.py | 2 +- earwigbot/commands/remind.py | 12 +-- earwigbot/commands/stalk.py | 10 +-- earwigbot/commands/threads.py | 2 +- earwigbot/commands/time_command.py | 2 +- earwigbot/commands/watchers.py | 2 +- earwigbot/config/__init__.py | 42 ++++++----- earwigbot/config/formatter.py | 4 +- earwigbot/config/node.py | 27 +++---- earwigbot/config/ordered_yaml.py | 12 +-- earwigbot/config/permissions.py | 4 +- earwigbot/config/script.py | 134 +++++++++++++++++----------------- earwigbot/exceptions.py | 2 +- earwigbot/irc/connection.py | 8 +- earwigbot/irc/data.py | 4 +- earwigbot/irc/frontend.py | 5 +- earwigbot/irc/rc.py | 2 +- earwigbot/irc/watcher.py | 5 +- earwigbot/lazy.py | 7 +- earwigbot/managers.py | 8 +- earwigbot/tasks/__init__.py | 6 +- earwigbot/tasks/wikiproject_tagger.py | 50 ++++++------- earwigbot/util.py | 4 +- earwigbot/wiki/__init__.py | 4 +- earwigbot/wiki/category.py | 4 +- earwigbot/wiki/copyvios/__init__.py | 10 +-- earwigbot/wiki/copyvios/exclusions.py | 14 ++-- earwigbot/wiki/copyvios/markov.py | 6 +- earwigbot/wiki/copyvios/parsers.py | 30 ++++---- earwigbot/wiki/copyvios/result.py | 8 +- earwigbot/wiki/copyvios/search.py | 69 ++--------------- 
earwigbot/wiki/copyvios/workers.py | 51 ++++++------- earwigbot/wiki/page.py | 22 +++--- earwigbot/wiki/site.py | 23 +++--- earwigbot/wiki/sitesdb.py | 12 +-- earwigbot/wiki/user.py | 6 +- setup.py | 32 ++++---- tests/test_calc.py | 2 +- tests/test_test.py | 4 +- 57 files changed, 446 insertions(+), 478 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index a5d82b4..d546b69 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,12 +1,20 @@ v0.4 (unreleased): -- Copyvio detector: improved parsing of excluded URL lists. -- Wiki: fixed not sending Content-Type header in POST requests. +- Migrated to Python 3. +- Copyvios: Configurable proxy support for specific domains. +- Copyvios: Parser-directed URL redirection. +- Copyvios: General parsing improvements. +- Copyvios: URL exclusion improvements. +- Copyvios: Removed long-deprecated Yahoo! BOSS search engine. +- Wiki: Fixed not sending Content-Type header in POST requests. +- IRC: Remember joined channels across restarts. +- IRC: Added !listchans. +- IRC > !stalk: Added modifiers to change message format or filter messages. v0.3 (released March 24, 2019): - Added various new features to the WikiProjectTagger task. -- Copyvio detector: improved sentence splitting algorithm; many performance +- Copyvio detector: Improved sentence splitting algorithm; many performance improvements. - Improved config file command/task exclusion logic. - Wiki: Added logging for warnings. diff --git a/README.rst b/README.rst index 6553a75..c52b02c 100644 --- a/README.rst +++ b/README.rst @@ -9,16 +9,15 @@ online at PyPI_). History ------- -Development began, based on the `Pywikipedia framework`_, in early 2009. -Approval for its first task, a `copyright violation detector`_, was carried out -in May, and the bot has been running consistently ever since (with the -exception of Jan/Feb 2011). It currently handles `several ongoing tasks`_ -ranging from statistics generation to category cleanup, and on-demand tasks -such as WikiProject template tagging. 
Since it started running, the bot has -made over 50,000 edits. +Development began, based on `Pywikibot`_, in early 2009. Approval for its +first task, a `copyright violation detector`_, was carried out in May, and the +bot has been running consistently ever since (with the exception of Jan/Feb 2011). +It currently handles `several ongoing tasks`_ ranging from statistics generation +to category cleanup, and on-demand tasks such as WikiProject template tagging. +Since it started running, the bot has made over 250,000 edits. A project to rewrite it from scratch began in early April 2011, thus moving -away from the Pywikipedia framework and allowing for less overall code, better +away from the Pywikibot framework and allowing for less overall code, better integration between bot parts, and easier maintenance. Installation @@ -181,21 +180,21 @@ Footnotes .. [1] ``python setup.py install``/``develop`` may require root, or use the ``--user`` switch to install for the current user only. -.. _EarwigBot: http://en.wikipedia.org/wiki/User:EarwigBot -.. _Python: http://python.org/ -.. _Wikipedia: http://en.wikipedia.org/ -.. _IRC: http://en.wikipedia.org/wiki/Internet_Relay_Chat -.. _PyPI: http://packages.python.org/earwigbot -.. _Pywikipedia framework: http://pywikipediabot.sourceforge.net/ -.. _copyright violation detector: http://en.wikipedia.org/wiki/Wikipedia:Bots/Requests_for_approval/EarwigBot_1 -.. _several ongoing tasks: http://en.wikipedia.org/wiki/User:EarwigBot#Tasks -.. _my instance of EarwigBot: http://en.wikipedia.org/wiki/User:EarwigBot +.. _EarwigBot: https://en.wikipedia.org/wiki/User:EarwigBot +.. _Python: https://python.org/ +.. _Wikipedia: https://en.wikipedia.org/ +.. _IRC: https://en.wikipedia.org/wiki/Internet_Relay_Chat +.. _PyPI: https://packages.python.org/earwigbot +.. _Pywikibot: https://www.mediawiki.org/wiki/Manual:Pywikibot +.. _copyright violation detector: https://en.wikipedia.org/wiki/Wikipedia:Bots/Requests_for_approval/EarwigBot_1 +.. 
_several ongoing tasks: https://en.wikipedia.org/wiki/User:EarwigBot#Tasks +.. _my instance of EarwigBot: https://en.wikipedia.org/wiki/User:EarwigBot .. _earwigbot-plugins: https://github.com/earwig/earwigbot-plugins .. _Python Package Index: https://pypi.python.org/pypi/earwigbot -.. _get pip: http://pypi.python.org/pypi/pip -.. _this StackOverflow post: http://stackoverflow.com/questions/6504810/how-to-install-lxml-on-ubuntu/6504860#6504860 -.. _git flow: http://nvie.com/posts/a-successful-git-branching-model/ -.. _explanation of YAML: http://en.wikipedia.org/wiki/YAML +.. _get pip: https://pypi.python.org/pypi/pip +.. _this StackOverflow post: https://stackoverflow.com/questions/6504810/how-to-install-lxml-on-ubuntu/6504860#6504860 +.. _git flow: https://nvie.com/posts/a-successful-git-branching-model/ +.. _explanation of YAML: https://en.wikipedia.org/wiki/YAML .. _earwigbot.bot.Bot: https://github.com/earwig/earwigbot/blob/develop/earwigbot/bot.py .. _earwigbot.config.BotConfig: https://github.com/earwig/earwigbot/blob/develop/earwigbot/config.py .. _earwigbot.commands.Command: https://github.com/earwig/earwigbot/blob/develop/earwigbot/commands/__init__.py diff --git a/docs/index.rst b/docs/index.rst index a885d0f..c2744d4 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -7,25 +7,25 @@ over IRC_. History ------- -Development began, based on the `Pywikipedia framework`_, in early 2009. -Approval for its fist task, a `copyright violation detector`_, was carried out -in May, and the bot has been running consistently ever since (with the -exception of Jan/Feb 2011). It currently handles `several ongoing tasks`_ -ranging from statistics generation to category cleanup, and on-demand tasks -such as WikiProject template tagging. Since it started running, the bot has -made over 50,000 edits. +Development began, based on `Pywikibot`_, in early 2009. 
Approval for its +first task, a `copyright violation detector`_, was carried out in May, and the +bot has been running consistently ever since (with the exception of Jan/Feb 2011). +It currently handles `several ongoing tasks`_ ranging from statistics generation +to category cleanup, and on-demand tasks such as WikiProject template tagging. +Since it started running, the bot has made over 250,000 edits. A project to rewrite it from scratch began in early April 2011, thus moving -away from the Pywikipedia framework and allowing for less overall code, better +away from the Pywikibot framework and allowing for less overall code, better integration between bot parts, and easier maintenance. -.. _EarwigBot: http://en.wikipedia.org/wiki/User:EarwigBot -.. _Python: http://python.org/ -.. _Wikipedia: http://en.wikipedia.org/ -.. _IRC: http://en.wikipedia.org/wiki/Internet_Relay_Chat -.. _Pywikipedia framework: http://pywikipediabot.sourceforge.net/ -.. _copyright violation detector: http://en.wikipedia.org/wiki/Wikipedia:Bots/Requests_for_approval/EarwigBot_1 -.. _several ongoing tasks: http://en.wikipedia.org/wiki/User:EarwigBot#Tasks +.. _EarwigBot: https://en.wikipedia.org/wiki/User:EarwigBot +.. _Python: https://python.org/ +.. _Wikipedia: https://en.wikipedia.org/ +.. _IRC: https://en.wikipedia.org/wiki/Internet_Relay_Chat +.. _PyPI: https://packages.python.org/earwigbot +.. _Pywikibot: https://www.mediawiki.org/wiki/Manual:Pywikibot +.. _copyright violation detector: https://en.wikipedia.org/wiki/Wikipedia:Bots/Requests_for_approval/EarwigBot_1 +.. _several ongoing tasks: https://en.wikipedia.org/wiki/User:EarwigBot#Tasks Contents -------- diff --git a/docs/installation.rst b/docs/installation.rst index c4c1e01..23889fc 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -50,9 +50,9 @@ features (``feature/*`` branches):: or use the :command:`--user` switch to install for the current user only. -.. 
_my instance of EarwigBot: http://en.wikipedia.org/wiki/User:EarwigBot +.. _my instance of EarwigBot: https://en.wikipedia.org/wiki/User:EarwigBot .. _earwigbot-plugins: https://github.com/earwig/earwigbot-plugins -.. _Python Package Index: http://pypi.python.org -.. _get pip: http://pypi.python.org/pypi/pip -.. _this StackOverflow post: http://stackoverflow.com/questions/6504810/how-to-install-lxml-on-ubuntu/6504860#6504860 -.. _git flow: http://nvie.com/posts/a-successful-git-branching-model/ +.. _Python Package Index: https://pypi.python.org/pypi/earwigbot +.. _get pip: https://pypi.python.org/pypi/pip +.. _this StackOverflow post: https://stackoverflow.com/questions/6504810/how-to-install-lxml-on-ubuntu/6504860#6504860 +.. _git flow: https://nvie.com/posts/a-successful-git-branching-model/ diff --git a/docs/setup.rst b/docs/setup.rst index 81523df..ad8bf8a 100644 --- a/docs/setup.rst +++ b/docs/setup.rst @@ -25,4 +25,4 @@ You can stop the bot at any time with :kbd:`Control-c`, same as you stop a normal Python program, and it will try to exit safely. You can also use the "``!quit``" command on IRC. -.. _explanation of YAML: http://en.wikipedia.org/wiki/YAML +.. _explanation of YAML: https://en.wikipedia.org/wiki/YAML diff --git a/docs/tips.rst b/docs/tips.rst index 00a648f..0fb47c2 100644 --- a/docs/tips.rst +++ b/docs/tips.rst @@ -40,7 +40,7 @@ Tips ` and :py:meth:`bot.tasks.load() `! -.. _logging: http://docs.python.org/library/logging.html +.. _logging: https://docs.python.org/library/logging.html .. _!git plugin: https://github.com/earwig/earwigbot-plugins/blob/develop/commands/git.py .. _Let me know: ben.kurtovic@gmail.com .. 
_create an issue: https://github.com/earwig/earwigbot/issues diff --git a/docs/toolset.rst b/docs/toolset.rst index 5306fc3..30dcd69 100644 --- a/docs/toolset.rst +++ b/docs/toolset.rst @@ -1,7 +1,7 @@ The Wiki Toolset ================ -EarwigBot's answer to the `Pywikipedia framework`_ is the Wiki Toolset +EarwigBot's answer to `Pywikibot`_ is the Wiki Toolset (:py:mod:`earwigbot.wiki`), which you will mainly access through :py:attr:`bot.wiki `. @@ -43,7 +43,7 @@ wikis, you can usually use code like this:: try: site = bot.wiki.get_site(project=project, lang=lang) except earwigbot.SiteNotFoundError: - # Load site info from http://es.wikipedia.org/w/api.php: + # Load site info from https://es.wikipedia.org/w/api.php: site = bot.wiki.add_site(project=project, lang=lang) This works because EarwigBot assumes that the URL for the site is @@ -56,8 +56,8 @@ like:: try: site = bot.wiki.get_site(project=project, lang=lang) except earwigbot.SiteNotFoundError: - # Load site info from http://mysite.net/mywiki/it/s/api.php: - base_url = "http://mysite.net/" + project + "/" + lang + # Load site info from https://mysite.net/mywiki/it/s/api.php: + base_url = "https://mysite.net/" + project + "/" + lang db_name = lang + project + "_p" sql = {host: "sql.mysite.net", db: db_name} site = bot.wiki.add_site(base_url=base_url, script_path="/s", sql=sql) @@ -242,6 +242,6 @@ docstrings`_ to learn how to use it in a more hands-on fashion. For reference, :py:class:`earwigbot.wiki.SitesDB ` tied to the :file:`sites.db` file in the bot's working directory. -.. _Pywikipedia framework: http://pywikipediabot.sourceforge.net/ -.. _CentralAuth: http://www.mediawiki.org/wiki/Extension:CentralAuth +.. _Pywikibot: https://www.mediawiki.org/wiki/Manual:Pywikibot +.. _CentralAuth: https://www.mediawiki.org/wiki/Extension:CentralAuth .. 
_its code and docstrings: https://github.com/earwig/earwigbot/tree/develop/earwigbot/wiki diff --git a/earwigbot/__init__.py b/earwigbot/__init__.py index afbf228..4b2c13d 100644 --- a/earwigbot/__init__.py +++ b/earwigbot/__init__.py @@ -26,7 +26,7 @@ Wikipedia and interacts with people over IRC. See :file:`README.rst` for an overview, or the :file:`docs/` directory for details. This documentation is also available `online -`_. +`_. """ __author__ = "Ben Kurtovic" diff --git a/earwigbot/bot.py b/earwigbot/bot.py index bd3cf24..4b1e325 100644 --- a/earwigbot/bot.py +++ b/earwigbot/bot.py @@ -32,7 +32,7 @@ from earwigbot.wiki import SitesDB __all__ = ["Bot"] -class Bot(object): +class Bot: """ **EarwigBot: Main Bot Class** @@ -147,7 +147,7 @@ class Bot(object): advance warning of their forced shutdown. """ tasks = [] - component_names = self.config.components.keys() + component_names = list(self.config.components.keys()) skips = component_names + ["MainThread", "reminder", "irc:quit"] for thread in enumerate_threads(): if thread.is_alive() and not any( diff --git a/earwigbot/commands/__init__.py b/earwigbot/commands/__init__.py index 67ba719..f41babe 100644 --- a/earwigbot/commands/__init__.py +++ b/earwigbot/commands/__init__.py @@ -22,7 +22,7 @@ __all__ = ["Command"] -class Command(object): +class Command: """ **EarwigBot: Base IRC Command** @@ -54,9 +54,8 @@ class Command(object): This is called once when the command is loaded (from :py:meth:`commands.load() `). *bot* is out base :py:class:`~earwigbot.bot.Bot` object. Don't override - this directly; if you do, remember to place - ``super(Command, self).__init()`` first. Use :py:meth:`setup` for - typical command-init/setup needs. + this directly; if you do, remember to place ``super().__init__()`` first. + Use :py:meth:`setup` for typical command-init/setup needs. 
""" self.bot = bot self.config = bot.config diff --git a/earwigbot/commands/calc.py b/earwigbot/commands/calc.py index c3dd998..f030123 100644 --- a/earwigbot/commands/calc.py +++ b/earwigbot/commands/calc.py @@ -21,12 +21,13 @@ # SOFTWARE. import re -import urllib +import urllib.request +import urllib.parse from earwigbot.commands import Command class Calc(Command): - """A somewhat advanced calculator: see http://futureboy.us/fsp/frink.fsp + """A somewhat advanced calculator: see https://futureboy.us/fsp/frink.fsp for details.""" name = "calc" @@ -38,9 +39,9 @@ class Calc(Command): query = ' '.join(data.args) query = self.cleanup(query) - url = "http://futureboy.us/fsp/frink.fsp?fromVal={0}" - url = url.format(urllib.quote(query)) - result = urllib.urlopen(url).read() + url = "https://futureboy.us/fsp/frink.fsp?fromVal={0}" + url = url.format(urllib.parse.quote(query)) + result = urllib.request.urlopen(url).read().decode() r_result = re.compile(r'(?i)(.*?)') r_tag = re.compile(r'<\S+.*?>') @@ -64,13 +65,14 @@ class Calc(Command): res = "%s = %s" % (query, result) self.reply(data, res) - def cleanup(self, query): + @staticmethod + def cleanup(query): fixes = [ (' in ', ' -> '), (' over ', ' / '), - (u'£', 'GBP '), - (u'€', 'EUR '), - ('\$', 'USD '), + ('£', 'GBP '), + ('€', 'EUR '), + (r'\$', 'USD '), (r'\bKB\b', 'kilobytes'), (r'\bMB\b', 'megabytes'), (r'\bGB\b', 'gigabytes'), diff --git a/earwigbot/commands/cidr.py b/earwigbot/commands/cidr.py index c424bc3..74a96b8 100644 --- a/earwigbot/commands/cidr.py +++ b/earwigbot/commands/cidr.py @@ -134,7 +134,7 @@ class CIDR(Command): bin_ips[i] = bin_ips[i][:ip.size] + suffix size = len(bin_ips[0]) - for i in xrange(len(bin_ips[0])): + for i in range(len(bin_ips[0])): if any(ip[i] == "X" for ip in bin_ips) or ( any(ip[i] == "0" for ip in bin_ips) and any(ip[i] == "1" for ip in bin_ips)): @@ -154,7 +154,7 @@ class CIDR(Command): def _format_bin(family, binary): """Convert an IP's binary representation to presentation 
format.""" return socket.inet_ntop(family, "".join( - chr(int(binary[i:i + 8], 2)) for i in xrange(0, len(binary), 8))) + chr(int(binary[i:i + 8], 2)) for i in range(0, len(binary), 8))) @staticmethod def _format_count(count): diff --git a/earwigbot/commands/crypt.py b/earwigbot/commands/crypt.py index f88472d..0b8374e 100644 --- a/earwigbot/commands/crypt.py +++ b/earwigbot/commands/crypt.py @@ -20,16 +20,20 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +import base64 import hashlib +import os from earwigbot import importer from earwigbot.commands import Command -Blowfish = importer.new("Crypto.Cipher.Blowfish") +fernet = importer.new("cryptography.fernet") +hashes = importer.new("cryptography.hazmat.primitives.hashes") +pbkdf2 = importer.new("cryptography.hazmat.primitives.kdf.pbkdf2") class Crypt(Command): """Provides hash functions with !hash (!hash list for supported algorithms) - and Blowfish encryption with !encrypt and !decrypt.""" + and basic encryption with !encrypt and !decrypt.""" name = "crypt" commands = ["crypt", "hash", "encrypt", "decrypt"] @@ -47,12 +51,12 @@ class Crypt(Command): if data.command == "hash": algo = data.args[0] if algo == "list": - algos = ', '.join(hashlib.algorithms) + algos = ', '.join(hashlib.algorithms_available) msg = algos.join(("Supported algorithms: ", ".")) self.reply(data, msg) - elif algo in hashlib.algorithms: + elif algo in hashlib.algorithms_available: string = ' '.join(data.args[1:]) - result = getattr(hashlib, algo)(string).hexdigest() + result = getattr(hashlib, algo)(string.encode()).hexdigest() self.reply(data, result) else: msg = "Unknown algorithm: '{0}'.".format(algo) @@ -61,6 +65,7 @@ class Crypt(Command): else: key = data.args[0] text = " ".join(data.args[1:]) + saltlen = 16 if not text: msg = "A key was provided, but text to {0} was not." 
@@ -68,19 +73,31 @@ class Crypt(Command): return try: - cipher = Blowfish.new(hashlib.sha256(key).digest()) - except ImportError: - msg = "This command requires the 'pycrypto' package: https://www.dlitz.net/software/pycrypto/" - self.reply(data, msg) - return - - try: if data.command == "encrypt": - if len(text) % 8: - pad = 8 - len(text) % 8 - text = text.ljust(len(text) + pad, "\x00") - self.reply(data, cipher.encrypt(text).encode("hex")) + salt = os.urandom(saltlen) + kdf = pbkdf2.PBKDF2HMAC( + algorithm=hashes.SHA256(), + length=32, + salt=salt, + iterations=100000, + ) + f = fernet.Fernet(base64.urlsafe_b64encode(kdf.derive(key.encode()))) + ciphertext = f.encrypt(text.encode()) + self.reply(data, base64.b64encode(salt + ciphertext).decode()) else: - self.reply(data, cipher.decrypt(text.decode("hex"))) - except (ValueError, TypeError) as error: - self.reply(data, error.message) + if len(text) < saltlen: + raise ValueError("Ciphertext is too short") + raw = base64.b64decode(text) + salt, ciphertext = raw[:saltlen], raw[saltlen:] + kdf = pbkdf2.PBKDF2HMAC( + algorithm=hashes.SHA256(), + length=32, + salt=salt, + iterations=100000, + ) + f = fernet.Fernet(base64.urlsafe_b64encode(kdf.derive(key.encode()))) + self.reply(data, f.decrypt(ciphertext).decode()) + except ImportError: + self.reply(data, "This command requires the 'cryptography' package: https://cryptography.io/") + except Exception as error: + self.reply(data, "{}: {}".format(type(error).__name__, str(error))) diff --git a/earwigbot/commands/dictionary.py b/earwigbot/commands/dictionary.py index 9515979..9d3448b 100644 --- a/earwigbot/commands/dictionary.py +++ b/earwigbot/commands/dictionary.py @@ -63,23 +63,23 @@ class Dictionary(Command): level, languages = self.get_languages(entry) if not languages: - return u"Couldn't parse {0}!".format(page.url) + return "Couldn't parse {0}!".format(page.url) if "#" in term: # Requesting a specific language lcase_langs = {lang.lower(): lang for lang in languages} 
request = term.rsplit("#", 1)[1] lang = lcase_langs.get(request.lower()) if not lang: - resp = u"Language {0} not found in definition." + resp = "Language {0} not found in definition." return resp.format(request) definition = self.get_definition(languages[lang], level) - return u"({0}) {1}".format(lang, definition) + return "({0}) {1}".format(lang, definition) result = [] for lang, section in sorted(languages.items()): definition = self.get_definition(section, level) - result.append(u"({0}) {1}".format(lang, definition)) - return u"; ".join(result) + result.append("({0}) {1}".format(lang, definition)) + return "; ".join(result) def get_languages(self, entry, level=2): regex = r"(?:\A|\n)==\s*([a-zA-Z0-9_ ]*?)\s*==(?:\Z|\n)" @@ -93,7 +93,7 @@ class Dictionary(Command): split.pop(0) languages = {} - for i in xrange(0, len(split), 2): + for i in range(0, len(split), 2): languages[split[i]] = split[i + 1] return level, languages @@ -118,40 +118,40 @@ class Dictionary(Command): } blocks = "=" * (level + 1) defs = [] - for part, basename in parts_of_speech.iteritems(): - fullnames = [basename, "\{\{" + basename + "\}\}", - "\{\{" + basename.lower() + "\}\}"] + for part, basename in parts_of_speech.items(): + fullnames = [basename, r"\{\{" + basename + r"\}\}", + r"\{\{" + basename.lower() + r"\}\}"] for fullname in fullnames: - regex = blocks + "\s*" + fullname + "\s*" + blocks + regex = blocks + r"\s*" + fullname + r"\s*" + blocks if re.search(regex, section): - regex = blocks + "\s*" + fullname - regex += "\s*{0}(.*?)(?:(?:{0})|\Z)".format(blocks) + regex = blocks + r"\s*" + fullname + regex += r"\s*{0}(.*?)(?:(?:{0})|\Z)".format(blocks) bodies = re.findall(regex, section, re.DOTALL) if bodies: for body in bodies: definition = self.parse_body(body) if definition: - msg = u"\x02{0}\x0F {1}" + msg = "\x02{0}\x0F {1}" defs.append(msg.format(part, definition)) return "; ".join(defs) def parse_body(self, body): substitutions = [ - ("", ""), - ("(.*?)", ""), - 
("\[\[[^\]|]*?\|([^\]|]*?)\]\]", r"\1"), - ("\{\{unsupported\|(.*?)\}\}", r"\1"), - ("\{\{(.*?) of\|([^}|]*?)(\|(.*?))?\}\}", r"\1 of \2."), - ("\{\{w\|(.*?)\}\}", r"\1"), - ("\{\{surname(.*?)\}\}", r"A surname."), - ("\{\{given name\|([^}|]*?)(\|(.*?))?\}\}", r"A \1 given name."), + (r"", ""), + (r"(.*?)", ""), + (r"\[\[[^\]|]*?\|([^\]|]*?)\]\]", r"\1"), + (r"\{\{unsupported\|(.*?)\}\}", r"\1"), + (r"\{\{(.*?) of\|([^}|]*?)(\|(.*?))?\}\}", r"\1 of \2."), + (r"\{\{w\|(.*?)\}\}", r"\1"), + (r"\{\{surname(.*?)\}\}", r"A surname."), + (r"\{\{given name\|([^}|]*?)(\|(.*?))?\}\}", r"A \1 given name."), ] senses = [] for line in body.splitlines(): line = line.strip() - if re.match("#\s*[^:*#]", line): + if re.match(r"#\s*[^:*#]", line): for regex, repl in substitutions: line = re.sub(regex, repl, line) line = self.strip_templates(line) @@ -167,7 +167,7 @@ class Dictionary(Command): result = [] # Number the senses incrementally for i, sense in enumerate(senses): - result.append(u"{0}. {1}".format(i + 1, sense)) + result.append("{0}. {1}".format(i + 1, sense)) return " ".join(result) def strip_templates(self, line): diff --git a/earwigbot/commands/editcount.py b/earwigbot/commands/editcount.py index 2b33f05..2bc26c0 100644 --- a/earwigbot/commands/editcount.py +++ b/earwigbot/commands/editcount.py @@ -20,7 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from urllib import quote_plus +from urllib.parse import quote_plus from earwigbot import exceptions from earwigbot.commands import Command @@ -47,7 +47,7 @@ class Editcount(Command): return safe = quote_plus(user.name.encode("utf8")) - url = "http://tools.wmflabs.org/xtools-ec/index.php?user={0}&lang={1}&wiki={2}" - fullurl = url.format(safe, site.lang, site.project) + url = "https://xtools.wmflabs.org/ec/{}/{}".format(site.domain, safe) + fullurl = url.format(safe, site.domain) msg = "\x0302{0}\x0F has {1} edits ({2})." 
self.reply(data, msg.format(name, count, fullurl)) diff --git a/earwigbot/commands/help.py b/earwigbot/commands/help.py index 870cf6c..1b3253a 100644 --- a/earwigbot/commands/help.py +++ b/earwigbot/commands/help.py @@ -64,7 +64,7 @@ class Help(Command): if command.name == target or target in command.commands: if command.__doc__: doc = command.__doc__.replace("\n", "") - doc = re.sub("\s\s+", " ", doc) + doc = re.sub(r"\s\s+", " ", doc) msg = 'Help for command \x0303{0}\x0F: "{1}"' self.reply(data, msg.format(target, doc)) return diff --git a/earwigbot/commands/langcode.py b/earwigbot/commands/langcode.py index b1712b6..06a7d47 100644 --- a/earwigbot/commands/langcode.py +++ b/earwigbot/commands/langcode.py @@ -39,7 +39,7 @@ class Langcode(Command): del matrix["count"] del matrix["specials"] - for site in matrix.itervalues(): + for site in matrix.values(): if not site["name"]: continue name = site["name"].encode("utf8") diff --git a/earwigbot/commands/link.py b/earwigbot/commands/link.py index a54ea51..e0cafc0 100644 --- a/earwigbot/commands/link.py +++ b/earwigbot/commands/link.py @@ -32,21 +32,21 @@ class Link(Command): self.last = {} def check(self, data): - if re.search("(\[\[(.*?)\]\])|(\{\{(.*?)\}\})", data.msg): + if re.search(r"(\[\[(.*?)\]\])|(\{\{(.*?)\}\})", data.msg): self.last[data.chan] = data.msg # Store most recent link return data.is_command and data.command == self.name def process(self, data): self.site = self.bot.wiki.get_site() - if re.search("(\[\[(.*?)\]\])|(\{\{(.*?)\}\})", data.msg): - links = u" , ".join(self.parse_line(data.msg)) + if re.search(r"(\[\[(.*?)\]\])|(\{\{(.*?)\}\})", data.msg): + links = " , ".join(self.parse_line(data.msg)) self.reply(data, links.encode("utf8")) elif data.command == "link": if not data.args: if data.chan in self.last: - links = u" , ".join(self.parse_line(self.last[data.chan])) + links = " , ".join(self.parse_line(self.last[data.chan])) self.reply(data, links.encode("utf8")) else: self.reply(data, "What do 
you want me to link to?") @@ -60,17 +60,17 @@ class Link(Command): results = [] # Destroy {{{template parameters}}}: - line = re.sub("\{\{\{(.*?)\}\}\}", "", line) + line = re.sub(r"\{\{\{(.*?)\}\}\}", "", line) # Find all [[links]]: - links = re.findall("(\[\[(.*?)(\||\]\]))", line) + links = re.findall(r"(\[\[(.*?)(\||\]\]))", line) if links: # re.findall() returns a list of tuples, but we only want the 2nd # item in each tuple: results = [self.site.get_page(name[1]).url for name in links] # Find all {{templates}} - templates = re.findall("(\{\{(.*?)(\||\}\}))", line) + templates = re.findall(r"(\{\{(.*?)(\||\}\}))", line) if templates: p_tmpl = lambda name: self.site.get_page("Template:" + name).url templates = [p_tmpl(i[1]) for i in templates] diff --git a/earwigbot/commands/notes.py b/earwigbot/commands/notes.py index bf3a687..a79923d 100644 --- a/earwigbot/commands/notes.py +++ b/earwigbot/commands/notes.py @@ -96,7 +96,7 @@ class Notes(Command): except IndexError: msg = ("\x0302The Earwig Mini-Wiki\x0F: running v{0}. Subcommands " "are: {1}. 
You can get help on any with '!{2} help subcommand'.") - cmnds = ", ".join((info.keys())) + cmnds = ", ".join(info.keys()) self.reply(data, msg.format(self.version, cmnds, data.command)) return if command in self.aliases: diff --git a/earwigbot/commands/remind.py b/earwigbot/commands/remind.py index e2061d4..631b2a3 100644 --- a/earwigbot/commands/remind.py +++ b/earwigbot/commands/remind.py @@ -80,7 +80,7 @@ class Remind(Command): def _evaluate(node): """Convert an AST node into a real number or raise an exception.""" if isinstance(node, ast.Num): - if not isinstance(node.n, (int, long, float)): + if not isinstance(node.n, (int, float)): raise ValueError(node.n) return node.n elif isinstance(node, ast.BinOp): @@ -89,7 +89,7 @@ class Remind(Command): else: raise ValueError(node) - for unit, factor in time_units.iteritems(): + for unit, factor in time_units.items(): arg = arg.replace(unit, "*" + str(factor)) try: @@ -112,7 +112,7 @@ class Remind(Command): def _get_new_id(self): """Get a free ID for a new reminder.""" - taken = set(robj.id for robj in chain(*self.reminders.values())) + taken = set(robj.id for robj in chain(*list(self.reminders.values()))) num = random.choice(list(set(range(4096)) - taken)) return "R{0:03X}".format(num) @@ -232,7 +232,7 @@ class Remind(Command): fmt = lambda robj, user: '\x0303{0}\x0F (for {1} {2}, {3})'.format( robj.id, user, dest(robj.data), robj.end_time) - rlist = (fmt(rem, user) for user, rems in self.reminders.iteritems() + rlist = (fmt(rem, user) for user, rems in self.reminders.items() for rem in rems) self.reply(data, "All reminders: {0}.".format(", ".join(rlist))) @@ -363,7 +363,7 @@ class Remind(Command): permdb.set_attr("command:remind", "data", str(database)) -class _ReminderThread(object): +class _ReminderThread: """A single thread that handles reminders.""" def __init__(self, lock): @@ -429,7 +429,7 @@ class _ReminderThread(object): self._thread = None -class _Reminder(object): +class _Reminder: """Represents a single 
reminder.""" def __init__(self, rid, user, wait, message, data, cmdobj, end=None): self.id = rid diff --git a/earwigbot/commands/stalk.py b/earwigbot/commands/stalk.py index 55841c8..25bdef1 100644 --- a/earwigbot/commands/stalk.py +++ b/earwigbot/commands/stalk.py @@ -146,7 +146,7 @@ class Stalk(Command): return target.startswith("re:") and re.match(target[3:], tag) def _process(table, tag, flags): - for target, stalks in table.iteritems(): + for target, stalks in table.items(): if target == tag or _regex_match(target, tag): _update_chans(stalks, flags) @@ -161,7 +161,7 @@ class Stalk(Command): with self.bot.component_lock: frontend = self.bot.frontend if frontend and not frontend.is_stopped(): - for chan, users in chans.iteritems(): + for chan, users in chans.items(): if chan.startswith("#") and chan not in frontend.channels: continue pretty = rc.prettify(color=chan not in nocolor) @@ -178,7 +178,7 @@ class Stalk(Command): def _get_stalks_by_nick(nick, table): """Return a dictionary of stalklist entries by the given nick.""" entries = {} - for target, stalks in table.iteritems(): + for target, stalks in table.items(): for info in stalks: if info[0] == nick: if target in entries: @@ -293,7 +293,7 @@ class Stalk(Command): def _format_stalks(stalks): return ", ".join( "\x0302{0}\x0F ({1})".format(target, _format_chans(chans)) - for target, chans in stalks.iteritems()) + for target, chans in stalks.items()) users = self._get_stalks_by_nick(nick, self._users) pages = self._get_stalks_by_nick(nick, self._pages) @@ -325,7 +325,7 @@ class Stalk(Command): def _format_stalks(stalks): return ", ".join( "\x0302{0}\x0F ({1})".format(target, _format_data(data)) - for target, data in stalks.iteritems()) + for target, data in stalks.items()) users, pages = self._users, self._pages if users: diff --git a/earwigbot/commands/threads.py b/earwigbot/commands/threads.py index d877792..52edc53 100644 --- a/earwigbot/commands/threads.py +++ b/earwigbot/commands/threads.py @@ -80,7 +80,7 
@@ class Threads(Command): t = "\x0302copyvio worker\x0F (site {0})" daemon_threads.append(t.format(tname[len("cvworker-"):])) else: - match = re.findall("^(.*?) \((.*?)\)$", tname) + match = re.findall(r"^(.*?) \((.*?)\)$", tname) if match: t = "\x0302{0}\x0F (id {1}, since {2})" thread_info = t.format(match[0][0], ident, match[0][1]) diff --git a/earwigbot/commands/time_command.py b/earwigbot/commands/time_command.py index 87534e9..5a2bc78 100644 --- a/earwigbot/commands/time_command.py +++ b/earwigbot/commands/time_command.py @@ -60,7 +60,7 @@ class Time(Command): try: tzinfo = pytz.timezone(timezone) except ImportError: - msg = "This command requires the 'pytz' package: http://pytz.sourceforge.net/" + msg = "This command requires the 'pytz' package: https://pypi.org/project/pytz/" self.reply(data, msg) return except pytz.exceptions.UnknownTimeZoneError: diff --git a/earwigbot/commands/watchers.py b/earwigbot/commands/watchers.py index 9fb7ec3..a3ef6bd 100644 --- a/earwigbot/commands/watchers.py +++ b/earwigbot/commands/watchers.py @@ -35,7 +35,7 @@ class Watchers(Command): site = self.bot.wiki.get_site() query = site.api_query(action="query", prop="info", inprop="watchers", titles=" ".join(data.args)) - page = query["query"]["pages"].values()[0] + page = list(query["query"]["pages"].values())[0] title = page["title"].encode("utf8") if "invalid" in page: diff --git a/earwigbot/config/__init__.py b/earwigbot/config/__init__.py index 40a85c4..a1abc25 100644 --- a/earwigbot/config/__init__.py +++ b/earwigbot/config/__init__.py @@ -20,9 +20,9 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
+import base64 from collections import OrderedDict from getpass import getpass -from hashlib import sha256 import logging import logging.handlers from os import mkdir, path @@ -38,12 +38,13 @@ from earwigbot.config.permissions import PermissionsDB from earwigbot.config.script import ConfigScript from earwigbot.exceptions import NoConfigError -Blowfish = importer.new("Crypto.Cipher.Blowfish") -bcrypt = importer.new("bcrypt") +fernet = importer.new("cryptography.fernet") +hashes = importer.new("cryptography.hazmat.primitives.hashes") +pbkdf2 = importer.new("cryptography.hazmat.primitives.kdf.pbkdf2") __all__ = ["BotConfig"] -class BotConfig(object): +class BotConfig: """ **EarwigBot: YAML Config File Manager** @@ -109,9 +110,9 @@ class BotConfig(object): return "".format(self.root_dir) def _handle_missing_config(self): - print "Config file missing or empty:", self._config_path + print("Config file missing or empty:", self._config_path) msg = "Would you like to create a config file now? [Y/n] " - choice = raw_input(msg) + choice = input(msg) if choice.lower().startswith("n"): raise NoConfigError() else: @@ -127,7 +128,7 @@ class BotConfig(object): try: self._data = yaml.load(fp, OrderedLoader) except yaml.YAMLError: - print "Error parsing config file {0}:".format(filename) + print("Error parsing config file {0}:".format(filename)) raise def _setup_logging(self): @@ -148,7 +149,7 @@ class BotConfig(object): mkdir(log_dir, stat.S_IWUSR|stat.S_IRUSR|stat.S_IXUSR) else: msg = "log_dir ({0}) exists but is not a directory!" - print msg.format(log_dir) + print(msg.format(log_dir)) return main_handler = hand(logfile("bot.log"), "midnight", 1, 7) @@ -173,7 +174,7 @@ class BotConfig(object): try: node._decrypt(self._decryption_cipher, nodes[:-1], nodes[-1]) except ValueError: - print "Error decrypting passwords:" + print("Error decrypting passwords:") raise @property @@ -257,7 +258,7 @@ class BotConfig(object): exit. 
Data from the config file is stored in six - :py:class:`~earwigbot.config.ConfigNode`\ s (:py:attr:`components`, + :py:class:`~earwigbot.config.ConfigNode`\\ s (:py:attr:`components`, :py:attr:`wiki`, :py:attr:`irc`, :py:attr:`commands`, :py:attr:`tasks`, :py:attr:`metadata`) for easy access (as well as the lower-level :py:attr:`data` attribute). If passwords are encrypted, we'll use @@ -283,18 +284,19 @@ class BotConfig(object): if self.is_encrypted(): if not self._decryption_cipher: try: - blowfish_new = Blowfish.new - hashpw = bcrypt.hashpw + salt = base64.b64decode(self.metadata["salt"]) + kdf = pbkdf2.PBKDF2HMAC( + algorithm=hashes.SHA256(), + length=32, + salt=salt, + iterations=ConfigScript.PBKDF_ROUNDS, + ) except ImportError: - url1 = "http://www.mindrot.org/projects/py-bcrypt" - url2 = "https://www.dlitz.net/software/pycrypto/" - e = "Encryption requires the 'py-bcrypt' and 'pycrypto' packages: {0}, {1}" - raise NoConfigError(e.format(url1, url2)) + e = "Encryption requires the 'cryptography' package: https://cryptography.io/" + raise NoConfigError(e) key = getpass("Enter key to decrypt bot passwords: ") - self._decryption_cipher = blowfish_new(sha256(key).digest()) - signature = self.metadata["signature"] - if hashpw(key, signature) != signature: - raise RuntimeError("Incorrect password.") + self._decryption_cipher = fernet.Fernet( + base64.urlsafe_b64encode(kdf.derive(key.encode()))) for node, nodes in self._decryptable_nodes: self._decrypt(node, nodes) diff --git a/earwigbot/config/formatter.py b/earwigbot/config/formatter.py index 2dae1a4..1219d93 100644 --- a/earwigbot/config/formatter.py +++ b/earwigbot/config/formatter.py @@ -26,7 +26,7 @@ __all__ = ["BotFormatter"] class BotFormatter(logging.Formatter): def __init__(self, color=False): - self._format = super(BotFormatter, self).format + self._format = super().format if color: fmt = "[%(asctime)s %(lvl)s] %(name)s: %(message)s" self.format = lambda rec: self._format(self.format_color(rec)) @@ -34,7 +34,7 @@ class 
BotFormatter(logging.Formatter): fmt = "[%(asctime)s %(levelname)-8s] %(name)s: %(message)s" self.format = self._format datefmt = "%Y-%m-%d %H:%M:%S" - super(BotFormatter, self).__init__(fmt=fmt, datefmt=datefmt) + super().__init__(fmt=fmt, datefmt=datefmt) def format_color(self, record): l = record.levelname.ljust(8) diff --git a/earwigbot/config/node.py b/earwigbot/config/node.py index 8b8f9f8..b451b30 100644 --- a/earwigbot/config/node.py +++ b/earwigbot/config/node.py @@ -20,18 +20,19 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +import base64 from collections import OrderedDict __all__ = ["ConfigNode"] -class ConfigNode(object): +class ConfigNode: def __init__(self): self._data = OrderedDict() def __repr__(self): return self._data - def __nonzero__(self): + def __bool__(self): return bool(self._data) def __len__(self): @@ -45,12 +46,12 @@ class ConfigNode(object): def __getattr__(self, key): if key == "_data": - return super(ConfigNode, self).__getattr__(key) + return super().__getattribute__(key) return self._data[key] def __setattr__(self, key, item): if key == "_data": - super(ConfigNode, self).__setattr__(key, item) + super().__setattr__(key, item) else: self._data[key] = item @@ -63,7 +64,7 @@ class ConfigNode(object): def _dump(self): data = self._data.copy() - for key, val in data.iteritems(): + for key, val in data.items(): if isinstance(val, ConfigNode): data[key] = val._dump() return data @@ -79,26 +80,26 @@ class ConfigNode(object): except KeyError: return if item in base: - ciphertext = base[item].decode("hex") - base[item] = cipher.decrypt(ciphertext).rstrip("\x00") + ciphertext = base64.b64decode(base[item]) + base[item] = cipher.decrypt(ciphertext).decode() def get(self, *args, **kwargs): return self._data.get(*args, **kwargs) def keys(self): - return self._data.keys() + return list(self._data.keys()) def values(self): - return self._data.values() + return list(self._data.values()) def 
items(self): - return self._data.items() + return list(self._data.items()) def iterkeys(self): - return self._data.iterkeys() + return iter(self._data.keys()) def itervalues(self): - return self._data.itervalues() + return iter(self._data.values()) def iteritems(self): - return self._data.iteritems() + return iter(self._data.items()) diff --git a/earwigbot/config/ordered_yaml.py b/earwigbot/config/ordered_yaml.py index 5cf1e52..60fdcf9 100644 --- a/earwigbot/config/ordered_yaml.py +++ b/earwigbot/config/ordered_yaml.py @@ -24,7 +24,7 @@ Based on: * https://gist.github.com/844388 - * http://pyyaml.org/attachment/ticket/161/use_ordered_dict.py + * https://pyyaml.org/attachment/ticket/161/use_ordered_dict.py with modifications. """ @@ -39,10 +39,10 @@ class OrderedLoader(yaml.Loader): """A YAML loader that loads mappings into ordered dictionaries.""" def __init__(self, *args, **kwargs): - super(OrderedLoader, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) constructor = type(self).construct_yaml_map - self.add_constructor(u"tag:yaml.org,2002:map", constructor) - self.add_constructor(u"tag:yaml.org,2002:omap", constructor) + self.add_constructor("tag:yaml.org,2002:map", constructor) + self.add_constructor("tag:yaml.org,2002:omap", constructor) def construct_yaml_map(self, node): data = OrderedDict() @@ -63,7 +63,7 @@ class OrderedLoader(yaml.Loader): key = self.construct_object(key_node, deep=deep) try: hash(key) - except TypeError, exc: + except TypeError as exc: raise yaml.constructor.ConstructorError( "while constructing a mapping", node.start_mark, "found unacceptable key ({0})".format(exc), @@ -77,7 +77,7 @@ class OrderedDumper(yaml.SafeDumper): """A YAML dumper that dumps ordered dictionaries into mappings.""" def __init__(self, *args, **kwargs): - super(OrderedDumper, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.add_representer(OrderedDict, type(self).represent_dict) def represent_mapping(self, tag, mapping, 
flow_style=None): diff --git a/earwigbot/config/permissions.py b/earwigbot/config/permissions.py index a92a49a..17ed862 100644 --- a/earwigbot/config/permissions.py +++ b/earwigbot/config/permissions.py @@ -26,7 +26,7 @@ from threading import Lock __all__ = ["PermissionsDB"] -class PermissionsDB(object): +class PermissionsDB: """ **EarwigBot: Permissions Database Manager** @@ -198,7 +198,7 @@ class PermissionsDB(object): with self._db_access_lock, sqlite.connect(self._dbfile) as conn: conn.execute(query, (user, key)) -class User(object): +class User: """A class that represents an IRC user for the purpose of testing rules.""" def __init__(self, nick, ident, host): self.nick = nick diff --git a/earwigbot/config/script.py b/earwigbot/config/script.py index 112829a..4a9ffc6 100644 --- a/earwigbot/config/script.py +++ b/earwigbot/config/script.py @@ -20,9 +20,10 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +import base64 from collections import OrderedDict from getpass import getpass -from hashlib import sha256 +import os from os import chmod, makedirs, mkdir, path import re import stat @@ -34,8 +35,9 @@ import yaml from earwigbot import exceptions, importer from earwigbot.config.ordered_yaml import OrderedDumper -Blowfish = importer.new("Crypto.Cipher.Blowfish") -bcrypt = importer.new("bcrypt") +fernet = importer.new("cryptography.fernet") +hashes = importer.new("cryptography.hazmat.primitives.hashes") +pbkdf2 = importer.new("cryptography.hazmat.primitives.kdf.pbkdf2") __all__ = ["ConfigScript"] @@ -48,11 +50,11 @@ def process(bot, rc): pass """ -class ConfigScript(object): +class ConfigScript: """A script to guide a user through the creation of a new config file.""" WIDTH = 79 PROMPT = "\x1b[32m> \x1b[0m" - BCRYPT_ROUNDS = 12 + PBKDF_ROUNDS = 100000 def __init__(self, config): self.config = config @@ -72,24 +74,24 @@ class ConfigScript(object): self._lang = None def _print(self, text): - print fill(re.sub("\s\s+", " ", 
text), self.WIDTH) + print(fill(re.sub(r"\s\s+", " ", text), self.WIDTH)) def _print_no_nl(self, text): - sys.stdout.write(fill(re.sub("\s\s+", " ", text), self.WIDTH)) + sys.stdout.write(fill(re.sub(r"\s\s+", " ", text), self.WIDTH)) sys.stdout.flush() def _pause(self): - raw_input(self.PROMPT + "Press enter to continue: ") + input(self.PROMPT + "Press enter to continue: ") def _ask(self, text, default=None, require=True): text = self.PROMPT + text if default: text += " \x1b[33m[{0}]\x1b[0m".format(default) - lines = wrap(re.sub("\s\s+", " ", text), self.WIDTH) + lines = wrap(re.sub(r"\s\s+", " ", text), self.WIDTH) if len(lines) > 1: - print "\n".join(lines[:-1]) + print("\n".join(lines[:-1])) while True: - answer = raw_input(lines[-1] + " ") or default + answer = input(lines[-1] + " ") or default if answer or not require: return answer @@ -99,11 +101,11 @@ class ConfigScript(object): text += " \x1b[33m[Y/n]\x1b[0m" else: text += " \x1b[33m[y/N]\x1b[0m" - lines = wrap(re.sub("\s\s+", " ", text), self.WIDTH) + lines = wrap(re.sub(r"\s\s+", " ", text), self.WIDTH) if len(lines) > 1: - print "\n".join(lines[:-1]) + print("\n".join(lines[:-1])) while True: - answer = raw_input(lines[-1] + " ").lower() + answer = input(lines[-1] + " ").lower() if not answer: return default if answer.startswith("y"): @@ -119,59 +121,57 @@ class ConfigScript(object): def _encrypt(self, password): if self._cipher: - mod = len(password) % 8 - if mod: - password = password.ljust(len(password) + (8 - mod), "\x00") - return self._cipher.encrypt(password).encode("hex") + return base64.b64encode(self._cipher.encrypt(password.encode())).decode() else: return password def _ask_list(self, text): - print fill(re.sub("\s\s+", " ", self.PROMPT + text), self.WIDTH) - print "[one item per line; blank line to end]:" + print(fill(re.sub(r"\s\s+", " ", self.PROMPT + text), self.WIDTH)) + print("[one item per line; blank line to end]:") result = [] while True: - line = raw_input(self.PROMPT) + line = 
input(self.PROMPT) if line: result.append(line) else: return result def _set_metadata(self): - print + print() self.data["metadata"] = OrderedDict([("version", 1)]) self._print("""I can encrypt passwords stored in your config file in addition to preventing other users on your system from reading the file. Encryption is recommended if the bot - is to run on a public server like Wikimedia Labs, but - otherwise the need to enter a key every time you start - the bot may be an inconvenience.""") + is to run on a public server like Toolforge, but the + need to enter a key every time you start the bot may be + an inconvenience.""") self.data["metadata"]["encryptPasswords"] = False if self._ask_bool("Encrypt stored passwords?"): key = getpass(self.PROMPT + "Enter an encryption key: ") - msg = "Running {0} rounds of bcrypt...".format(self.BCRYPT_ROUNDS) - self._print_no_nl(msg) + self._print_no_nl("Generating key...") try: - salt = bcrypt.gensalt(self.BCRYPT_ROUNDS) - signature = bcrypt.hashpw(key, salt) - self._cipher = Blowfish.new(sha256(key).digest()) + salt = os.urandom(16) + kdf = pbkdf2.PBKDF2HMAC( + algorithm=hashes.SHA256(), + length=32, + salt=salt, + iterations=self.PBKDF_ROUNDS, + ) + self._cipher = fernet.Fernet(base64.urlsafe_b64encode(kdf.derive(key.encode()))) except ImportError: - print " error!" - self._print("""Encryption requires the 'py-bcrypt' and - 'pycrypto' packages:""") - strt, end = " * \x1b[36m", "\x1b[0m" - print strt + "http://www.mindrot.org/projects/py-bcrypt/" + end - print strt + "https://www.dlitz.net/software/pycrypto/" + end + print(" error!") + self._print("""Encryption requires the 'cryptography' package: + https://cryptography.io/""") self._print("""I will disable encryption for now; restart configuration after installing these packages if you want it.""") self._pause() else: self.data["metadata"]["encryptPasswords"] = True - self.data["metadata"]["signature"] = signature - print " done." 
+ self.data["metadata"]["salt"] = base64.b64encode(salt).decode() + print(" done.") - print + print() self._print("""The bot can temporarily store its logs in the logs/ subdirectory. Error logs are kept for a month whereas normal logs are kept for a week. If you disable this, @@ -180,7 +180,7 @@ class ConfigScript(object): self.data["metadata"]["enableLogging"] = logging def _set_components(self): - print + print() self._print("""The bot contains three separate components that can run independently of each other.""") self._print("""- The IRC front-end runs on a normal IRC server, like @@ -209,8 +209,8 @@ class ConfigScript(object): try: site = self.config.bot.wiki.add_site(**kwargs) except exceptions.APIError as exc: - print " API error!" - print "\x1b[31m" + exc.message + "\x1b[0m" + print(" API error!") + print("\x1b[31m" + exc.message + "\x1b[0m") question = "Would you like to re-enter the site information?" if self._ask_bool(question): return self._set_wiki() @@ -219,8 +219,8 @@ class ConfigScript(object): raise exceptions.NoConfigError() return self._set_wiki() except exceptions.LoginError as exc: - print " login error!" - print "\x1b[31m" + exc.message + "\x1b[0m" + print(" login error!") + print("\x1b[31m" + exc.message + "\x1b[0m") question = "Would you like to re-enter your login information?" if self._ask_bool(question): self.data["wiki"]["username"] = self._ask("Bot username:") @@ -232,17 +232,17 @@ class ConfigScript(object): question = "Would you like to re-enter the site information?" if self._ask_bool(question): return self._set_wiki() - print + print() self._print("""Moving on. You can modify the login information stored in the bot's config in the future.""") self.data["wiki"]["password"] = None # Clear so we don't login self.config.wiki._load(self.data["wiki"]) self._print_no_nl("Trying to connect to the site...") site = self.config.bot.wiki.add_site(**kwargs) - print " success." 
+ print(" success.") self.data["wiki"]["password"] = password # Reset original value else: - print " success." + print(" success.") # Remember to store the encrypted password: password = self._encrypt(self.data["wiki"]["password"]) @@ -250,7 +250,7 @@ class ConfigScript(object): return site def _set_wiki(self): - print + print() self._wmf = self._ask_bool("""Will this bot run on Wikimedia Foundation wikis, like Wikipedia?""") if self._wmf: @@ -296,7 +296,7 @@ class ConfigScript(object): self.data["wiki"]["shutoff"] = {} msg = "Would you like to enable an automatic shutoff page for the bot?" if self._ask_bool(msg): - print + print() self._print("""The page title can contain two wildcards: $1 will be substituted with the bot's username, and $2 with the current task number. This can be used to implement a @@ -311,7 +311,7 @@ class ConfigScript(object): def _set_irc(self): if self.data["components"]["irc_frontend"]: - print + print() frontend = self.data["irc"]["frontend"] = OrderedDict() msg = "Hostname of the frontend's IRC server, without 'irc://':" frontend["host"] = self._ask(msg, "irc.freenode.net") @@ -328,7 +328,7 @@ class ConfigScript(object): frontend["nickservPassword"] = ns_pass chan_question = "Frontend channels to join by default:" frontend["channels"] = self._ask_list(chan_question) - print + print() self._print("""The bot keeps a database of its admins (users who can use certain sensitive commands) and owners (users who can quit the bot and modify its access @@ -347,7 +347,7 @@ class ConfigScript(object): frontend = {} if self.data["components"]["irc_watcher"]: - print + print() watcher = self.data["irc"]["watcher"] = OrderedDict() if self._wmf: watcher["host"] = "irc.wikimedia.org" @@ -375,7 +375,7 @@ class ConfigScript(object): else: chan_question = "Watcher channels to join by default:" watcher["channels"] = self._ask_list(chan_question) - print + print() self._print("""I am now creating a blank 'rules.py' file, which will determine how the bot 
handles messages received from the IRC watcher. It contains a process() @@ -390,13 +390,13 @@ class ConfigScript(object): self.data["irc"]["version"] = "EarwigBot - $1 - Python/$2 https://github.com/earwig/earwigbot" def _set_commands(self): - print + print() msg = """Would you like to disable the default IRC commands? You can fine-tune which commands are disabled later on.""" if (not self.data["components"]["irc_frontend"] or self._ask_bool(msg, default=False)): self.data["commands"]["disable"] = True - print + print() self._print("""I am now creating the 'commands/' directory, where you can place custom IRC commands and plugins. Creating your own commands is described in the documentation.""") @@ -404,7 +404,7 @@ class ConfigScript(object): self._pause() def _set_tasks(self): - print + print() self._print("""I am now creating the 'tasks/' directory, where you can place custom bot tasks and plugins. Creating your own tasks is described in the documentation.""") @@ -412,22 +412,22 @@ class ConfigScript(object): self._pause() def _set_schedule(self): - print + print() self._print("""The final section of your config file, 'schedule', is a list of bot tasks to be started by the wiki scheduler. Each entry contains cron-like time quantifiers and a list of tasks. 
For example, the following starts the 'foobot' task every hour on the half-hour:""") - print "\x1b[33mschedule:" - print " - minute: 30" - print " tasks:" - print " - foobot\x1b[0m" + print("\x1b[33mschedule:") + print(" - minute: 30") + print(" tasks:") + print(" - foobot\x1b[0m") self._print("""The following starts the 'barbot' task with the keyword arguments 'action="baz"' every Monday at 05:00 UTC:""") - print "\x1b[33m - week_day: 1" - print " hour: 5" - print " tasks:" - print ' - ["barbot", {"action": "baz"}]\x1b[0m' + print("\x1b[33m - week_day: 1") + print(" hour: 5") + print(" tasks:") + print(' - ["barbot", {"action": "baz"}]\x1b[0m') self._print("""The full list of quantifiers is minute, hour, month_day, month, and week_day. See the documentation for more information.""") @@ -449,7 +449,7 @@ class ConfigScript(object): open(self.config.path, "w").close() chmod(self.config.path, stat.S_IRUSR|stat.S_IWUSR) except IOError: - print "I can't seem to write to the config file:" + print("I can't seem to write to the config file:") raise self._set_metadata() self._set_components() @@ -461,7 +461,7 @@ class ConfigScript(object): self._set_tasks() if components["wiki_scheduler"]: self._set_schedule() - print + print() self._print("""I am now saving config.yml with your settings. YAML is a relatively straightforward format and you should be able to update these settings in the future when necessary. diff --git a/earwigbot/exceptions.py b/earwigbot/exceptions.py index d452a76..61f1572 100644 --- a/earwigbot/exceptions.py +++ b/earwigbot/exceptions.py @@ -268,5 +268,5 @@ class ParserRedirectError(CopyvioCheckError): exposed in client code. 
""" def __init__(self, url): - super(ParserRedirectError, self).__init__() + super().__init__() self.url = url diff --git a/earwigbot/irc/connection.py b/earwigbot/irc/connection.py index 2d10341..723ca83 100644 --- a/earwigbot/irc/connection.py +++ b/earwigbot/irc/connection.py @@ -28,7 +28,7 @@ from earwigbot.exceptions import BrokenSocketError __all__ = ["IRCConnection"] -class IRCConnection(object): +class IRCConnection: """Interface with an IRC server.""" def __init__(self, host, port, nick, ident, realname, logger): @@ -84,7 +84,7 @@ class IRCConnection(object): if not data: # Socket isn't giving us any data, so it is dead or broken: raise BrokenSocketError() - return data + return data.decode(errors="ignore") def _send(self, msg, hidelog=False): """Send data to the server.""" @@ -93,7 +93,7 @@ class IRCConnection(object): if time_since_last < 0.75: sleep(0.75 - time_since_last) try: - self._sock.sendall(msg + "\r\n") + self._sock.sendall(msg.encode() + b"\r\n") except socket.error: self._is_running = False else: @@ -177,7 +177,7 @@ class IRCConnection(object): def ident(self): """Our ident on the server, like ``"earwig"``. - See http://en.wikipedia.org/wiki/Ident. + See https://en.wikipedia.org/wiki/Ident_protocol. 
""" return self._ident diff --git a/earwigbot/irc/data.py b/earwigbot/irc/data.py index 43264f8..a89d9b9 100644 --- a/earwigbot/irc/data.py +++ b/earwigbot/irc/data.py @@ -24,7 +24,7 @@ import re __all__ = ["Data"] -class Data(object): +class Data: """Store data from an individual line received on IRC.""" def __init__(self, my_nick, line, msgtype): @@ -160,7 +160,7 @@ class Data(object): @property def ident(self): - """`Ident `_ of the sender.""" + """`Ident `_ of the sender.""" return self._ident @property diff --git a/earwigbot/irc/frontend.py b/earwigbot/irc/frontend.py index 3503ba7..a2a38fe 100644 --- a/earwigbot/irc/frontend.py +++ b/earwigbot/irc/frontend.py @@ -43,9 +43,8 @@ class Frontend(IRCConnection): def __init__(self, bot): self.bot = bot cf = bot.config.irc["frontend"] - base = super(Frontend, self) - base.__init__(cf["host"], cf["port"], cf["nick"], cf["ident"], - cf["realname"], bot.logger.getChild("frontend")) + super().__init__(cf["host"], cf["port"], cf["nick"], cf["ident"], + cf["realname"], bot.logger.getChild("frontend")) self._auth_wait = False self._channels = set() diff --git a/earwigbot/irc/rc.py b/earwigbot/irc/rc.py index 557d1b0..318a103 100644 --- a/earwigbot/irc/rc.py +++ b/earwigbot/irc/rc.py @@ -24,7 +24,7 @@ import re __all__ = ["RC"] -class RC(object): +class RC: """Store data from an event received from our IRC watcher.""" re_color = re.compile("\x03([0-9]{1,2}(,[0-9]{1,2})?)?") re_edit = re.compile("\A\[\[(.*?)\]\]\s(.*?)\s(https?://.*?)\s\*\s(.*?)\s\*\s(.*?)\Z") diff --git a/earwigbot/irc/watcher.py b/earwigbot/irc/watcher.py index bca8360..b049fef 100644 --- a/earwigbot/irc/watcher.py +++ b/earwigbot/irc/watcher.py @@ -40,9 +40,8 @@ class Watcher(IRCConnection): def __init__(self, bot): self.bot = bot cf = bot.config.irc["watcher"] - base = super(Watcher, self) - base.__init__(cf["host"], cf["port"], cf["nick"], cf["ident"], - cf["realname"], bot.logger.getChild("watcher")) + super().__init__(cf["host"], cf["port"], 
cf["nick"], cf["ident"], + cf["realname"], bot.logger.getChild("watcher")) self._prepare_process_hook() self._connect() diff --git a/earwigbot/lazy.py b/earwigbot/lazy.py index 6301c18..b6c07b3 100644 --- a/earwigbot/lazy.py +++ b/earwigbot/lazy.py @@ -22,12 +22,13 @@ """ Implements a hierarchy of importing classes as defined in `PEP 302 -`_ to load modules in a safe yet lazy +`_ to load modules in a safe yet lazy manner, so that they can be referred to by name but are not actually loaded until they are used (i.e. their attributes are read or modified). """ from imp import acquire_lock, release_lock +import importlib import sys from threading import RLock from types import ModuleType @@ -46,7 +47,7 @@ def _mock_get(self, attr): if _real_get(self, "_unloaded"): type(self)._unloaded = False try: - reload(self) + importlib.reload(self) except ImportError as exc: type(self).__getattribute__ = _create_failing_get(exc) del type(self)._lock @@ -77,7 +78,7 @@ class _LazyModule(type): release_lock() -class LazyImporter(object): +class LazyImporter: """An importer for modules that are loaded lazily. This inserts itself into :py:data:`sys.meta_path`, storing a dictionary of diff --git a/earwigbot/managers.py b/earwigbot/managers.py index 1562bdd..0debfb8 100644 --- a/earwigbot/managers.py +++ b/earwigbot/managers.py @@ -32,7 +32,7 @@ from earwigbot.tasks import Task __all__ = ["CommandManager", "TaskManager"] -class _ResourceManager(object): +class _ResourceManager: """ **EarwigBot: Resource Manager** @@ -69,7 +69,7 @@ class _ResourceManager(object): def __iter__(self): with self.lock: - for resource in self._resources.itervalues(): + for resource in self._resources.values(): yield resource def _is_disabled(self, name): @@ -201,7 +201,7 @@ class CommandManager(_ResourceManager): Manages (i.e., loads, reloads, and calls) IRC commands. 
""" def __init__(self, bot): - super(CommandManager, self).__init__(bot, "commands", Command) + super().__init__(bot, "commands", Command) def _wrap_check(self, command, data): """Check whether a command should be called, catching errors.""" @@ -248,7 +248,7 @@ class TaskManager(_ResourceManager): Manages (i.e., loads, reloads, schedules, and runs) wiki bot tasks. """ def __init__(self, bot): - super(TaskManager, self).__init__(bot, "tasks", Task) + super().__init__(bot, "tasks", Task) def _wrapper(self, task, **kwargs): """Wrapper for task classes: run the task and catch any errors.""" diff --git a/earwigbot/tasks/__init__.py b/earwigbot/tasks/__init__.py index b502f88..3f10846 100644 --- a/earwigbot/tasks/__init__.py +++ b/earwigbot/tasks/__init__.py @@ -25,7 +25,7 @@ from earwigbot import wiki __all__ = ["Task"] -class Task(object): +class Task: """ **EarwigBot: Base Bot Task** @@ -48,8 +48,8 @@ class Task(object): This is called once immediately after the task class is loaded by the task manager (in :py:meth:`tasks.load() `). Don't override this - directly; if you do, remember to place ``super(Task, self).__init()`` - first. Use :py:meth:`setup` for typical task-init/setup needs. + directly; if you do, remember to place ``super().__init__()`` first. + Use :py:meth:`setup` for typical task-init/setup needs. 
""" self.bot = bot self.config = bot.config diff --git a/earwigbot/tasks/wikiproject_tagger.py b/earwigbot/tasks/wikiproject_tagger.py index a23eb3a..9e67cac 100644 --- a/earwigbot/tasks/wikiproject_tagger.py +++ b/earwigbot/tasks/wikiproject_tagger.py @@ -183,11 +183,11 @@ class WikiProjectTagger(Task): """ prefix = title.split(":", 1)[0] if prefix == title: - return u":".join((site.namespace_id_to_name(assumed), title)) + return ":".join((site.namespace_id_to_name(assumed), title)) try: site.namespace_name_to_id(prefix) except exceptions.NamespaceNotFoundError: - return u":".join((site.namespace_id_to_name(assumed), title)) + return ":".join((site.namespace_id_to_name(assumed), title)) return title def get_names(self, site, banner): @@ -197,7 +197,7 @@ class WikiProjectTagger(Task): banner = banner.split(":", 1)[1] page = site.get_page(title) if page.exists != page.PAGE_EXISTS: - self.logger.error(u"Banner [[%s]] does not exist", title) + self.logger.error("Banner [[%s]] does not exist", title) return banner, None names = {banner, title} @@ -208,17 +208,17 @@ class WikiProjectTagger(Task): if backlink["ns"] == constants.NS_TEMPLATE: names.add(backlink["title"].split(":", 1)[1]) - log = u"Found %s aliases for banner [[%s]]" + log = "Found %s aliases for banner [[%s]]" self.logger.debug(log, len(names), title) return banner, names def process_category(self, page, job, recursive): """Try to tag all pages in the given category.""" if page.title in job.processed_cats: - self.logger.debug(u"Skipping category, already processed: [[%s]]", + self.logger.debug("Skipping category, already processed: [[%s]]", page.title) return - self.logger.info(u"Processing category: [[%s]]", page.title) + self.logger.info("Processing category: [[%s]]", page.title) job.processed_cats.add(page.title) if job.tag_categories: @@ -243,7 +243,7 @@ class WikiProjectTagger(Task): page = page.toggle_talk() if page.title in job.processed_pages: - self.logger.debug(u"Skipping page, already processed: 
[[%s]]", + self.logger.debug("Skipping page, already processed: [[%s]]", page.title) return job.processed_pages.add(page.title) @@ -259,7 +259,7 @@ class WikiProjectTagger(Task): self.process_new_page(page, job) return except exceptions.InvalidPageError: - self.logger.error(u"Skipping invalid page: [[%s]]", page.title) + self.logger.error("Skipping invalid page: [[%s]]", page.title) return is_update = False @@ -270,28 +270,28 @@ class WikiProjectTagger(Task): is_update = True break else: - log = u"Skipping page: [[%s]]; already tagged with '%s'" + log = "Skipping page: [[%s]]; already tagged with '%s'" self.logger.info(log, page.title, template.name) return if job.only_with: if not any(template.name.matches(job.only_with) for template in code.ifilter_templates(recursive=True)): - log = u"Skipping page: [[%s]]; fails only-with condition" + log = "Skipping page: [[%s]]; fails only-with condition" self.logger.info(log, page.title) return if is_update: - old_banner = unicode(banner) + old_banner = str(banner) self.update_banner(banner, job, code) if banner == old_banner: - log = u"Skipping page: [[%s]]; already tagged and no updates" + log = "Skipping page: [[%s]]; already tagged and no updates" self.logger.info(log, page.title) return - self.logger.info(u"Updating banner on page: [[%s]]", page.title) + self.logger.info("Updating banner on page: [[%s]]", page.title) banner = banner.encode("utf8") else: - self.logger.info(u"Tagging page: [[%s]]", page.title) + self.logger.info("Tagging page: [[%s]]", page.title) banner = self.make_banner(job, code) shell = self.get_banner_shell(code) if shell: @@ -299,22 +299,22 @@ class WikiProjectTagger(Task): else: self.add_banner(code, banner) - self.save_page(page, job, unicode(code), banner) + self.save_page(page, job, str(code), banner) def process_new_page(self, page, job): """Try to tag a *page* that doesn't exist yet using the *job*.""" if job.nocreate or job.only_with: - log = u"Skipping nonexistent page: [[%s]]" + log = 
"Skipping nonexistent page: [[%s]]" self.logger.info(log, page.title) else: - self.logger.info(u"Tagging new page: [[%s]]", page.title) + self.logger.info("Tagging new page: [[%s]]", page.title) banner = self.make_banner(job) self.save_page(page, job, banner, banner) def save_page(self, page, job, text, banner): """Save a page with an updated banner.""" if job.dry_run: - self.logger.debug(u"[DRY RUN] Banner: %s", banner) + self.logger.debug("[DRY RUN] Banner: %s", banner) else: summary = job.summary.replace("$3", banner) page.edit(text, self.make_summary(summary), minor=True) @@ -365,7 +365,7 @@ class WikiProjectTagger(Task): classes = {klass: 0 for klass in classnames} for template in code.ifilter_templates(recursive=True): if template.has("class"): - value = unicode(template.get("class").value).lower() + value = str(template.get("class").value).lower() if value in classes: classes[value] += 1 @@ -388,14 +388,14 @@ class WikiProjectTagger(Task): if not shells: shells = code.filter_templates(matches=regex, recursive=True) if shells: - log = u"Inserting banner into shell: %s" + log = "Inserting banner into shell: %s" self.logger.debug(log, shells[0].name) return shells[0] def add_banner_to_shell(self, shell, banner): """Add *banner* to *shell*.""" if shell.has_param(1): - if unicode(shell.get(1).value).endswith("\n"): + if str(shell.get(1).value).endswith("\n"): banner += "\n" else: banner = "\n" + banner @@ -410,16 +410,16 @@ class WikiProjectTagger(Task): name = template.name.lower().replace("_", " ") for regex in self.TOP_TEMPS: if re.match(regex, name): - self.logger.debug(u"Skipping past top template: %s", name) + self.logger.debug("Skipping past top template: %s", name) predecessor = template break if "wikiproject" in name or name.startswith("wp"): - self.logger.debug(u"Skipping past banner template: %s", name) + self.logger.debug("Skipping past banner template: %s", name) predecessor = template if predecessor: self.logger.debug("Inserting banner after 
template") - if not unicode(predecessor).endswith("\n"): + if not str(predecessor).endswith("\n"): banner = "\n" + banner post = code.index(predecessor) + 1 if len(code.nodes) > post and not code.get(post).startswith("\n"): @@ -429,7 +429,7 @@ class WikiProjectTagger(Task): self.logger.debug("Inserting banner at beginning") code.insert(0, banner + "\n") -class _Job(object): +class _Job: """Represents a single wikiproject-tagging task. Stores information on the banner to add, the edit summary to use, whether diff --git a/earwigbot/util.py b/earwigbot/util.py index 29286a0..be31dab 100755 --- a/earwigbot/util.py +++ b/earwigbot/util.py @@ -128,8 +128,8 @@ def main(): level = logging.DEBUG elif args.quiet: level = logging.WARNING - print version - print + print(version) + print() bot = Bot(path.abspath(args.path), level=level) if args.task: diff --git a/earwigbot/wiki/__init__.py b/earwigbot/wiki/__init__.py index a84c8e6..57473ad 100644 --- a/earwigbot/wiki/__init__.py +++ b/earwigbot/wiki/__init__.py @@ -25,8 +25,8 @@ This is a collection of classes and functions to read from and write to Wikipedia and other wiki sites. No connection whatsoever to `python-wikitools -`_ written by `Mr.Z-man -`_, other than a similar purpose. +`_ written by `Mr.Z-man +`_, other than a similar purpose. We share no code. Import the toolset directly with ``from earwigbot import wiki``. 
If using the diff --git a/earwigbot/wiki/category.py b/earwigbot/wiki/category.py index f5edcc4..b9f154f 100644 --- a/earwigbot/wiki/category.py +++ b/earwigbot/wiki/category.py @@ -99,7 +99,7 @@ class Category(Page): base = row[0].replace("_", " ").decode("utf8") namespace = self.site.namespace_id_to_name(row[1]) if namespace: - title = u":".join((namespace, base)) + title = ":".join((namespace, base)) else: # Avoid doing a silly (albeit valid) ":Pagename" thing title = base yield self.site.get_page(title, follow_redirects=follow, @@ -109,7 +109,7 @@ class Category(Page): """Return the size of the category using the API.""" result = self.site.api_query(action="query", prop="categoryinfo", titles=self.title) - info = result["query"]["pages"].values()[0]["categoryinfo"] + info = list(result["query"]["pages"].values())[0]["categoryinfo"] return info[member_type] def _get_size_via_sql(self, member_type): diff --git a/earwigbot/wiki/copyvios/__init__.py b/earwigbot/wiki/copyvios/__init__.py index 9b1c616..b511b47 100644 --- a/earwigbot/wiki/copyvios/__init__.py +++ b/earwigbot/wiki/copyvios/__init__.py @@ -21,7 +21,7 @@ # SOFTWARE. from time import sleep, time -from urllib2 import build_opener +from urllib.request import build_opener from earwigbot import exceptions from earwigbot.wiki.copyvios.markov import MarkovChain @@ -32,7 +32,7 @@ from earwigbot.wiki.copyvios.workers import ( __all__ = ["CopyvioMixIn", "globalize", "localize"] -class CopyvioMixIn(object): +class CopyvioMixIn: """ **EarwigBot: Wiki Toolset: Copyright Violation MixIn** @@ -114,7 +114,7 @@ class CopyvioMixIn(object): (:exc:`.UnknownSearchEngineError`, :exc:`.SearchQueryError`, ...) on errors. 
""" - log = u"Starting copyvio check for [[{0}]]" + log = "Starting copyvio check for [[{0}]]" self._logger.info(log.format(self.title)) searcher = self._get_search_engine() parser = ArticleTextParser(self.get(), args={ @@ -151,7 +151,7 @@ class CopyvioMixIn(object): if short_circuit and workspace.finished: workspace.possible_miss = True break - log = u"[[{0}]] -> querying {1} for {2!r}" + log = "[[{0}]] -> querying {1} for {2!r}" self._logger.debug(log.format(self.title, searcher.name, chunk)) workspace.enqueue(searcher.search(chunk)) num_queries += 1 @@ -183,7 +183,7 @@ class CopyvioMixIn(object): Since no searching is done, neither :exc:`.UnknownSearchEngineError` nor :exc:`.SearchQueryError` will be raised. """ - log = u"Starting copyvio compare for [[{0}]] against {1}" + log = "Starting copyvio compare for [[{0}]] against {1}" self._logger.info(log.format(self.title, url)) article = MarkovChain(ArticleTextParser(self.get()).strip()) workspace = CopyvioWorkspace( diff --git a/earwigbot/wiki/copyvios/exclusions.py b/earwigbot/wiki/copyvios/exclusions.py index a82147b..62a6daa 100644 --- a/earwigbot/wiki/copyvios/exclusions.py +++ b/earwigbot/wiki/copyvios/exclusions.py @@ -24,7 +24,7 @@ import re import sqlite3 as sqlite from threading import Lock from time import time -from urlparse import urlparse +from urllib.parse import urlparse from earwigbot import exceptions @@ -45,7 +45,7 @@ DEFAULT_SOURCES = { _RE_STRIP_PREFIX = r"^https?://(www\.)?" 
-class ExclusionsDB(object): +class ExclusionsDB: """ **EarwigBot: Wiki Toolset: Exclusions Database Manager** @@ -77,7 +77,7 @@ class ExclusionsDB(object): """ query = "INSERT INTO sources VALUES (?, ?);" sources = [] - for sitename, pages in DEFAULT_SOURCES.iteritems(): + for sitename, pages in DEFAULT_SOURCES.items(): for page in pages: sources.append((sitename, page)) @@ -168,11 +168,11 @@ class ExclusionsDB(object): max_staleness = 60 * 60 * (12 if sitename == "all" else 48) time_since_update = int(time() - self._get_last_update(sitename)) if force or time_since_update > max_staleness: - log = u"Updating stale database: {0} (last updated {1} seconds ago)" + log = "Updating stale database: {0} (last updated {1} seconds ago)" self._logger.info(log.format(sitename, time_since_update)) self._update(sitename) else: - log = u"Database for {0} is still fresh (last updated {1} seconds ago)" + log = "Database for {0} is still fresh (last updated {1} seconds ago)" self._logger.debug(log.format(sitename, time_since_update)) if sitename != "all": self.sync("all", force=force) @@ -202,11 +202,11 @@ class ExclusionsDB(object): else: matches = normalized.startswith(excl) if matches: - log = u"Exclusion detected in {0} for {1}" + log = "Exclusion detected in {0} for {1}" self._logger.debug(log.format(sitename, url)) return True - log = u"No exclusions in {0} for {1}".format(sitename, url) + log = "No exclusions in {0} for {1}".format(sitename, url) self._logger.debug(log) return False diff --git a/earwigbot/wiki/copyvios/markov.py b/earwigbot/wiki/copyvios/markov.py index 9a4717d..b5e6606 100644 --- a/earwigbot/wiki/copyvios/markov.py +++ b/earwigbot/wiki/copyvios/markov.py @@ -25,7 +25,7 @@ from re import sub, UNICODE __all__ = ["EMPTY", "EMPTY_INTERSECTION", "MarkovChain", "MarkovChainIntersection"] -class MarkovChain(object): +class MarkovChain: """Implements a basic ngram Markov chain of words.""" START = -1 END = -2 @@ -43,7 +43,7 @@ class MarkovChain(object): words = 
([self.START] * padding) + words + ([self.END] * padding) chain = {} - for i in xrange(len(words) - self.degree + 1): + for i in range(len(words) - self.degree + 1): phrase = tuple(words[i:i+self.degree]) if phrase in chain: chain[phrase] += 1 @@ -53,7 +53,7 @@ class MarkovChain(object): def _get_size(self): """Return the size of the Markov chain: the total number of nodes.""" - return sum(self.chain.itervalues()) + return sum(self.chain.values()) def __repr__(self): """Return the canonical string representation of the MarkovChain.""" diff --git a/earwigbot/wiki/copyvios/parsers.py b/earwigbot/wiki/copyvios/parsers.py index 5d694aa..9dea64a 100644 --- a/earwigbot/wiki/copyvios/parsers.py +++ b/earwigbot/wiki/copyvios/parsers.py @@ -23,9 +23,9 @@ import json from os import path import re -from StringIO import StringIO -import urllib -import urlparse +from io import StringIO +import urllib.parse +import urllib.request import mwparserfromhell @@ -40,7 +40,7 @@ pdfpage = importer.new("pdfminer.pdfpage") __all__ = ["ArticleTextParser", "get_parser"] -class _BaseTextParser(object): +class _BaseTextParser: """Base class for a parser that handles text.""" TYPE = None @@ -93,8 +93,8 @@ class ArticleTextParser(_BaseTextParser): self._merge_templates(param.value) chunks.append(param.value) if chunks: - subst = u" ".join(map(unicode, chunks)) - code.replace(template, u" " + subst + u" ") + subst = " ".join(map(str, chunks)) + code.replace(template, " " + subst + " ") else: code.remove(template) @@ -178,7 +178,7 @@ class ArticleTextParser(_BaseTextParser): self._merge_templates(wikicode) clean = wikicode.strip_code(normalize=True, collapse=True) - self.clean = re.sub("\n\n+", "\n", clean).strip() + self.clean = re.sub(r"\n\n+", "\n", clean).strip() return self.clean def chunk(self, max_chunks, min_query=8, max_query=128, split_thresh=32): @@ -191,7 +191,7 @@ class ArticleTextParser(_BaseTextParser): and *max_chunks* is low, so we don't end up just searching for just the first 
paragraph. - This is implemented using :py:mod:`nltk` (http://nltk.org/). A base + This is implemented using :py:mod:`nltk` (https://nltk.org/). A base directory (*nltk_dir*) is required to store nltk's punctuation database, and should be passed as an argument to the constructor. It is typically located in the bot's working directory. @@ -223,7 +223,7 @@ class ArticleTextParser(_BaseTextParser): """ schemes = ("http://", "https://") links = mwparserfromhell.parse(self.text).ifilter_external_links() - return [unicode(link.url) for link in links + return [str(link.url) for link in links if link.url.startswith(schemes)] @@ -288,7 +288,7 @@ class _HTMLParser(_BaseTextParser): "dynamicviews": "1", "rewriteforssl": "true", } - raw = self._open(url + urllib.urlencode(params), + raw = self._open(url + urllib.parse.urlencode(params), allow_content_types=["application/json"]) if raw is None: return "" @@ -307,9 +307,9 @@ class _HTMLParser(_BaseTextParser): """Return the actual text contained within an HTML document. Implemented using :py:mod:`BeautifulSoup ` - (http://www.crummy.com/software/BeautifulSoup/). + (https://www.crummy.com/software/BeautifulSoup/). 
""" - url = urlparse.urlparse(self.url) if self.url else None + url = urllib.parse.urlparse(self.url) if self.url else None soup = self._get_soup(self.text) if not soup.body: # No tag present in HTML -> @@ -336,8 +336,8 @@ class _PDFParser(_BaseTextParser): """A parser that can extract text from a PDF file.""" TYPE = "PDF" substitutions = [ - (u"\x0c", u"\n"), - (u"\u2022", u" "), + ("\x0c", "\n"), + ("\u2022", " "), ] def parse(self): @@ -359,7 +359,7 @@ class _PDFParser(_BaseTextParser): value = output.getvalue().decode("utf8") for orig, new in self.substitutions: value = value.replace(orig, new) - return re.sub("\n\n+", "\n", value).strip() + return re.sub(r"\n\n+", "\n", value).strip() class _PlainTextParser(_BaseTextParser): diff --git a/earwigbot/wiki/copyvios/result.py b/earwigbot/wiki/copyvios/result.py index ca73036..9997be6 100644 --- a/earwigbot/wiki/copyvios/result.py +++ b/earwigbot/wiki/copyvios/result.py @@ -27,7 +27,7 @@ from earwigbot.wiki.copyvios.markov import EMPTY, EMPTY_INTERSECTION __all__ = ["CopyvioSource", "CopyvioCheckResult"] -class CopyvioSource(object): +class CopyvioSource: """ **EarwigBot: Wiki Toolset: Copyvio Source** @@ -110,7 +110,7 @@ class CopyvioSource(object): event.wait() -class CopyvioCheckResult(object): +class CopyvioCheckResult: """ **EarwigBot: Wiki Toolset: Copyvio Check Result** @@ -167,9 +167,9 @@ class CopyvioCheckResult(object): def get_log_message(self, title): """Build a relevant log message for this copyvio check result.""" if not self.sources: - log = u"No violation for [[{0}]] (no sources; {1} queries; {2} seconds)" + log = "No violation for [[{0}]] (no sources; {1} queries; {2} seconds)" return log.format(title, self.queries, self.time) - log = u"{0} for [[{1}]] (best: {2} ({3} confidence); {4} sources; {5} queries; {6} seconds)" + log = "{0} for [[{1}]] (best: {2} ({3} confidence); {4} sources; {5} queries; {6} seconds)" is_vio = "Violation detected" if self.violation else "No violation" return 
log.format(is_vio, title, self.url, self.confidence, len(self.sources), self.queries, self.time) diff --git a/earwigbot/wiki/copyvios/search.py b/earwigbot/wiki/copyvios/search.py index d05ec82..dedffd8 100644 --- a/earwigbot/wiki/copyvios/search.py +++ b/earwigbot/wiki/copyvios/search.py @@ -24,20 +24,18 @@ from gzip import GzipFile from json import loads from re import sub as re_sub from socket import error -from StringIO import StringIO -from urllib import quote, urlencode -from urllib2 import URLError +from io import StringIO +from urllib.parse import quote, urlencode +from urllib.error import URLError from earwigbot import importer from earwigbot.exceptions import SearchQueryError lxml = importer.new("lxml") -oauth = importer.new("oauth2") -__all__ = ["BingSearchEngine", "GoogleSearchEngine", "YahooBOSSSearchEngine", - "YandexSearchEngine", "SEARCH_ENGINES"] +__all__ = ["BingSearchEngine", "GoogleSearchEngine", "YandexSearchEngine", "SEARCH_ENGINES"] -class _BaseSearchEngine(object): +class _BaseSearchEngine: """Base class for a simple search engine interface.""" name = "Base" @@ -95,7 +93,7 @@ class BingSearchEngine(_BaseSearchEngine): name = "Bing" def __init__(self, cred, opener): - super(BingSearchEngine, self).__init__(cred, opener) + super().__init__(cred, opener) key = self.cred["key"] auth = (key + ":" + key).encode("base64").replace("\n", "") @@ -170,60 +168,6 @@ class GoogleSearchEngine(_BaseSearchEngine): return [] -class YahooBOSSSearchEngine(_BaseSearchEngine): - """A search engine interface with Yahoo! BOSS.""" - name = "Yahoo! BOSS" - - @staticmethod - def _build_url(base, params): - """Works like urllib.urlencode(), but uses %20 for spaces over +.""" - enc = lambda s: quote(s.encode("utf8"), safe="") - args = ["=".join((enc(k), enc(v))) for k, v in params.iteritems()] - return base + "?" + "&".join(args) - - @staticmethod - def requirements(): - return ["oauth2"] - - def search(self, query): - """Do a Yahoo! BOSS web search for *query*. 
- - Returns a list of URLs ranked by relevance (as determined by Yahoo). - Raises :py:exc:`~earwigbot.exceptions.SearchQueryError` on errors. - """ - key, secret = self.cred["key"], self.cred["secret"] - consumer = oauth.Consumer(key=key, secret=secret) - - url = "http://yboss.yahooapis.com/ysearch/web" - params = { - "oauth_version": oauth.OAUTH_VERSION, - "oauth_nonce": oauth.generate_nonce(), - "oauth_timestamp": oauth.Request.make_timestamp(), - "oauth_consumer_key": consumer.key, - "q": '"' + query.encode("utf8") + '"', - "count": str(self.count), - "type": "html,text,pdf", - "format": "json", - } - - req = oauth.Request(method="GET", url=url, parameters=params) - req.sign_request(oauth.SignatureMethod_HMAC_SHA1(), consumer, None) - - result = self._open(self._build_url(url, req)) - - try: - res = loads(result) - except ValueError: - err = "Yahoo! BOSS Error: JSON could not be decoded" - raise SearchQueryError(err) - - try: - results = res["bossresponse"]["web"]["results"] - except KeyError: - return [] - return [result["url"] for result in results] - - class YandexSearchEngine(_BaseSearchEngine): """A search engine interface with Yandex Search.""" name = "Yandex" @@ -263,6 +207,5 @@ class YandexSearchEngine(_BaseSearchEngine): SEARCH_ENGINES = { "Bing": BingSearchEngine, "Google": GoogleSearchEngine, - "Yahoo! 
BOSS": YahooBOSSSearchEngine, "Yandex": YandexSearchEngine } diff --git a/earwigbot/wiki/copyvios/workers.py b/earwigbot/wiki/copyvios/workers.py index e3081ec..a3d70cd 100644 --- a/earwigbot/wiki/copyvios/workers.py +++ b/earwigbot/wiki/copyvios/workers.py @@ -25,17 +25,18 @@ import collections from collections import deque import functools from gzip import GzipFile -from httplib import HTTPException +from http.client import HTTPException from logging import getLogger from math import log -from Queue import Empty, Queue +from queue import Empty, Queue from socket import error as socket_error -from StringIO import StringIO +from io import StringIO from struct import error as struct_error from threading import Lock, Thread import time -from urllib2 import build_opener, Request, URLError -import urlparse +from urllib.error import URLError +import urllib.parse +from urllib.request import build_opener, Request from earwigbot import importer from earwigbot.exceptions import ParserExclusionError, ParserRedirectError @@ -72,7 +73,7 @@ def globalize(num_workers=8): return _global_queues = _CopyvioQueues() - for i in xrange(num_workers): + for i in range(num_workers): worker = _CopyvioWorker("global-{0}".format(i), _global_queues) worker.start() _global_workers.append(worker) @@ -91,14 +92,14 @@ def localize(): if not _is_globalized: return - for i in xrange(len(_global_workers)): + for i in range(len(_global_workers)): _global_queues.unassigned.put((StopIteration, None)) _global_queues = None _global_workers = [] _is_globalized = False -class _CopyvioQueues(object): +class _CopyvioQueues: """Stores data necessary to maintain the various queues during a check.""" def __init__(self): @@ -107,7 +108,7 @@ class _CopyvioQueues(object): self.unassigned = Queue() -class _CopyvioWorker(object): +class _CopyvioWorker: """A multithreaded URL opener/parser instance.""" def __init__(self, name, queues, until=None): @@ -149,8 +150,8 @@ class _CopyvioWorker(object): None will be 
returned for URLs that cannot be read for whatever reason. """ - parsed = urlparse.urlparse(url) - if not isinstance(url, unicode): + parsed = urllib.parse.urlparse(url) + if not isinstance(url, str): url = url.encode("utf8") extra_headers = {} url, _ = self._try_map_proxy_url(url, parsed, extra_headers) @@ -251,7 +252,7 @@ class _CopyvioWorker(object): site, queue = self._queues.unassigned.get(timeout=timeout) if site is StopIteration: raise StopIteration - self._logger.debug(u"Acquired new site queue: {0}".format(site)) + self._logger.debug("Acquired new site queue: {0}".format(site)) self._site = site self._queue = queue @@ -260,7 +261,7 @@ class _CopyvioWorker(object): if not self._site: self._acquire_new_site() - logmsg = u"Fetching source URL from queue {0}" + logmsg = "Fetching source URL from queue {0}" self._logger.debug(logmsg.format(self._site)) self._queues.lock.acquire() try: @@ -273,7 +274,7 @@ class _CopyvioWorker(object): self._queues.lock.release() return self._dequeue() - self._logger.debug(u"Got source URL: {0}".format(source.url)) + self._logger.debug("Got source URL: {0}".format(source.url)) if source.skipped: self._logger.debug("Source has been skipped") self._queues.lock.release() @@ -331,7 +332,7 @@ class _CopyvioWorker(object): thread.start() -class CopyvioWorkspace(object): +class CopyvioWorkspace: """Manages a single copyvio check distributed across threads.""" def __init__(self, article, min_confidence, max_time, logger, headers, @@ -359,7 +360,7 @@ class CopyvioWorkspace(object): else: self._queues = _CopyvioQueues() self._num_workers = num_workers - for i in xrange(num_workers): + for i in range(num_workers): name = "local-{0:04}.{1}".format(id(self) % 10000, i) _CopyvioWorker(name, self._queues, self._until).start() @@ -371,7 +372,7 @@ class CopyvioWorkspace(object): # reaches the default "suspect" confidence threshold, at which # point it transitions to polynomial growth with a limit of 1 as # (delta / article) approaches 1. 
- # A graph can be viewed here: http://goo.gl/mKPhvr + # A graph can be viewed here: https://goo.gl/mKPhvr ratio = delta / article if ratio <= 0.52763: return -log(1 - ratio) @@ -383,7 +384,7 @@ class CopyvioWorkspace(object): # This piecewise function was derived from experimental data using # reference points at (0, 0), (100, 0.5), (250, 0.75), (500, 0.9), # and (1000, 0.95), with a limit of 1 as delta approaches infinity. - # A graph can be viewed here: http://goo.gl/lVl7or + # A graph can be viewed here: https://goo.gl/lVl7or if delta <= 100: return delta / (delta + 100) elif delta <= 250: @@ -417,22 +418,22 @@ class CopyvioWorkspace(object): self.sources.append(source) if self._exclude_check and self._exclude_check(url): - self._logger.debug(u"enqueue(): exclude {0}".format(url)) + self._logger.debug("enqueue(): exclude {0}".format(url)) source.excluded = True source.skip() continue if self._short_circuit and self.finished: - self._logger.debug(u"enqueue(): auto-skip {0}".format(url)) + self._logger.debug("enqueue(): auto-skip {0}".format(url)) source.skip() continue try: key = tldextract.extract(url).registered_domain except ImportError: # Fall back on very naive method - from urlparse import urlparse - key = u".".join(urlparse(url).netloc.split(".")[-2:]) + from urllib.parse import urlparse + key = ".".join(urlparse(url).netloc.split(".")[-2:]) - logmsg = u"enqueue(): {0} {1} -> {2}" + logmsg = "enqueue(): {0} {1} -> {2}" if key in self._queues.sites: self._logger.debug(logmsg.format("append", key, url)) self._queues.sites[key].append(source) @@ -449,7 +450,7 @@ class CopyvioWorkspace(object): conf = self._calculate_confidence(delta) else: conf = 0.0 - self._logger.debug(u"compare(): {0} -> {1}".format(source.url, conf)) + self._logger.debug("compare(): {0} -> {1}".format(source.url, conf)) with self._finish_lock: if source_chain: source.update(conf, source_chain, delta) @@ -468,7 +469,7 @@ class CopyvioWorkspace(object): with self._finish_lock: pass # Wait 
for any remaining comparisons to be finished if not _is_globalized: - for i in xrange(self._num_workers): + for i in range(self._num_workers): self._queues.unassigned.put((StopIteration, None)) def get_result(self, num_queries=0): diff --git a/earwigbot/wiki/page.py b/earwigbot/wiki/page.py index 302188f..43d18d3 100644 --- a/earwigbot/wiki/page.py +++ b/earwigbot/wiki/page.py @@ -24,7 +24,7 @@ from hashlib import md5 from logging import getLogger, NullHandler import re from time import gmtime, strftime -from urllib import quote +from urllib.parse import quote import mwparserfromhell @@ -94,7 +94,7 @@ class Page(CopyvioMixIn): __init__() will not do any API queries, but it will use basic namespace logic to determine our namespace ID and if we are a talkpage. """ - super(Page, self).__init__(site) + super().__init__(site) self._site = site self._title = title.strip() self._follow_redirects = self._keep_following = follow_redirects @@ -157,7 +157,7 @@ class Page(CopyvioMixIn): contains "[") it will always be invalid, and cannot be edited. 
""" if self._exists == self.PAGE_INVALID: - e = u"Page '{0}' is invalid.".format(self._title) + e = "Page '{0}' is invalid.".format(self._title) raise exceptions.InvalidPageError(e) def _assert_existence(self): @@ -169,7 +169,7 @@ class Page(CopyvioMixIn): """ self._assert_validity() if self._exists == self.PAGE_MISSING: - e = u"Page '{0}' does not exist.".format(self._title) + e = "Page '{0}' does not exist.".format(self._title) raise exceptions.PageNotFoundError(e) def _load(self): @@ -217,11 +217,11 @@ class Page(CopyvioMixIn): self._exists = self.PAGE_INVALID return - res = result["query"]["pages"].values()[0] + res = list(result["query"]["pages"].values())[0] self._title = res["title"] # Normalize our pagename/title self._is_redirect = "redirect" in res - self._pageid = int(result["query"]["pages"].keys()[0]) + self._pageid = int(list(result["query"]["pages"].keys())[0]) if self._pageid < 0: if "missing" in res: # If it has a negative ID and it's missing; we can still get @@ -267,7 +267,7 @@ class Page(CopyvioMixIn): rvprop="content|timestamp", rvslots="main", titles=self._title) - res = result["query"]["pages"].values()[0] + res = list(result["query"]["pages"].values())[0] try: revision = res["revisions"][0] self._content = revision["slots"]["main"]["*"] @@ -322,7 +322,7 @@ class Page(CopyvioMixIn): def _build_edit_params(self, text, summary, minor, bot, force, section, captcha_id, captcha_word): """Given some keyword arguments, build an API edit query string.""" - unitxt = text.encode("utf8") if isinstance(text, unicode) else text + unitxt = text.encode("utf8") if isinstance(text, str) else text hashed = md5(unitxt).hexdigest() # Checksum to ensure text is correct params = { "action": "edit", "title": self._title, "text": text, @@ -455,7 +455,7 @@ class Page(CopyvioMixIn): encoded = self._title.encode("utf8").replace(" ", "_") slug = quote(encoded, safe="/:").decode("utf8") path = self.site._article_path.replace("$1", slug) - return u"".join((self.site.url, 
path)) + return "".join((self.site.url, path)) @property def namespace(self): @@ -546,7 +546,7 @@ class Page(CopyvioMixIn): """ if self._namespace < 0: ns = self.site.namespace_id_to_name(self._namespace) - e = u"Pages in the {0} namespace can't have talk pages.".format(ns) + e = "Pages in the {0} namespace can't have talk pages.".format(ns) raise exceptions.InvalidPageError(e) if self._is_talkpage: @@ -564,7 +564,7 @@ class Page(CopyvioMixIn): # If the new page is in namespace 0, don't do ":Title" (it's correct, # but unnecessary), just do "Title": if new_prefix: - new_title = u":".join((new_prefix, body)) + new_title = ":".join((new_prefix, body)) else: new_title = body diff --git a/earwigbot/wiki/site.py b/earwigbot/wiki/site.py index 94c5a0e..60b6f08 100644 --- a/earwigbot/wiki/site.py +++ b/earwigbot/wiki/site.py @@ -20,14 +20,13 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from cookielib import CookieJar +from http.cookiejar import CookieJar from json import dumps from logging import getLogger, NullHandler from os.path import expanduser from threading import RLock from time import sleep, time -from urllib import unquote_plus -from urlparse import urlparse +from urllib.parse import unquote_plus, urlparse import requests from requests_oauthlib import OAuth1 @@ -42,7 +41,7 @@ oursql = importer.new("oursql") __all__ = ["Site"] -class Site(object): +class Site: """ **EarwigBot: Wiki Toolset: Site** @@ -208,9 +207,9 @@ class Site(object): def _unicodeify(self, value, encoding="utf8"): """Return input as unicode if it's not unicode to begin with.""" - if isinstance(value, unicode): + if isinstance(value, str): return value - return unicode(value, encoding) + return str(value, encoding) def _api_query(self, params, tries=0, wait=5, ignore_maxlag=False, no_assert=False, ae_retry=True): @@ -304,7 +303,7 @@ class Site(object): info = res["error"]["info"] except (TypeError, KeyError): # If there's no error code/info, 
return if "query" in res and "tokens" in res["query"]: - for name, token in res["query"]["tokens"].iteritems(): + for name, token in res["query"]["tokens"].items(): self._tokens[name.split("token")[0]] = token return res @@ -574,7 +573,7 @@ class Site(object): establish a connection. """ args = self._sql_data - for key, value in kwargs.iteritems(): + for key, value in kwargs.items(): args[key] = value if "read_default_file" not in args and "user" not in args and "passwd" not in args: args["read_default_file"] = expanduser("~/.my.cnf") @@ -588,7 +587,7 @@ class Site(object): try: self._sql_conn = oursql.connect(**args) except ImportError: - e = "SQL querying requires the 'oursql' package: http://packages.python.org/oursql/" + e = "SQL querying requires the 'oursql' package: https://pythonhosted.org/oursql/" raise exceptions.SQLError(e) def _get_service_order(self): @@ -743,7 +742,7 @@ class Site(object): See :py:meth:`_sql_connect` for information on how a connection is acquired. Also relevant is `oursql's documentation - `_ for details on that package. + `_ for details on that package. """ if not cursor_class: if dict_cursor: @@ -865,7 +864,7 @@ class Site(object): if lname in lnames: return ns_id - e = u"There is no namespace with name '{0}'.".format(name) + e = "There is no namespace with name '{0}'.".format(name) raise exceptions.NamespaceNotFoundError(e) def get_page(self, title, follow_redirects=False, pageid=None): @@ -900,7 +899,7 @@ class Site(object): """ catname = self._unicodeify(catname) prefix = self.namespace_id_to_name(constants.NS_CATEGORY) - pagename = u':'.join((prefix, catname)) + pagename = ':'.join((prefix, catname)) return Category(self, pagename, follow_redirects, pageid, self._logger) def get_user(self, username=None): diff --git a/earwigbot/wiki/sitesdb.py b/earwigbot/wiki/sitesdb.py index b9028fe..a01ef64 100644 --- a/earwigbot/wiki/sitesdb.py +++ b/earwigbot/wiki/sitesdb.py @@ -21,8 +21,8 @@ # SOFTWARE. 
from collections import OrderedDict -from cookielib import LWPCookieJar, LoadError import errno +from http.cookiejar import LWPCookieJar, LoadError from os import chmod, path from platform import python_version import stat @@ -35,7 +35,7 @@ from earwigbot.wiki.site import Site __all__ = ["SitesDB"] -class SitesDB(object): +class SitesDB: """ **EarwigBot: Wiki Toolset: Sites Database Manager** @@ -207,8 +207,8 @@ class SitesDB(object): if not sql: sql = config.wiki.get("sql", OrderedDict()).copy() - for key, value in sql.iteritems(): - if isinstance(value, basestring) and "$1" in value: + for key, value in sql.items(): + if isinstance(value, str) and "$1" in value: sql[key] = value.replace("$1", name) return Site(name=name, project=project, lang=lang, base_url=base_url, @@ -257,9 +257,9 @@ class SitesDB(object): name = site.name sites_data = (name, site.project, site.lang, site._base_url, site._article_path, site._script_path) - sql_data = [(name, key, val) for key, val in site._sql_data.iteritems()] + sql_data = [(name, key, val) for key, val in site._sql_data.items()] ns_data = [] - for ns_id, ns_names in site._namespaces.iteritems(): + for ns_id, ns_names in site._namespaces.items(): ns_data.append((name, ns_id, ns_names.pop(0), True)) for ns_name in ns_names: ns_data.append((name, ns_id, ns_name, False)) diff --git a/earwigbot/wiki/user.py b/earwigbot/wiki/user.py index 4f12102..258158a 100644 --- a/earwigbot/wiki/user.py +++ b/earwigbot/wiki/user.py @@ -30,7 +30,7 @@ from earwigbot.wiki.page import Page __all__ = ["User"] -class User(object): +class User: """ **EarwigBot: Wiki Toolset: User** @@ -106,7 +106,7 @@ class User(object): if not hasattr(self, attr): self._load_attributes() if not self._exists: - e = u"User '{0}' does not exist.".format(self._name) + e = "User '{0}' does not exist.".format(self._name) raise UserNotFoundError(e) return getattr(self, attr) @@ -143,7 +143,7 @@ class User(object): self._groups = res["groups"] try: - self._rights = 
res["rights"].values() + self._rights = list(res["rights"].values()) except AttributeError: self._rights = res["rights"] self._editcount = res["editcount"] diff --git a/setup.py b/setup.py index d4688d0..a928353 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- # -# Copyright (C) 2009-2019 Ben Kurtovic +# Copyright (C) 2009-2021 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -26,31 +26,29 @@ from setuptools import setup, find_packages from earwigbot import __version__ required_deps = [ - "PyYAML >= 3.12", # Parsing config files - "mwparserfromhell >= 0.5", # Parsing wikicode for manipulation - "requests >= 2.21.0", # Wiki API requests - "requests_oauthlib >= 1.2.0", # API authentication via OAuth + "PyYAML >= 5.4.1", # Parsing config files + "mwparserfromhell >= 0.6", # Parsing wikicode for manipulation + "requests >= 2.25.1", # Wiki API requests + "requests_oauthlib >= 1.3.0", # API authentication via OAuth ] extra_deps = { "crypto": [ - "py-bcrypt >= 0.4", # Hashing the bot key in the config file - "pycrypto >= 2.6.1", # Storing bot passwords + keys in the config file + "cryptography >= 3.4.7", # Storing bot passwords + keys in the config file ], "sql": [ - "oursql >= 0.9.3.2", # Interfacing with MediaWiki databases + "oursql3 >= 0.9.4", # Interfacing with MediaWiki databases ], "copyvios": [ - "beautifulsoup4 >= 4.6.0", # Parsing/scraping HTML - "cchardet >= 2.1.1", # Encoding detection for BeautifulSoup - "lxml >= 3.8.0", # Faster parser for BeautifulSoup - "nltk >= 3.2.4", # Parsing sentences to split article content - "oauth2 >= 1.9.0", # Interfacing with Yahoo! 
BOSS Search - "pdfminer >= 20140328", # Extracting text from PDF files - "tldextract >= 2.1.0", # Getting domains for the multithreaded workers + "beautifulsoup4 >= 4.9.3", # Parsing/scraping HTML + "cchardet >= 2.1.7", # Encoding detection for BeautifulSoup + "lxml >= 4.6.3", # Faster parser for BeautifulSoup + "nltk >= 3.6.1", # Parsing sentences to split article content + "pdfminer >= 20191125", # Extracting text from PDF files + "tldextract >= 3.1.0", # Getting domains for the multithreaded workers ], "time": [ - "pytz >= 2017.2", # Handling timezones for the !time IRC command + "pytz >= 2021.1", # Handling timezones for the !time IRC command ], } @@ -81,7 +79,7 @@ setup( "License :: OSI Approved :: MIT License", "Natural Language :: English", "Operating System :: OS Independent", - "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3", "Topic :: Communications :: Chat :: Internet Relay Chat", "Topic :: Internet :: WWW/HTTP" ], diff --git a/tests/test_calc.py b/tests/test_calc.py index db46935..55c5e3c 100644 --- a/tests/test_calc.py +++ b/tests/test_calc.py @@ -28,7 +28,7 @@ from tests import CommandTestCase class TestCalc(CommandTestCase): def setUp(self): - super(TestCalc, self).setUp(Command) + super().setUp(Command) def test_check(self): self.assertFalse(self.command.check(self.make_msg("bloop"))) diff --git a/tests/test_test.py b/tests/test_test.py index 2fe160b..81e6e38 100644 --- a/tests/test_test.py +++ b/tests/test_test.py @@ -28,7 +28,7 @@ from tests import CommandTestCase class TestTest(CommandTestCase): def setUp(self): - super(TestTest, self).setUp(Command) + super().setUp(Command) def test_check(self): self.assertFalse(self.command.check(self.make_msg("bloop"))) @@ -42,7 +42,7 @@ class TestTest(CommandTestCase): self.command.process(self.make_msg("test")) self.assertSaidIn(["Hey \x02Foo\x0F!", "'sup \x02Foo\x0F?"]) - for i in xrange(64): + for i in range(64): test() if __name__ == "__main__":