@@ -23,14 +23,6 @@ config Package | |||||
:members: | :members: | ||||
:undoc-members: | :undoc-members: | ||||
:mod:`ordered_yaml` Module | |||||
.. automodule:: earwigbot.config.ordered_yaml | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:mod:`permissions` Module | :mod:`permissions` Module | ||||
------------------------- | ------------------------- | ||||
@@ -30,13 +30,6 @@ earwigbot Package | |||||
:undoc-members: | :undoc-members: | ||||
:show-inheritance: | :show-inheritance: | ||||
:mod:`lazy` Module | |||||
.. automodule:: earwigbot.lazy | |||||
:members: | |||||
:undoc-members: | |||||
:mod:`managers` Module | :mod:`managers` Module | ||||
---------------------- | ---------------------- | ||||
@@ -62,9 +62,6 @@ build-backend = "setuptools.build_meta" | |||||
exclude = [ | exclude = [ | ||||
# TODO | # TODO | ||||
"src/earwigbot/commands", | "src/earwigbot/commands", | ||||
"src/earwigbot/config", | |||||
"src/earwigbot/lazy.py", | |||||
"src/earwigbot/irc", | |||||
"src/earwigbot/tasks", | "src/earwigbot/tasks", | ||||
"src/earwigbot/wiki/copyvios" | "src/earwigbot/wiki/copyvios" | ||||
] | ] | ||||
@@ -26,8 +26,17 @@ See :file:`README.rst` for an overview, or the :file:`docs/` directory for detai | |||||
This documentation is also available `online <https://packages.python.org/earwigbot>`_. | This documentation is also available `online <https://packages.python.org/earwigbot>`_. | ||||
""" | """ | ||||
import typing | |||||
__all__ = [ | |||||
"bot", | |||||
"cli", | |||||
"commands", | |||||
"config", | |||||
"exceptions", | |||||
"irc", | |||||
"managers", | |||||
"tasks", | |||||
"wiki", | |||||
] | |||||
__author__ = "Ben Kurtovic" | __author__ = "Ben Kurtovic" | ||||
__copyright__ = "Copyright (C) 2009-2024 Ben Kurtovic" | __copyright__ = "Copyright (C) 2009-2024 Ben Kurtovic" | ||||
__license__ = "MIT License" | __license__ = "MIT License" | ||||
@@ -54,30 +63,14 @@ if not __release__: | |||||
finally: | finally: | ||||
del _get_git_commit_id | del _get_git_commit_id | ||||
from earwigbot import lazy | |||||
importer = lazy.LazyImporter() | |||||
if typing.TYPE_CHECKING: | |||||
from earwigbot import ( | |||||
bot, | |||||
cli, | |||||
commands, | |||||
config, | |||||
exceptions, | |||||
irc, | |||||
managers, | |||||
tasks, | |||||
wiki, | |||||
) | |||||
else: | |||||
bot = importer.new("earwigbot.bot") | |||||
cli = importer.new("earwigbot.cli") | |||||
commands = importer.new("earwigbot.commands") | |||||
config = importer.new("earwigbot.config") | |||||
exceptions = importer.new("earwigbot.exceptions") | |||||
irc = importer.new("earwigbot.irc") | |||||
managers = importer.new("earwigbot.managers") | |||||
tasks = importer.new("earwigbot.tasks") | |||||
wiki = importer.new("earwigbot.wiki") | |||||
from earwigbot import ( | |||||
bot, | |||||
cli, | |||||
commands, | |||||
config, | |||||
exceptions, | |||||
irc, | |||||
managers, | |||||
tasks, | |||||
wiki, | |||||
) |
@@ -22,13 +22,8 @@ import base64 | |||||
import hashlib | import hashlib | ||||
import os | import os | ||||
from earwigbot import importer | |||||
from earwigbot.commands import Command | from earwigbot.commands import Command | ||||
fernet = importer.new("cryptography.fernet") | |||||
hashes = importer.new("cryptography.hazmat.primitives.hashes") | |||||
pbkdf2 = importer.new("cryptography.hazmat.primitives.kdf.pbkdf2") | |||||
class Crypt(Command): | class Crypt(Command): | ||||
"""Provides hash functions with !hash (!hash list for supported algorithms) | """Provides hash functions with !hash (!hash list for supported algorithms) | ||||
@@ -73,6 +68,16 @@ class Crypt(Command): | |||||
return | return | ||||
try: | try: | ||||
from cryptography import fernet | |||||
from cryptography.hazmat.primitives import hashes | |||||
from cryptography.hazmat.primitives.kdf import pbkdf2 | |||||
except ModuleNotFoundError: | |||||
self.reply( | |||||
data, | |||||
"This command requires the 'cryptography' package: https://cryptography.io/", | |||||
) | |||||
try: | |||||
if data.command == "encrypt": | if data.command == "encrypt": | ||||
salt = os.urandom(saltlen) | salt = os.urandom(saltlen) | ||||
kdf = pbkdf2.PBKDF2HMAC( | kdf = pbkdf2.PBKDF2HMAC( | ||||
@@ -101,10 +106,5 @@ class Crypt(Command): | |||||
base64.urlsafe_b64encode(kdf.derive(key.encode())) | base64.urlsafe_b64encode(kdf.derive(key.encode())) | ||||
) | ) | ||||
self.reply(data, f.decrypt(ciphertext).decode()) | self.reply(data, f.decrypt(ciphertext).decode()) | ||||
except ImportError: | |||||
self.reply( | |||||
data, | |||||
"This command requires the 'cryptography' package: https://cryptography.io/", | |||||
) | |||||
except Exception as error: | except Exception as error: | ||||
self.reply(data, f"{type(error).__name__}: {str(error)}") | self.reply(data, f"{type(error).__name__}: {str(error)}") |
@@ -1,4 +1,4 @@ | |||||
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -28,18 +28,12 @@ from os import mkdir, path | |||||
import yaml | import yaml | ||||
from earwigbot import importer | |||||
from earwigbot.config.formatter import BotFormatter | from earwigbot.config.formatter import BotFormatter | ||||
from earwigbot.config.node import ConfigNode | from earwigbot.config.node import ConfigNode | ||||
from earwigbot.config.ordered_yaml import OrderedLoader | |||||
from earwigbot.config.permissions import PermissionsDB | from earwigbot.config.permissions import PermissionsDB | ||||
from earwigbot.config.script import ConfigScript | from earwigbot.config.script import ConfigScript | ||||
from earwigbot.exceptions import NoConfigError | from earwigbot.exceptions import NoConfigError | ||||
fernet = importer.new("cryptography.fernet") | |||||
hashes = importer.new("cryptography.hazmat.primitives.hashes") | |||||
pbkdf2 = importer.new("cryptography.hazmat.primitives.kdf.pbkdf2") | |||||
__all__ = ["BotConfig"] | __all__ = ["BotConfig"] | ||||
@@ -128,12 +122,11 @@ class BotConfig: | |||||
def _load(self): | def _load(self): | ||||
"""Load data from our JSON config file (config.yml) into self._data.""" | """Load data from our JSON config file (config.yml) into self._data.""" | ||||
filename = self._config_path | |||||
with open(filename) as fp: | |||||
with open(self._config_path) as fp: | |||||
try: | try: | ||||
self._data = yaml.load(fp, OrderedLoader) | |||||
self._data = yaml.load(fp, yaml.CSafeLoader) | |||||
except yaml.YAMLError: | except yaml.YAMLError: | ||||
print(f"Error parsing config file {filename}:") | |||||
print(f"Error parsing config file {self._config_path}:") | |||||
raise | raise | ||||
def _setup_logging(self): | def _setup_logging(self): | ||||
@@ -276,9 +269,7 @@ class BotConfig: | |||||
if not path.exists(self._config_path): | if not path.exists(self._config_path): | ||||
self._handle_missing_config() | self._handle_missing_config() | ||||
self._load() | self._load() | ||||
if not self._data: | |||||
self._handle_missing_config() | |||||
self._load() | |||||
assert self._data is not None | |||||
self.components._load(self._data.get("components", OrderedDict())) | self.components._load(self._data.get("components", OrderedDict())) | ||||
self.wiki._load(self._data.get("wiki", OrderedDict())) | self.wiki._load(self._data.get("wiki", OrderedDict())) | ||||
@@ -291,6 +282,10 @@ class BotConfig: | |||||
if self.is_encrypted(): | if self.is_encrypted(): | ||||
if not self._decryption_cipher: | if not self._decryption_cipher: | ||||
try: | try: | ||||
from cryptography import fernet | |||||
from cryptography.hazmat.primitives import hashes | |||||
from cryptography.hazmat.primitives.kdf import pbkdf2 | |||||
salt = self.metadata["salt"] | salt = self.metadata["salt"] | ||||
kdf = pbkdf2.PBKDF2HMAC( | kdf = pbkdf2.PBKDF2HMAC( | ||||
algorithm=hashes.SHA256(), | algorithm=hashes.SHA256(), | ||||
@@ -298,7 +293,7 @@ class BotConfig: | |||||
salt=salt, | salt=salt, | ||||
iterations=ConfigScript.PBKDF_ROUNDS, | iterations=ConfigScript.PBKDF_ROUNDS, | ||||
) | ) | ||||
except ImportError: | |||||
except ModuleNotFoundError: | |||||
e = "Encryption requires the 'cryptography' package: https://cryptography.io/" | e = "Encryption requires the 'cryptography' package: https://cryptography.io/" | ||||
raise NoConfigError(e) | raise NoConfigError(e) | ||||
key = getpass("Enter key to decrypt bot passwords: ") | key = getpass("Enter key to decrypt bot passwords: ") | ||||
@@ -352,6 +347,7 @@ class BotConfig: | |||||
"week_day": week_day, | "week_day": week_day, | ||||
} | } | ||||
assert self._data is not None | |||||
data = self._data.get("schedule", []) | data = self._data.get("schedule", []) | ||||
for event in data: | for event in data: | ||||
do = True | do = True | ||||
@@ -19,22 +19,21 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
import base64 | import base64 | ||||
from collections import OrderedDict | |||||
__all__ = ["ConfigNode"] | __all__ = ["ConfigNode"] | ||||
class ConfigNode: | class ConfigNode: | ||||
def __init__(self): | def __init__(self): | ||||
self._data = OrderedDict() | |||||
self._data = {} | |||||
def __repr__(self): | |||||
return self._data | |||||
def __repr__(self) -> str: | |||||
return repr(self._data) | |||||
def __bool__(self): | |||||
def __bool__(self) -> bool: | |||||
return bool(self._data) | return bool(self._data) | ||||
def __len__(self): | |||||
def __len__(self) -> int: | |||||
return len(self._data) | return len(self._data) | ||||
def __getitem__(self, key): | def __getitem__(self, key): | ||||
@@ -1,108 +0,0 @@ | |||||
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
""" | |||||
Based on: | |||||
* https://gist.github.com/844388 | |||||
* https://pyyaml.org/attachment/ticket/161/use_ordered_dict.py | |||||
with modifications. | |||||
""" | |||||
from collections import OrderedDict | |||||
import yaml | |||||
__all__ = ["OrderedLoader", "OrderedDumper"] | |||||
class OrderedLoader(yaml.Loader): | |||||
"""A YAML loader that loads mappings into ordered dictionaries.""" | |||||
def __init__(self, *args, **kwargs): | |||||
super().__init__(*args, **kwargs) | |||||
constructor = type(self).construct_yaml_map | |||||
self.add_constructor("tag:yaml.org,2002:map", constructor) | |||||
self.add_constructor("tag:yaml.org,2002:omap", constructor) | |||||
def construct_yaml_map(self, node): | |||||
data = OrderedDict() | |||||
yield data | |||||
value = self.construct_mapping(node) | |||||
data.update(value) | |||||
def construct_mapping(self, node, deep=False): | |||||
if isinstance(node, yaml.MappingNode): | |||||
self.flatten_mapping(node) | |||||
else: | |||||
raise yaml.constructor.ConstructorError( | |||||
None, | |||||
None, | |||||
f"expected a mapping node, but found {node.id}", | |||||
node.start_mark, | |||||
) | |||||
mapping = OrderedDict() | |||||
for key_node, value_node in node.value: | |||||
key = self.construct_object(key_node, deep=deep) | |||||
try: | |||||
hash(key) | |||||
except TypeError as exc: | |||||
raise yaml.constructor.ConstructorError( | |||||
"while constructing a mapping", | |||||
node.start_mark, | |||||
f"found unacceptable key ({exc})", | |||||
key_node.start_mark, | |||||
) | |||||
value = self.construct_object(value_node, deep=deep) | |||||
mapping[key] = value | |||||
return mapping | |||||
class OrderedDumper(yaml.SafeDumper): | |||||
"""A YAML dumper that dumps ordered dictionaries into mappings.""" | |||||
def __init__(self, *args, **kwargs): | |||||
super().__init__(*args, **kwargs) | |||||
self.add_representer(OrderedDict, type(self).represent_dict) | |||||
def represent_mapping(self, tag, mapping, flow_style=None): | |||||
value = [] | |||||
node = yaml.MappingNode(tag, value, flow_style=flow_style) | |||||
if self.alias_key is not None: | |||||
self.represented_objects[self.alias_key] = node | |||||
best_style = True | |||||
if hasattr(mapping, "items"): | |||||
mapping = list(mapping.items()) | |||||
for item_key, item_value in mapping: | |||||
node_key = self.represent_data(item_key) | |||||
node_value = self.represent_data(item_value) | |||||
if not (isinstance(node_key, yaml.ScalarNode) and not node_key.style): | |||||
best_style = False | |||||
if not (isinstance(node_value, yaml.ScalarNode) and not node_value.style): | |||||
best_style = False | |||||
value.append((node_key, node_value)) | |||||
if flow_style is None: | |||||
if self.default_flow_style is not None: | |||||
node.flow_style = self.default_flow_style | |||||
else: | |||||
node.flow_style = best_style | |||||
return node |
@@ -1,4 +1,4 @@ | |||||
# Copyright (C) 2009-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -19,36 +19,40 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
import base64 | import base64 | ||||
import getpass | |||||
import os | import os | ||||
import os.path | |||||
import re | import re | ||||
import stat | import stat | ||||
import sys | import sys | ||||
from collections import OrderedDict | |||||
from getpass import getpass | |||||
from os import chmod, makedirs, mkdir, path | |||||
from textwrap import fill, wrap | |||||
import textwrap | |||||
import typing | |||||
from typing import Any, Literal | |||||
import yaml | import yaml | ||||
from earwigbot import exceptions, importer | |||||
from earwigbot.config.ordered_yaml import OrderedDumper | |||||
fernet = importer.new("cryptography.fernet") | |||||
hashes = importer.new("cryptography.hazmat.primitives.hashes") | |||||
pbkdf2 = importer.new("cryptography.hazmat.primitives.kdf.pbkdf2") | |||||
from earwigbot import exceptions | |||||
__all__ = ["ConfigScript"] | __all__ = ["ConfigScript"] | ||||
RULES_TEMPLATE = """# -*- coding: utf-8 -*- | |||||
RULES_TEMPLATE = """\ | |||||
from earwigbot.bot import Bot | |||||
from earwigbot.irc import RC | |||||
def process(bot: Bot, rc: RC): | |||||
\"\"\" | |||||
Return a list of channels to report this event to. | |||||
def process(bot, rc): | |||||
\"\"\"Given a Bot() object and an RC() object, return a list of channels | |||||
to report this event to. Also, start any wiki bot tasks within this | |||||
function if necessary.\"\"\" | |||||
Also, start any wiki bot tasks within this function if necessary. | |||||
\"\"\" | |||||
pass | pass | ||||
""" | """ | ||||
class RetryError(Exception): | |||||
pass | |||||
class ConfigScript: | class ConfigScript: | ||||
"""A script to guide a user through the creation of a new config file.""" | """A script to guide a user through the creation of a new config file.""" | ||||
@@ -58,17 +62,15 @@ class ConfigScript: | |||||
def __init__(self, config): | def __init__(self, config): | ||||
self.config = config | self.config = config | ||||
self.data = OrderedDict( | |||||
[ | |||||
("metadata", OrderedDict()), | |||||
("components", OrderedDict()), | |||||
("wiki", OrderedDict()), | |||||
("irc", OrderedDict()), | |||||
("commands", OrderedDict()), | |||||
("tasks", OrderedDict()), | |||||
("schedule", []), | |||||
] | |||||
) | |||||
self.data = { | |||||
"metadata": {}, | |||||
"components": {}, | |||||
"wiki": {}, | |||||
"irc": {}, | |||||
"commands": {}, | |||||
"tasks": {}, | |||||
"schedule": [], | |||||
} | |||||
self._cipher = None | self._cipher = None | ||||
self._wmf = False | self._wmf = False | ||||
@@ -76,20 +78,28 @@ class ConfigScript: | |||||
self._lang = None | self._lang = None | ||||
def _print(self, text): | def _print(self, text): | ||||
print(fill(re.sub(r"\s\s+", " ", text), self.WIDTH)) | |||||
print(textwrap.fill(re.sub(r"\s\s+", " ", text), self.WIDTH)) | |||||
def _print_no_nl(self, text): | def _print_no_nl(self, text): | ||||
sys.stdout.write(fill(re.sub(r"\s\s+", " ", text), self.WIDTH)) | |||||
sys.stdout.write(textwrap.fill(re.sub(r"\s\s+", " ", text), self.WIDTH)) | |||||
sys.stdout.flush() | sys.stdout.flush() | ||||
def _pause(self): | def _pause(self): | ||||
input(self.PROMPT + "Press enter to continue: ") | input(self.PROMPT + "Press enter to continue: ") | ||||
def _ask(self, text, default=None, require=True): | |||||
@typing.overload | |||||
def _ask(self, text, default=None, require: Literal[True] = True) -> str: ... | |||||
@typing.overload | |||||
def _ask( | |||||
self, text, default=None, require: Literal[False] = False | |||||
) -> str | None: ... | |||||
def _ask(self, text, default=None, require=True) -> str | None: | |||||
text = self.PROMPT + text | text = self.PROMPT + text | ||||
if default: | if default: | ||||
text += f" \x1b[33m[{default}]\x1b[0m" | text += f" \x1b[33m[{default}]\x1b[0m" | ||||
lines = wrap(re.sub(r"\s\s+", " ", text), self.WIDTH) | |||||
lines = textwrap.wrap(re.sub(r"\s\s+", " ", text), self.WIDTH) | |||||
if len(lines) > 1: | if len(lines) > 1: | ||||
print("\n".join(lines[:-1])) | print("\n".join(lines[:-1])) | ||||
while True: | while True: | ||||
@@ -103,7 +113,7 @@ class ConfigScript: | |||||
text += " \x1b[33m[Y/n]\x1b[0m" | text += " \x1b[33m[Y/n]\x1b[0m" | ||||
else: | else: | ||||
text += " \x1b[33m[y/N]\x1b[0m" | text += " \x1b[33m[y/N]\x1b[0m" | ||||
lines = wrap(re.sub(r"\s\s+", " ", text), self.WIDTH) | |||||
lines = textwrap.wrap(re.sub(r"\s\s+", " ", text), self.WIDTH) | |||||
if len(lines) > 1: | if len(lines) > 1: | ||||
print("\n".join(lines[:-1])) | print("\n".join(lines[:-1])) | ||||
while True: | while True: | ||||
@@ -116,7 +126,7 @@ class ConfigScript: | |||||
return False | return False | ||||
def _ask_pass(self, text, encrypt=True): | def _ask_pass(self, text, encrypt=True): | ||||
password = getpass(self.PROMPT + text + " ") | |||||
password = getpass.getpass(self.PROMPT + text + " ") | |||||
if encrypt: | if encrypt: | ||||
return self._encrypt(password) | return self._encrypt(password) | ||||
return password | return password | ||||
@@ -128,7 +138,7 @@ class ConfigScript: | |||||
return password | return password | ||||
def _ask_list(self, text): | def _ask_list(self, text): | ||||
print(fill(re.sub(r"\s\s+", " ", self.PROMPT + text), self.WIDTH)) | |||||
print(textwrap.fill(re.sub(r"\s\s+", " ", self.PROMPT + text), self.WIDTH)) | |||||
print("[one item per line; blank line to end]:") | print("[one item per line; blank line to end]:") | ||||
result = [] | result = [] | ||||
while True: | while True: | ||||
@@ -140,18 +150,24 @@ class ConfigScript: | |||||
def _set_metadata(self): | def _set_metadata(self): | ||||
print() | print() | ||||
self.data["metadata"] = OrderedDict([("version", 1)]) | |||||
self._print("""I can encrypt passwords stored in your config file in | |||||
addition to preventing other users on your system from | |||||
reading the file. Encryption is recommended if the bot | |||||
is to run on a public server like Toolforge, but the | |||||
need to enter a key every time you start the bot may be | |||||
an inconvenience.""") | |||||
self.data["metadata"]["encryptPasswords"] = False | |||||
metadata: dict[str, Any] = {"version": 1} | |||||
self.data["metadata"] = metadata | |||||
self._print( | |||||
"""I can encrypt passwords stored in your config file in addition to | |||||
preventing other users on your system from reading the file. Encryption is | |||||
recommended if the bot is to run on a public server like Toolforge, but the | |||||
need to enter a key every time you start the bot may be an | |||||
inconvenience.""" | |||||
) | |||||
metadata["encryptPasswords"] = False | |||||
if self._ask_bool("Encrypt stored passwords?"): | if self._ask_bool("Encrypt stored passwords?"): | ||||
key = getpass(self.PROMPT + "Enter an encryption key: ") | |||||
key = getpass.getpass(self.PROMPT + "Enter an encryption key: ") | |||||
self._print_no_nl("Generating key...") | self._print_no_nl("Generating key...") | ||||
try: | try: | ||||
from cryptography import fernet | |||||
from cryptography.hazmat.primitives import hashes | |||||
from cryptography.hazmat.primitives.kdf import pbkdf2 | |||||
salt = os.urandom(16) | salt = os.urandom(16) | ||||
kdf = pbkdf2.PBKDF2HMAC( | kdf = pbkdf2.PBKDF2HMAC( | ||||
algorithm=hashes.SHA256(), | algorithm=hashes.SHA256(), | ||||
@@ -162,44 +178,52 @@ class ConfigScript: | |||||
self._cipher = fernet.Fernet( | self._cipher = fernet.Fernet( | ||||
base64.urlsafe_b64encode(kdf.derive(key.encode())) | base64.urlsafe_b64encode(kdf.derive(key.encode())) | ||||
) | ) | ||||
except ImportError: | |||||
except ModuleNotFoundError: | |||||
print(" error!") | print(" error!") | ||||
self._print("""Encryption requires the 'cryptography' package: | |||||
https://cryptography.io/""") | |||||
self._print("""I will disable encryption for now; restart | |||||
configuration after installing these packages if | |||||
you want it.""") | |||||
self._print( | |||||
"Encryption requires the 'cryptography' package: https://cryptography.io/" | |||||
) | |||||
self._print( | |||||
"""I will disable encryption for now; restart configuration after | |||||
installing these packages if you want it.""" | |||||
) | |||||
self._pause() | self._pause() | ||||
else: | else: | ||||
self.data["metadata"]["encryptPasswords"] = True | |||||
self.data["metadata"]["salt"] = base64.b64encode(salt).decode() | |||||
metadata["encryptPasswords"] = True | |||||
metadata["salt"] = base64.b64encode(salt).decode() | |||||
print(" done.") | print(" done.") | ||||
print() | print() | ||||
self._print("""The bot can temporarily store its logs in the logs/ | |||||
subdirectory. Error logs are kept for a month whereas | |||||
normal logs are kept for a week. If you disable this, | |||||
the bot will still print logs to stdout.""") | |||||
self._print( | |||||
"""The bot can temporarily store its logs in the logs/ subdirectory. Error | |||||
logs are kept for a month whereas normal logs are kept for a week. If you | |||||
disable this, the bot will still print logs to stdout.""" | |||||
) | |||||
logging = self._ask_bool("Enable logging?") | logging = self._ask_bool("Enable logging?") | ||||
self.data["metadata"]["enableLogging"] = logging | |||||
metadata["enableLogging"] = logging | |||||
def _set_components(self): | def _set_components(self): | ||||
print() | print() | ||||
self._print("""The bot contains three separate components that can run | |||||
independently of each other.""") | |||||
self._print("""- The IRC front-end runs on a normal IRC server, like | |||||
Libera, and expects users to interact with it through | |||||
commands.""") | |||||
self._print("""- The IRC watcher runs on a wiki recent-changes server, | |||||
like irc.wikimedia.org, and listens for edits. Users | |||||
cannot interact with this component. It can detect | |||||
specific events and report them to "feed" channels on | |||||
the front-end or start bot tasks.""") | |||||
self._print("""- The wiki task scheduler runs wiki-editing bot tasks in | |||||
separate threads at user-defined times through a | |||||
cron-like interface. Tasks which are not scheduled can | |||||
be started by the IRC watcher manually through the IRC | |||||
front-end.""") | |||||
self._print( | |||||
"""The bot contains three separate components that can run independently of | |||||
each other.""" | |||||
) | |||||
self._print( | |||||
"""- The IRC front-end runs on a normal IRC server, like Libera, and | |||||
expects users to interact with it through commands.""" | |||||
) | |||||
self._print( | |||||
"""- The IRC watcher runs on a wiki recent-changes server, like | |||||
irc.wikimedia.org, and listens for edits. Users cannot interact with this | |||||
component. It can detect specific events and report them to "feed" channels | |||||
on the front-end or start bot tasks.""" | |||||
) | |||||
self._print( | |||||
"""- The wiki task scheduler runs wiki-editing bot tasks in separate | |||||
threads at user-defined times through a cron-like interface. Tasks which | |||||
are not scheduled can be started by the IRC watcher manually through the | |||||
IRC front-end.""" | |||||
) | |||||
frontend = self._ask_bool("Enable the IRC front-end?") | frontend = self._ask_bool("Enable the IRC front-end?") | ||||
watcher = self._ask_bool("Enable the IRC watcher?") | watcher = self._ask_bool("Enable the IRC watcher?") | ||||
scheduler = self._ask_bool("Enable the wiki task scheduler?") | scheduler = self._ask_bool("Enable the wiki task scheduler?") | ||||
@@ -214,17 +238,17 @@ class ConfigScript: | |||||
site = self.config.bot.wiki.add_site(**kwargs) | site = self.config.bot.wiki.add_site(**kwargs) | ||||
except exceptions.APIError as exc: | except exceptions.APIError as exc: | ||||
print(" API error!") | print(" API error!") | ||||
print("\x1b[31m" + exc.message + "\x1b[0m") | |||||
print(f"\x1b[31m{exc}\x1b[0m") | |||||
question = "Would you like to re-enter the site information?" | question = "Would you like to re-enter the site information?" | ||||
if self._ask_bool(question): | if self._ask_bool(question): | ||||
return self._set_wiki() | |||||
raise RetryError() | |||||
question = "This will cancel the setup process. Are you sure?" | question = "This will cancel the setup process. Are you sure?" | ||||
if self._ask_bool(question, default=False): | if self._ask_bool(question, default=False): | ||||
raise exceptions.NoConfigError() | raise exceptions.NoConfigError() | ||||
return self._set_wiki() | |||||
raise RetryError() | |||||
except exceptions.LoginError as exc: | except exceptions.LoginError as exc: | ||||
print(" login error!") | print(" login error!") | ||||
print("\x1b[31m" + exc.message + "\x1b[0m") | |||||
print(f"\x1b[31m{exc}\x1b[0m") | |||||
question = "Would you like to re-enter your login information?" | question = "Would you like to re-enter your login information?" | ||||
if self._ask_bool(question): | if self._ask_bool(question): | ||||
self.data["wiki"]["username"] = self._ask("Bot username:") | self.data["wiki"]["username"] = self._ask("Bot username:") | ||||
@@ -235,10 +259,12 @@ class ConfigScript: | |||||
password = self.data["wiki"]["password"] | password = self.data["wiki"]["password"] | ||||
question = "Would you like to re-enter the site information?" | question = "Would you like to re-enter the site information?" | ||||
if self._ask_bool(question): | if self._ask_bool(question): | ||||
return self._set_wiki() | |||||
raise RetryError() | |||||
print() | print() | ||||
self._print("""Moving on. You can modify the login information | |||||
stored in the bot's config in the future.""") | |||||
self._print( | |||||
"""Moving on. You can modify the login information stored in the bot's | |||||
config in the future.""" | |||||
) | |||||
self.data["wiki"]["password"] = None # Clear so we don't login | self.data["wiki"]["password"] = None # Clear so we don't login | ||||
self.config.wiki._load(self.data["wiki"]) | self.config.wiki._load(self.data["wiki"]) | ||||
self._print_no_nl("Trying to connect to the site...") | self._print_no_nl("Trying to connect to the site...") | ||||
@@ -255,8 +281,9 @@ class ConfigScript: | |||||
def _set_wiki(self): | def _set_wiki(self): | ||||
print() | print() | ||||
self._wmf = self._ask_bool("""Will this bot run on Wikimedia Foundation | |||||
wikis, like Wikipedia?""") | |||||
self._wmf = self._ask_bool( | |||||
"Will this bot run on Wikimedia Foundation wikis, like Wikipedia?" | |||||
) | |||||
if self._wmf: | if self._wmf: | ||||
msg = "Site project (e.g. 'wikipedia', 'wiktionary', 'wikimedia'):" | msg = "Site project (e.g. 'wikipedia', 'wiktionary', 'wikimedia'):" | ||||
self._proj = project = self._ask(msg, "wikipedia").lower() | self._proj = project = self._ask(msg, "wikipedia").lower() | ||||
@@ -288,39 +315,32 @@ class ConfigScript: | |||||
msg = "Will this bot run from the Wikimedia Tool Labs?" | msg = "Will this bot run from the Wikimedia Tool Labs?" | ||||
labs = self._ask_bool(msg, default=False) | labs = self._ask_bool(msg, default=False) | ||||
if labs: | if labs: | ||||
args = [ | |||||
("host", "$1.labsdb"), | |||||
("db", "$1_p"), | |||||
("read_default_file", "~/replica.my.cnf"), | |||||
] | |||||
self.data["wiki"]["sql"] = OrderedDict(args) | |||||
else: | |||||
msg = "Will this bot run from the Wikimedia Toolserver?" | |||||
toolserver = self._ask_bool(msg, default=False) | |||||
if toolserver: | |||||
args = [("host", "$1-p.rrdb.toolserver.org"), ("db", "$1_p")] | |||||
self.data["wiki"]["sql"] = OrderedDict(args) | |||||
self.data["wiki"]["sql"] = { | |||||
"host": "$1.labsdb", | |||||
"db": "$1_p", | |||||
"read_default_file": "~/replica.my.cnf", | |||||
} | |||||
self.data["wiki"]["shutoff"] = {} | self.data["wiki"]["shutoff"] = {} | ||||
msg = "Would you like to enable an automatic shutoff page for the bot?" | msg = "Would you like to enable an automatic shutoff page for the bot?" | ||||
if self._ask_bool(msg): | if self._ask_bool(msg): | ||||
print() | print() | ||||
self._print("""The page title can contain two wildcards: $1 will be | |||||
substituted with the bot's username, and $2 with the | |||||
current task number. This can be used to implement a | |||||
separate shutoff page for each task.""") | |||||
self._print( | |||||
"""The page title can contain two wildcards: $1 will be substituted | |||||
with the bot's username, and $2 with the current task number. This can | |||||
be used to implement a separate shutoff page for each task.""" | |||||
) | |||||
page = self._ask("Page title:", "User:$1/Shutoff") | page = self._ask("Page title:", "User:$1/Shutoff") | ||||
msg = "Page content to indicate the bot is *not* shut off:" | msg = "Page content to indicate the bot is *not* shut off:" | ||||
disabled = self._ask(msg, "run") | disabled = self._ask(msg, "run") | ||||
args = [("page", page), ("disabled", disabled)] | |||||
self.data["wiki"]["shutoff"] = OrderedDict(args) | |||||
self.data["wiki"]["shutoff"] = {"page": page, "disabled": disabled} | |||||
self.data["wiki"]["search"] = {} | self.data["wiki"]["search"] = {} | ||||
def _set_irc(self): | def _set_irc(self): | ||||
if self.data["components"]["irc_frontend"]: | if self.data["components"]["irc_frontend"]: | ||||
print() | print() | ||||
frontend = self.data["irc"]["frontend"] = OrderedDict() | |||||
frontend = self.data["irc"]["frontend"] = {} | |||||
frontend["host"] = self._ask( | frontend["host"] = self._ask( | ||||
"Hostname of the frontend's IRC server:", "irc.libera.chat" | "Hostname of the frontend's IRC server:", "irc.libera.chat" | ||||
) | ) | ||||
@@ -339,14 +359,14 @@ class ConfigScript: | |||||
chan_question = "Frontend channels to join by default:" | chan_question = "Frontend channels to join by default:" | ||||
frontend["channels"] = self._ask_list(chan_question) | frontend["channels"] = self._ask_list(chan_question) | ||||
print() | print() | ||||
self._print("""The bot keeps a database of its admins (users who | |||||
can use certain sensitive commands) and owners | |||||
(users who can quit the bot and modify its access | |||||
list), identified by nick, ident, and/or hostname. | |||||
Hostname is the most secure option since it cannot | |||||
be easily spoofed. If you have a cloak, this will | |||||
probably look like 'wikipedia/Username' or | |||||
'user/nickname'.""") | |||||
self._print( | |||||
"""The bot keeps a database of its admins (users who can use certain | |||||
sensitive commands) and owners (users who can quit the bot and modify | |||||
its access list), identified by nick, ident, and/or hostname. Hostname | |||||
is the most secure option since it cannot be easily spoofed. If you | |||||
have a cloak, this will probably look like 'wikipedia/Username' or | |||||
'user/nickname'.""" | |||||
) | |||||
host = self._ask("Your hostname on the frontend:", require=False) | host = self._ask("Your hostname on the frontend:", require=False) | ||||
if host: | if host: | ||||
permdb = self.config._permissions | permdb = self.config._permissions | ||||
@@ -358,7 +378,7 @@ class ConfigScript: | |||||
if self.data["components"]["irc_watcher"]: | if self.data["components"]["irc_watcher"]: | ||||
print() | print() | ||||
watcher = self.data["irc"]["watcher"] = OrderedDict() | |||||
watcher = self.data["irc"]["watcher"] = {} | |||||
if self._wmf: | if self._wmf: | ||||
watcher["host"] = "irc.wikimedia.org" | watcher["host"] = "irc.wikimedia.org" | ||||
watcher["port"] = 6667 | watcher["port"] = 6667 | ||||
@@ -386,14 +406,14 @@ class ConfigScript: | |||||
chan_question = "Watcher channels to join by default:" | chan_question = "Watcher channels to join by default:" | ||||
watcher["channels"] = self._ask_list(chan_question) | watcher["channels"] = self._ask_list(chan_question) | ||||
print() | print() | ||||
self._print("""I am now creating a blank 'rules.py' file, which | |||||
will determine how the bot handles messages received | |||||
from the IRC watcher. It contains a process() | |||||
function that takes a Bot object (allowing you to | |||||
start tasks) and an RC object (storing the message | |||||
from the watcher). See the documentation for | |||||
details.""") | |||||
with open(path.join(self.config.root_dir, "rules.py"), "w") as fp: | |||||
self._print( | |||||
"""I am now creating a blank 'rules.py' file, which will determine how | |||||
the bot handles messages received from the IRC watcher. It contains a | |||||
process() function that takes a Bot object (allowing you to start | |||||
tasks) and an RC object (storing the message from the watcher). See the | |||||
documentation for details.""" | |||||
) | |||||
with open(os.path.join(self.config.root_dir, "rules.py"), "w") as fp: | |||||
fp.write(RULES_TEMPLATE) | fp.write(RULES_TEMPLATE) | ||||
self._pause() | self._pause() | ||||
@@ -403,47 +423,55 @@ class ConfigScript: | |||||
def _set_commands(self): | def _set_commands(self): | ||||
print() | print() | ||||
msg = """Would you like to disable the default IRC commands? You can | |||||
fine-tune which commands are disabled later on.""" | |||||
msg = """Would you like to disable the default IRC commands? You can fine-tune | |||||
which commands are disabled later on.""" | |||||
if not self.data["components"]["irc_frontend"] or self._ask_bool( | if not self.data["components"]["irc_frontend"] or self._ask_bool( | ||||
msg, default=False | msg, default=False | ||||
): | ): | ||||
self.data["commands"]["disable"] = True | self.data["commands"]["disable"] = True | ||||
print() | print() | ||||
self._print("""I am now creating the 'commands/' directory, where you | |||||
can place custom IRC commands and plugins. Creating your | |||||
own commands is described in the documentation.""") | |||||
mkdir(path.join(self.config.root_dir, "commands")) | |||||
self._print( | |||||
"""I am now creating the 'commands/' directory, where you can place custom | |||||
IRC commands and plugins. Creating your own commands is described in the | |||||
documentation.""" | |||||
) | |||||
os.mkdir(os.path.join(self.config.root_dir, "commands")) | |||||
self._pause() | self._pause() | ||||
def _set_tasks(self): | def _set_tasks(self): | ||||
print() | print() | ||||
self._print("""I am now creating the 'tasks/' directory, where you can | |||||
place custom bot tasks and plugins. Creating your own | |||||
tasks is described in the documentation.""") | |||||
mkdir(path.join(self.config.root_dir, "tasks")) | |||||
self._print( | |||||
"""I am now creating the 'tasks/' directory, where you can place custom bot | |||||
tasks and plugins. Creating your own tasks is described in the | |||||
documentation.""" | |||||
) | |||||
os.mkdir(os.path.join(self.config.root_dir, "tasks")) | |||||
self._pause() | self._pause() | ||||
def _set_schedule(self): | def _set_schedule(self): | ||||
print() | print() | ||||
self._print("""The final section of your config file, 'schedule', is a | |||||
list of bot tasks to be started by the wiki scheduler. | |||||
Each entry contains cron-like time quantifiers and a | |||||
list of tasks. For example, the following starts the | |||||
'foobot' task every hour on the half-hour:""") | |||||
self._print( | |||||
"""The final section of your config file, 'schedule', is a list of bot | |||||
tasks to be started by the wiki scheduler. Each entry contains cron-like | |||||
time quantifiers and a list of tasks. For example, the following starts the | |||||
'foobot' task every hour on the half-hour:""" | |||||
) | |||||
print("\x1b[33mschedule:") | print("\x1b[33mschedule:") | ||||
print(" - minute: 30") | print(" - minute: 30") | ||||
print(" tasks:") | print(" tasks:") | ||||
print(" - foobot\x1b[0m") | print(" - foobot\x1b[0m") | ||||
self._print("""The following starts the 'barbot' task with the keyword | |||||
arguments 'action="baz"' every Monday at 05:00 UTC:""") | |||||
self._print( | |||||
"""The following starts the 'barbot' task with the keyword arguments | |||||
'action="baz"' every Monday at 05:00 UTC:""" | |||||
) | |||||
print("\x1b[33m - week_day: 1") | print("\x1b[33m - week_day: 1") | ||||
print(" hour: 5") | print(" hour: 5") | ||||
print(" tasks:") | print(" tasks:") | ||||
print(' - ["barbot", {"action": "baz"}]\x1b[0m') | print(' - ["barbot", {"action": "baz"}]\x1b[0m') | ||||
self._print("""The full list of quantifiers is minute, hour, month_day, | |||||
month, and week_day. See the documentation for more | |||||
information.""") | |||||
self._print( | |||||
"""The full list of quantifiers is minute, hour, month_day, month, and | |||||
week_day. See the documentation for more information.""" | |||||
) | |||||
self._pause() | self._pause() | ||||
def _save(self): | def _save(self): | ||||
@@ -451,7 +479,7 @@ class ConfigScript: | |||||
yaml.dump( | yaml.dump( | ||||
self.data, | self.data, | ||||
stream, | stream, | ||||
OrderedDumper, | |||||
yaml.CSafeDumper, | |||||
indent=4, | indent=4, | ||||
allow_unicode=True, | allow_unicode=True, | ||||
default_flow_style=False, | default_flow_style=False, | ||||
@@ -460,19 +488,24 @@ class ConfigScript: | |||||
def make_new(self): | def make_new(self): | ||||
"""Make a new config file based on the user's input.""" | """Make a new config file based on the user's input.""" | ||||
try: | try: | ||||
makedirs(path.dirname(self.config.path)) | |||||
os.makedirs(os.path.dirname(self.config.path)) | |||||
except OSError as exc: | except OSError as exc: | ||||
if exc.errno != 17: | if exc.errno != 17: | ||||
raise | raise | ||||
try: | try: | ||||
open(self.config.path, "w").close() | open(self.config.path, "w").close() | ||||
chmod(self.config.path, stat.S_IRUSR | stat.S_IWUSR) | |||||
os.chmod(self.config.path, stat.S_IRUSR | stat.S_IWUSR) | |||||
except OSError: | except OSError: | ||||
print("I can't seem to write to the config file:") | print("I can't seem to write to the config file:") | ||||
raise | raise | ||||
self._set_metadata() | self._set_metadata() | ||||
self._set_components() | self._set_components() | ||||
self._set_wiki() | |||||
while True: | |||||
try: | |||||
self._set_wiki() | |||||
break | |||||
except RetryError: | |||||
continue | |||||
components = self.data["components"] | components = self.data["components"] | ||||
if components["irc_frontend"] or components["irc_watcher"]: | if components["irc_frontend"] or components["irc_watcher"]: | ||||
self._set_irc() | self._set_irc() | ||||
@@ -481,12 +514,12 @@ class ConfigScript: | |||||
if components["wiki_scheduler"]: | if components["wiki_scheduler"]: | ||||
self._set_schedule() | self._set_schedule() | ||||
print() | print() | ||||
self._print("""I am now saving config.yml with your settings. YAML is a | |||||
relatively straightforward format and you should be able | |||||
to update these settings in the future when necessary. | |||||
I will start the bot at your signal. Feel free to | |||||
contact me at wikipedia.earwig@gmail.com if you have any | |||||
questions.""") | |||||
self._print( | |||||
"""I am now saving config.yml with your settings. YAML is a relatively | |||||
straightforward format and you should be able to update these settings in | |||||
the future when necessary. I will start the bot at your signal. Feel free | |||||
to contact me at wikipedia.earwig@gmail.com if you have any questions.""" | |||||
) | |||||
self._save() | self._save() | ||||
if not self._ask_bool("Start the bot now?"): | if not self._ask_bool("Start the bot now?"): | ||||
exit() | exit() |
@@ -1,4 +1,4 @@ | |||||
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -18,8 +18,10 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
from earwigbot.irc.connection import * | |||||
from earwigbot.irc.data import * | |||||
from earwigbot.irc.frontend import * | |||||
from earwigbot.irc.rc import * | |||||
from earwigbot.irc.watcher import * | |||||
__all__ = ["Data", "Frontend", "IRCConnection", "RC", "Watcher"] | |||||
from earwigbot.irc.connection import IRCConnection | |||||
from earwigbot.irc.data import Data | |||||
from earwigbot.irc.frontend import Frontend | |||||
from earwigbot.irc.rc import RC | |||||
from earwigbot.irc.watcher import Watcher |
@@ -1,4 +1,4 @@ | |||||
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -18,14 +18,14 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
__all__ = ["IRCConnection"] | |||||
import socket | import socket | ||||
from threading import Lock | from threading import Lock | ||||
from time import sleep, time | from time import sleep, time | ||||
from earwigbot.exceptions import BrokenSocketError | from earwigbot.exceptions import BrokenSocketError | ||||
__all__ = ["IRCConnection"] | |||||
class IRCConnection: | class IRCConnection: | ||||
"""Interface with an IRC server.""" | """Interface with an IRC server.""" | ||||
@@ -1,4 +1,4 @@ | |||||
# Copyright (C) 2009-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -18,10 +18,10 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
import re | |||||
__all__ = ["Data"] | __all__ = ["Data"] | ||||
import re | |||||
class Data: | class Data: | ||||
"""Store data from an individual line received on IRC.""" | """Store data from an individual line received on IRC.""" | ||||
@@ -78,6 +78,7 @@ class Data: | |||||
bot's name); self.is_command will be set to True, and self.trigger will | bot's name); self.is_command will be set to True, and self.trigger will | ||||
store the trigger string. Otherwise, is_command will be set to False. | store the trigger string. Otherwise, is_command will be set to False. | ||||
""" | """ | ||||
assert self.msg is not None | |||||
self._args = self.msg.strip().split() | self._args = self.msg.strip().split() | ||||
try: | try: | ||||
@@ -87,16 +88,16 @@ class Data: | |||||
return | return | ||||
# e.g. "!command>user arg1 arg2" | # e.g. "!command>user arg1 arg2" | ||||
if ">" in self.command: | |||||
if ">" in self._command: | |||||
command_uc, self._reply_nick = command_uc.split(">", 1) | command_uc, self._reply_nick = command_uc.split(">", 1) | ||||
self._command = command_uc.lower() | self._command = command_uc.lower() | ||||
if self.command.startswith("!") or self.command.startswith("."): | |||||
if self._command.startswith("!") or self._command.startswith("."): | |||||
# e.g. "!command arg1 arg2" | # e.g. "!command arg1 arg2" | ||||
self._is_command = True | self._is_command = True | ||||
self._trigger = self.command[0] | |||||
self._command = self.command[1:] # Strip the "!" or "." | |||||
elif re.match(rf"{re.escape(self.my_nick)}\W*?$", self.command, re.U): | |||||
self._trigger = self._command[0] | |||||
self._command = self._command[1:] # Strip the "!" or "." | |||||
elif re.match(rf"{re.escape(self.my_nick)}\W*?$", self._command, re.U): | |||||
# e.g. "EarwigBot, command arg1 arg2" | # e.g. "EarwigBot, command arg1 arg2" | ||||
self._is_command = True | self._is_command = True | ||||
self._trigger = self.my_nick | self._trigger = self.my_nick | ||||
@@ -110,7 +111,7 @@ class Data: | |||||
if self.args: | if self.args: | ||||
self.args[-1] = self.args[-1][:-1] | self.args[-1] = self.args[-1][:-1] | ||||
else: | else: | ||||
self._command = self.command[:-1] | |||||
self._command = self._command[:-1] | |||||
except IndexError: | except IndexError: | ||||
pass | pass | ||||
@@ -1,4 +1,4 @@ | |||||
# Copyright (C) 2009-2021 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -18,11 +18,11 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
from time import sleep | |||||
__all__ = ["Frontend"] | |||||
from earwigbot.irc import Data, IRCConnection | |||||
import time | |||||
__all__ = ["Frontend"] | |||||
from earwigbot.irc import Data, IRCConnection | |||||
class Frontend(IRCConnection): | class Frontend(IRCConnection): | ||||
@@ -121,10 +121,11 @@ class Frontend(IRCConnection): | |||||
elif line[1] == "NOTICE": | elif line[1] == "NOTICE": | ||||
data = Data(self.nick, line, msgtype="NOTICE") | data = Data(self.nick, line, msgtype="NOTICE") | ||||
if self._auth_wait and data.nick == self.NICK_SERVICES: | if self._auth_wait and data.nick == self.NICK_SERVICES: | ||||
assert data.msg is not None | |||||
if data.msg.startswith("This nickname is registered."): | if data.msg.startswith("This nickname is registered."): | ||||
return | return | ||||
self._auth_wait = False | self._auth_wait = False | ||||
sleep(2) # Wait for hostname change to propagate | |||||
time.sleep(2) # Wait for hostname change to propagate | |||||
self._join_channels() | self._join_channels() | ||||
elif line[1] == "KICK": | elif line[1] == "KICK": | ||||
@@ -1,4 +1,4 @@ | |||||
# Copyright (C) 2009-2021 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -18,19 +18,19 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
import re | |||||
__all__ = ["RC"] | __all__ = ["RC"] | ||||
import re | |||||
class RC: | class RC: | ||||
"""Store data from an event received from our IRC watcher.""" | """Store data from an event received from our IRC watcher.""" | ||||
re_color = re.compile("\x03([0-9]{1,2}(,[0-9]{1,2})?)?") | re_color = re.compile("\x03([0-9]{1,2}(,[0-9]{1,2})?)?") | ||||
re_edit = re.compile( | re_edit = re.compile( | ||||
"\A\[\[(.*?)\]\]\s(.*?)\s(https?://.*?)\s\*\s(.*?)\s\*\s(.*?)\Z" | |||||
r"\A\[\[(.*?)\]\]\s(.*?)\s(https?://.*?)\s\*\s(.*?)\s\*\s(.*?)\Z" | |||||
) | ) | ||||
re_log = re.compile("\A\[\[(.*?)\]\]\s(.*?)\s\s\*\s(.*?)\s\*\s(.*?)\Z") | |||||
re_log = re.compile(r"\A\[\[(.*?)\]\]\s(.*?)\s\s\*\s(.*?)\s\*\s(.*?)\Z") | |||||
pretty_edit = "\x02New {0}\x0f: \x0314[[\x0307{1}\x0314]]\x0306 * \x0303{2}\x0306 * \x0302{3}\x0306 * \x0310{4}" | pretty_edit = "\x02New {0}\x0f: \x0314[[\x0307{1}\x0314]]\x0306 * \x0303{2}\x0306 * \x0302{3}\x0306 * \x0310{4}" | ||||
pretty_log = "\x02New {0}\x0f: \x0303{1}\x0306 * \x0302{2}\x0306 * \x0310{3}" | pretty_log = "\x02New {0}\x0f: \x0303{1}\x0306 * \x0302{2}\x0306 * \x0310{3}" | ||||
@@ -1,100 +0,0 @@ | |||||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
""" | |||||
Implements a hierarchy of importing classes as defined in `PEP 302 | |||||
<https://www.python.org/dev/peps/pep-0302/>`_ to load modules in a safe yet lazy | |||||
manner, so that they can be referred to by name but are not actually loaded | |||||
until they are used (i.e. their attributes are read or modified). | |||||
""" | |||||
import importlib | |||||
import sys | |||||
from threading import RLock | |||||
from types import ModuleType | |||||
__all__ = ["LazyImporter"] | |||||
_real_get = ModuleType.__getattribute__ | |||||
_lazy_init_lock = RLock() | |||||
def _create_failing_get(exc): | |||||
def _fail(self, attr): | |||||
raise exc | |||||
return _fail | |||||
def _mock_get(self, attr): | |||||
with _real_get(self, "_lock"): | |||||
if _real_get(self, "_unloaded"): | |||||
type(self)._unloaded = False | |||||
try: | |||||
importlib.reload(self) | |||||
except ImportError as exc: | |||||
type(self).__getattribute__ = _create_failing_get(exc) | |||||
del type(self)._lock | |||||
raise | |||||
type(self).__getattribute__ = _real_get | |||||
del type(self)._lock | |||||
return _real_get(self, attr) | |||||
class _LazyModule(type): | |||||
def __new__(cls, name): | |||||
with _lazy_init_lock: | |||||
if name not in sys.modules: | |||||
attributes = { | |||||
"__name__": name, | |||||
"__getattribute__": _mock_get, | |||||
"_unloaded": True, | |||||
"_lock": RLock(), | |||||
} | |||||
parents = (ModuleType,) | |||||
klass = type.__new__(cls, "module", parents, attributes) | |||||
sys.modules[name] = klass(name) | |||||
if "." in name: # Also ensure the parent exists | |||||
_LazyModule(name.rsplit(".", 1)[0]) | |||||
return sys.modules[name] | |||||
class LazyImporter: | |||||
"""An importer for modules that are loaded lazily. | |||||
This inserts itself into :py:data:`sys.meta_path`, storing a dictionary of | |||||
:py:class:`_LazyModule`\ s (which is added to with :py:meth:`new`). | |||||
""" | |||||
def __init__(self): | |||||
self._modules = {} | |||||
sys.meta_path.append(self) | |||||
def new(self, name): | |||||
module = _LazyModule(name) | |||||
self._modules[name] = module | |||||
return module | |||||
def find_module(self, fullname, path=None): | |||||
if fullname in self._modules and fullname not in sys.modules: | |||||
return self | |||||
def load_module(self, fullname): | |||||
return self._modules.pop(fullname) |
@@ -1,4 +1,4 @@ | |||||
# Copyright (C) 2009-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -18,7 +18,7 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
from time import sleep | |||||
import time | |||||
from urllib.request import build_opener | from urllib.request import build_opener | ||||
from earwigbot import exceptions | from earwigbot import exceptions | ||||
@@ -73,7 +73,7 @@ class CopyvioMixIn: | |||||
for dep in klass.requirements(): | for dep in klass.requirements(): | ||||
try: | try: | ||||
__import__(dep).__name__ | __import__(dep).__name__ | ||||
except (ImportError, AttributeError): | |||||
except (ModuleNotFoundError, AttributeError): | |||||
e = "Missing a required dependency ({}) for the {} engine" | e = "Missing a required dependency ({}) for the {} engine" | ||||
e = e.format(dep, engine) | e = e.format(dep, engine) | ||||
raise exceptions.UnsupportedSearchEngineError(e) | raise exceptions.UnsupportedSearchEngineError(e) | ||||
@@ -173,7 +173,7 @@ class CopyvioMixIn: | |||||
self._logger.debug(log.format(self.title, searcher.name, chunk)) | self._logger.debug(log.format(self.title, searcher.name, chunk)) | ||||
workspace.enqueue(searcher.search(chunk)) | workspace.enqueue(searcher.search(chunk)) | ||||
num_queries += 1 | num_queries += 1 | ||||
sleep(1) | |||||
time.sleep(1) | |||||
workspace.wait() | workspace.wait() | ||||
result = workspace.get_result(num_queries) | result = workspace.get_result(num_queries) | ||||
@@ -1,4 +1,4 @@ | |||||
# Copyright (C) 2009-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -19,10 +19,10 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
import re | import re | ||||
import sqlite3 as sqlite | |||||
from threading import Lock | |||||
from time import time | |||||
from urllib.parse import urlparse | |||||
import sqlite3 | |||||
import threading | |||||
import time | |||||
import urllib.parse | |||||
from earwigbot import exceptions | from earwigbot import exceptions | ||||
@@ -60,7 +60,7 @@ class ExclusionsDB: | |||||
self._sitesdb = sitesdb | self._sitesdb = sitesdb | ||||
self._dbfile = dbfile | self._dbfile = dbfile | ||||
self._logger = logger | self._logger = logger | ||||
self._db_access_lock = Lock() | |||||
self._db_access_lock = threading.Lock() | |||||
def __repr__(self): | def __repr__(self): | ||||
"""Return the canonical string representation of the ExclusionsDB.""" | """Return the canonical string representation of the ExclusionsDB.""" | ||||
@@ -84,7 +84,7 @@ class ExclusionsDB: | |||||
for page in pages: | for page in pages: | ||||
sources.append((sitename, page)) | sources.append((sitename, page)) | ||||
with sqlite.connect(self._dbfile) as conn: | |||||
with sqlite3.connect(self._dbfile) as conn: | |||||
conn.executescript(script) | conn.executescript(script) | ||||
conn.executemany(query, sources) | conn.executemany(query, sources) | ||||
@@ -139,7 +139,7 @@ class ExclusionsDB: | |||||
site = self._sitesdb.get_site("enwiki") | site = self._sitesdb.get_site("enwiki") | ||||
else: | else: | ||||
site = self._sitesdb.get_site(sitename) | site = self._sitesdb.get_site(sitename) | ||||
with self._db_access_lock, sqlite.connect(self._dbfile) as conn: | |||||
with self._db_access_lock, sqlite3.connect(self._dbfile) as conn: | |||||
urls = set() | urls = set() | ||||
for (source,) in conn.execute(query1, (sitename,)): | for (source,) in conn.execute(query1, (sitename,)): | ||||
urls |= self._load_source(site, source) | urls |= self._load_source(site, source) | ||||
@@ -150,17 +150,17 @@ class ExclusionsDB: | |||||
conn.execute(query3, (sitename, url)) | conn.execute(query3, (sitename, url)) | ||||
conn.executemany(query4, [(sitename, url) for url in urls]) | conn.executemany(query4, [(sitename, url) for url in urls]) | ||||
if conn.execute(query5, (sitename,)).fetchone(): | if conn.execute(query5, (sitename,)).fetchone(): | ||||
conn.execute(query6, (int(time()), sitename)) | |||||
conn.execute(query6, (int(time.time()), sitename)) | |||||
else: | else: | ||||
conn.execute(query7, (sitename, int(time()))) | |||||
conn.execute(query7, (sitename, int(time.time()))) | |||||
def _get_last_update(self, sitename): | def _get_last_update(self, sitename): | ||||
"""Return the UNIX timestamp of the last time the db was updated.""" | """Return the UNIX timestamp of the last time the db was updated.""" | ||||
query = "SELECT update_time FROM updates WHERE update_sitename = ?" | query = "SELECT update_time FROM updates WHERE update_sitename = ?" | ||||
with self._db_access_lock, sqlite.connect(self._dbfile) as conn: | |||||
with self._db_access_lock, sqlite3.connect(self._dbfile) as conn: | |||||
try: | try: | ||||
result = conn.execute(query, (sitename,)).fetchone() | result = conn.execute(query, (sitename,)).fetchone() | ||||
except sqlite.OperationalError: | |||||
except sqlite3.OperationalError: | |||||
self._create() | self._create() | ||||
return 0 | return 0 | ||||
return result[0] if result else 0 | return result[0] if result else 0 | ||||
@@ -174,7 +174,7 @@ class ExclusionsDB: | |||||
after 12 hours. | after 12 hours. | ||||
""" | """ | ||||
max_staleness = 60 * 60 * (12 if sitename == "all" else 48) | max_staleness = 60 * 60 * (12 if sitename == "all" else 48) | ||||
time_since_update = int(time() - self._get_last_update(sitename)) | |||||
time_since_update = int(time.time() - self._get_last_update(sitename)) | |||||
if force or time_since_update > max_staleness: | if force or time_since_update > max_staleness: | ||||
log = "Updating stale database: {0} (last updated {1} seconds ago)" | log = "Updating stale database: {0} (last updated {1} seconds ago)" | ||||
self._logger.info(log.format(sitename, time_since_update)) | self._logger.info(log.format(sitename, time_since_update)) | ||||
@@ -191,10 +191,10 @@ class ExclusionsDB: | |||||
Return ``True`` if the URL is in the database, or ``False`` otherwise. | Return ``True`` if the URL is in the database, or ``False`` otherwise. | ||||
""" | """ | ||||
normalized = re.sub(_RE_STRIP_PREFIX, "", url.lower()) | normalized = re.sub(_RE_STRIP_PREFIX, "", url.lower()) | ||||
parsed = urlparse(url.lower()) | |||||
parsed = urllib.parse.urlparse(url.lower()) | |||||
query = """SELECT exclusion_url FROM exclusions | query = """SELECT exclusion_url FROM exclusions | ||||
WHERE exclusion_sitename = ? OR exclusion_sitename = ?""" | WHERE exclusion_sitename = ? OR exclusion_sitename = ?""" | ||||
with self._db_access_lock, sqlite.connect(self._dbfile) as conn: | |||||
with self._db_access_lock, sqlite3.connect(self._dbfile) as conn: | |||||
for (excl,) in conn.execute(query, (sitename, "all")): | for (excl,) in conn.execute(query, (sitename, "all")): | ||||
excl = excl.lower() | excl = excl.lower() | ||||
if excl.startswith("*."): | if excl.startswith("*."): | ||||
@@ -231,7 +231,7 @@ class ExclusionsDB: | |||||
certain HTML tag attributes (``"href"`` and ``"src"``). | certain HTML tag attributes (``"href"`` and ``"src"``). | ||||
""" | """ | ||||
site = page.site | site = page.site | ||||
path = urlparse(page.url).path | |||||
path = urllib.parse.urlparse(page.url).path | |||||
roots = [site.domain] | roots = [site.domain] | ||||
scripts = ["index.php", "load.php", "api.php"] | scripts = ["index.php", "load.php", "api.php"] | ||||
@@ -1,4 +1,4 @@ | |||||
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -18,7 +18,7 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
from re import UNICODE, sub | |||||
import re | |||||
__all__ = ["EMPTY", "EMPTY_INTERSECTION", "MarkovChain", "MarkovChainIntersection"] | __all__ = ["EMPTY", "EMPTY_INTERSECTION", "MarkovChain", "MarkovChainIntersection"] | ||||
@@ -38,7 +38,7 @@ class MarkovChain: | |||||
def _build(self): | def _build(self): | ||||
"""Build and return the Markov chain from the input text.""" | """Build and return the Markov chain from the input text.""" | ||||
padding = self.degree - 1 | padding = self.degree - 1 | ||||
words = sub(r"[^\w\s-]", "", self.text.lower(), flags=UNICODE).split() | |||||
words = re.sub(r"[^\w\s-]", "", self.text.lower(), flags=re.UNICODE).split() | |||||
words = ([self.START] * padding) + words + ([self.END] * padding) | words = ([self.START] * padding) + words + ([self.END] * padding) | ||||
chain = {} | chain = {} | ||||
@@ -1,4 +1,4 @@ | |||||
# Copyright (C) 2009-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -18,24 +18,17 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
import io | |||||
import json | import json | ||||
import os.path | |||||
import re | import re | ||||
import urllib.parse | import urllib.parse | ||||
import urllib.request | import urllib.request | ||||
from io import StringIO | |||||
from os import path | |||||
import mwparserfromhell | import mwparserfromhell | ||||
from earwigbot import importer | |||||
from earwigbot.exceptions import ParserExclusionError, ParserRedirectError | from earwigbot.exceptions import ParserExclusionError, ParserRedirectError | ||||
bs4 = importer.new("bs4") | |||||
nltk = importer.new("nltk") | |||||
converter = importer.new("pdfminer.converter") | |||||
pdfinterp = importer.new("pdfminer.pdfinterp") | |||||
pdfpage = importer.new("pdfminer.pdfpage") | |||||
__all__ = ["ArticleTextParser", "get_parser"] | __all__ = ["ArticleTextParser", "get_parser"] | ||||
@@ -101,9 +94,10 @@ class ArticleTextParser(_BaseTextParser): | |||||
def _get_tokenizer(self): | def _get_tokenizer(self): | ||||
"""Return a NLTK punctuation tokenizer for the article's language.""" | """Return a NLTK punctuation tokenizer for the article's language.""" | ||||
import nltk | |||||
def datafile(lang): | def datafile(lang): | ||||
return "file:" + path.join( | |||||
return "file:" + os.path.join( | |||||
self._args["nltk_dir"], "tokenizers", "punkt", lang + ".pickle" | self._args["nltk_dir"], "tokenizers", "punkt", lang + ".pickle" | ||||
) | ) | ||||
@@ -213,11 +207,11 @@ class ArticleTextParser(_BaseTextParser): | |||||
elif len(chunks) % 5 == 1: | elif len(chunks) % 5 == 1: | ||||
chunk = sentences.pop() # Pop from end | chunk = sentences.pop() # Pop from end | ||||
elif len(chunks) % 5 == 2: | elif len(chunks) % 5 == 2: | ||||
chunk = sentences.pop(len(sentences) / 2) # Pop from Q2 | |||||
chunk = sentences.pop(len(sentences) // 2) # Pop from Q2 | |||||
elif len(chunks) % 5 == 3: | elif len(chunks) % 5 == 3: | ||||
chunk = sentences.pop(len(sentences) / 4) # Pop from Q1 | |||||
chunk = sentences.pop(len(sentences) // 4) # Pop from Q1 | |||||
else: | else: | ||||
chunk = sentences.pop(3 * len(sentences) / 4) # Pop from Q3 | |||||
chunk = sentences.pop(3 * len(sentences) // 4) # Pop from Q3 | |||||
chunks.append(chunk) | chunks.append(chunk) | ||||
return chunks | return chunks | ||||
@@ -256,6 +250,8 @@ class _HTMLParser(_BaseTextParser): | |||||
@staticmethod | @staticmethod | ||||
def _get_soup(text): | def _get_soup(text): | ||||
"""Parse some text using BeautifulSoup.""" | """Parse some text using BeautifulSoup.""" | ||||
import bs4 | |||||
try: | try: | ||||
return bs4.BeautifulSoup(text, "lxml") | return bs4.BeautifulSoup(text, "lxml") | ||||
except ValueError: | except ValueError: | ||||
@@ -263,6 +259,7 @@ class _HTMLParser(_BaseTextParser): | |||||
def _clean_soup(self, soup): | def _clean_soup(self, soup): | ||||
"""Clean a BeautifulSoup tree of invisible tags.""" | """Clean a BeautifulSoup tree of invisible tags.""" | ||||
import bs4 | |||||
def is_comment(text): | def is_comment(text): | ||||
return isinstance(text, bs4.element.Comment) | return isinstance(text, bs4.element.Comment) | ||||
@@ -353,21 +350,23 @@ class _PDFParser(_BaseTextParser): | |||||
def parse(self): | def parse(self): | ||||
"""Return extracted text from the PDF.""" | """Return extracted text from the PDF.""" | ||||
output = StringIO() | |||||
from pdfminer import converter, pdfinterp, pdfpage | |||||
output = io.StringIO() | |||||
manager = pdfinterp.PDFResourceManager() | manager = pdfinterp.PDFResourceManager() | ||||
conv = converter.TextConverter(manager, output) | conv = converter.TextConverter(manager, output) | ||||
interp = pdfinterp.PDFPageInterpreter(manager, conv) | interp = pdfinterp.PDFPageInterpreter(manager, conv) | ||||
try: | try: | ||||
pages = pdfpage.PDFPage.get_pages(StringIO(self.text)) | |||||
pages = pdfpage.PDFPage.get_pages(io.StringIO(self.text)) | |||||
for page in pages: | for page in pages: | ||||
interp.process_page(page) | interp.process_page(page) | ||||
except Exception: # pylint: disable=broad-except | except Exception: # pylint: disable=broad-except | ||||
return output.getvalue().decode("utf8") | |||||
return output.getvalue() | |||||
finally: | finally: | ||||
conv.close() | conv.close() | ||||
value = output.getvalue().decode("utf8") | |||||
value = output.getvalue() | |||||
for orig, new in self.substitutions: | for orig, new in self.substitutions: | ||||
value = value.replace(orig, new) | value = value.replace(orig, new) | ||||
return re.sub(r"\n\n+", "\n", value).strip() | return re.sub(r"\n\n+", "\n", value).strip() | ||||
@@ -380,7 +379,9 @@ class _PlainTextParser(_BaseTextParser): | |||||
def parse(self): | def parse(self): | ||||
"""Unicode-ify and strip whitespace from the plain text document.""" | """Unicode-ify and strip whitespace from the plain text document.""" | ||||
converted = bs4.UnicodeDammit(self.text).unicode_markup | |||||
from bs4.dammit import UnicodeDammit | |||||
converted = UnicodeDammit(self.text).unicode_markup | |||||
return converted.strip() if converted else "" | return converted.strip() if converted else "" | ||||
@@ -1,4 +1,4 @@ | |||||
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,4 +1,4 @@ | |||||
# Copyright (C) 2009-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -18,18 +18,15 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
import re | |||||
from gzip import GzipFile | from gzip import GzipFile | ||||
from io import StringIO | from io import StringIO | ||||
from json import loads | from json import loads | ||||
from re import sub as re_sub | |||||
from urllib.error import URLError | from urllib.error import URLError | ||||
from urllib.parse import urlencode | from urllib.parse import urlencode | ||||
from earwigbot import importer | |||||
from earwigbot.exceptions import SearchQueryError | from earwigbot.exceptions import SearchQueryError | ||||
lxml = importer.new("lxml") | |||||
__all__ = [ | __all__ = [ | ||||
"BingSearchEngine", | "BingSearchEngine", | ||||
"GoogleSearchEngine", | "GoogleSearchEngine", | ||||
@@ -104,7 +101,7 @@ class BingSearchEngine(_BaseSearchEngine): | |||||
auth = (key + ":" + key).encode("base64").replace("\n", "") | auth = (key + ":" + key).encode("base64").replace("\n", "") | ||||
self.opener.addheaders.append(("Authorization", "Basic " + auth)) | self.opener.addheaders.append(("Authorization", "Basic " + auth)) | ||||
def search(self, query): | |||||
def search(self, query: str) -> list[str]: | |||||
"""Do a Bing web search for *query*. | """Do a Bing web search for *query*. | ||||
Returns a list of URLs ranked by relevance (as determined by Bing). | Returns a list of URLs ranked by relevance (as determined by Bing). | ||||
@@ -142,7 +139,7 @@ class GoogleSearchEngine(_BaseSearchEngine): | |||||
name = "Google" | name = "Google" | ||||
def search(self, query): | |||||
def search(self, query: str) -> list[str]: | |||||
"""Do a Google web search for *query*. | """Do a Google web search for *query*. | ||||
Returns a list of URLs ranked by relevance (as determined by Google). | Returns a list of URLs ranked by relevance (as determined by Google). | ||||
@@ -153,7 +150,7 @@ class GoogleSearchEngine(_BaseSearchEngine): | |||||
params = { | params = { | ||||
"cx": self.cred["id"], | "cx": self.cred["id"], | ||||
"key": self.cred["key"], | "key": self.cred["key"], | ||||
"q": '"' + query.replace('"', "").encode("utf8") + '"', | |||||
"q": '"' + query.replace('"', "") + '"', | |||||
"alt": "json", | "alt": "json", | ||||
"num": str(self.count), | "num": str(self.count), | ||||
"safe": "off", | "safe": "off", | ||||
@@ -183,15 +180,17 @@ class YandexSearchEngine(_BaseSearchEngine): | |||||
def requirements(): | def requirements(): | ||||
return ["lxml.etree"] | return ["lxml.etree"] | ||||
def search(self, query): | |||||
def search(self, query: str) -> list[str]: | |||||
"""Do a Yandex web search for *query*. | """Do a Yandex web search for *query*. | ||||
Returns a list of URLs ranked by relevance (as determined by Yandex). | Returns a list of URLs ranked by relevance (as determined by Yandex). | ||||
Raises :py:exc:`~earwigbot.exceptions.SearchQueryError` on errors. | Raises :py:exc:`~earwigbot.exceptions.SearchQueryError` on errors. | ||||
""" | """ | ||||
import lxml.etree | |||||
domain = self.cred.get("proxy", "yandex.com") | domain = self.cred.get("proxy", "yandex.com") | ||||
url = f"https://{domain}/search/xml?" | url = f"https://{domain}/search/xml?" | ||||
query = re_sub(r"[^a-zA-Z0-9 ]", "", query).encode("utf8") | |||||
query = re.sub(r"[^a-zA-Z0-9 ]", "", query) | |||||
params = { | params = { | ||||
"user": self.cred["user"], | "user": self.cred["user"], | ||||
"key": self.cred["key"], | "key": self.cred["key"], | ||||
@@ -205,7 +204,7 @@ class YandexSearchEngine(_BaseSearchEngine): | |||||
result = self._open(url + urlencode(params)) | result = self._open(url + urlencode(params)) | ||||
try: | try: | ||||
data = lxml.etree.fromstring(result) | |||||
data = lxml.etree.fromstring(result) # type: ignore | |||||
return [elem.text for elem in data.xpath(".//url")] | return [elem.text for elem in data.xpath(".//url")] | ||||
except lxml.etree.Error as exc: | except lxml.etree.Error as exc: | ||||
raise SearchQueryError("Yandex XML parse error: " + str(exc)) | raise SearchQueryError("Yandex XML parse error: " + str(exc)) | ||||
@@ -35,7 +35,7 @@ import requests | |||||
from requests.cookies import RequestsCookieJar | from requests.cookies import RequestsCookieJar | ||||
from requests_oauthlib import OAuth1 | from requests_oauthlib import OAuth1 | ||||
from earwigbot import exceptions, importer | |||||
from earwigbot import exceptions | |||||
from earwigbot.wiki import constants | from earwigbot.wiki import constants | ||||
from earwigbot.wiki.category import Category | from earwigbot.wiki.category import Category | ||||
from earwigbot.wiki.constants import Service | from earwigbot.wiki.constants import Service | ||||
@@ -47,7 +47,11 @@ if typing.TYPE_CHECKING: | |||||
import pymysql.cursors | import pymysql.cursors | ||||
from pymysql.cursors import Cursor | from pymysql.cursors import Cursor | ||||
else: | else: | ||||
pymysql = importer.new("pymysql") | |||||
try: | |||||
import pymysql | |||||
import pymysql.cursors | |||||
except ModuleNotFoundError: | |||||
pymysql = None | |||||
__all__ = ["Site"] | __all__ = ["Site"] | ||||
@@ -711,11 +715,11 @@ class Site: | |||||
if "autoreconnect" not in args: | if "autoreconnect" not in args: | ||||
args["autoreconnect"] = True | args["autoreconnect"] = True | ||||
try: | |||||
return pymysql.connect(**args) | |||||
except ImportError: | |||||
e = "SQL querying requires the 'pymysql' package: https://pymysql.readthedocs.io/" | |||||
raise exceptions.SQLError(e) | |||||
if pymysql is None: | |||||
raise exceptions.SQLError( | |||||
"SQL querying requires the 'pymysql' package: https://pymysql.readthedocs.io/" | |||||
) | |||||
return pymysql.connect(**args) | |||||
def _get_service_order(self) -> list[Service]: | def _get_service_order(self) -> list[Service]: | ||||
""" | """ | ||||
@@ -731,6 +735,10 @@ class Site: | |||||
lag is also very high. self.SERVICE_SQL will not be included in the list if we | lag is also very high. self.SERVICE_SQL will not be included in the list if we | ||||
cannot form a proper SQL connection. | cannot form a proper SQL connection. | ||||
""" | """ | ||||
if pymysql is None: | |||||
self._sql_info_cache["usable"] = False | |||||
return [Service.API] | |||||
now = time.time() | now = time.time() | ||||
if now - self._sql_info_cache["lastcheck"] > 120: | if now - self._sql_info_cache["lastcheck"] > 120: | ||||
self._sql_info_cache["lastcheck"] = now | self._sql_info_cache["lastcheck"] = now | ||||
@@ -739,7 +747,7 @@ class Site: | |||||
self._sql_info_cache["replag"] = sqllag = self.get_replag() | self._sql_info_cache["replag"] = sqllag = self.get_replag() | ||||
except pymysql.Error as exc: | except pymysql.Error as exc: | ||||
raise exceptions.SQLError(str(exc)) | raise exceptions.SQLError(str(exc)) | ||||
except (exceptions.SQLError, ImportError): | |||||
except exceptions.SQLError: | |||||
self._sql_info_cache["usable"] = False | self._sql_info_cache["usable"] = False | ||||
return [Service.API] | return [Service.API] | ||||
self._sql_info_cache["usable"] = True | self._sql_info_cache["usable"] = True | ||||