@@ -23,14 +23,6 @@ config Package | |||
:members: | |||
:undoc-members: | |||
:mod:`ordered_yaml` Module | |||
.. automodule:: earwigbot.config.ordered_yaml | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`permissions` Module | |||
------------------------- | |||
@@ -30,13 +30,6 @@ earwigbot Package | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`lazy` Module | |||
.. automodule:: earwigbot.lazy | |||
:members: | |||
:undoc-members: | |||
:mod:`managers` Module | |||
---------------------- | |||
@@ -62,9 +62,6 @@ build-backend = "setuptools.build_meta" | |||
exclude = [ | |||
# TODO | |||
"src/earwigbot/commands", | |||
"src/earwigbot/config", | |||
"src/earwigbot/lazy.py", | |||
"src/earwigbot/irc", | |||
"src/earwigbot/tasks", | |||
"src/earwigbot/wiki/copyvios" | |||
] | |||
@@ -26,8 +26,17 @@ See :file:`README.rst` for an overview, or the :file:`docs/` directory for detai | |||
This documentation is also available `online <https://packages.python.org/earwigbot>`_. | |||
""" | |||
import typing | |||
__all__ = [ | |||
"bot", | |||
"cli", | |||
"commands", | |||
"config", | |||
"exceptions", | |||
"irc", | |||
"managers", | |||
"tasks", | |||
"wiki", | |||
] | |||
__author__ = "Ben Kurtovic" | |||
__copyright__ = "Copyright (C) 2009-2024 Ben Kurtovic" | |||
__license__ = "MIT License" | |||
@@ -54,30 +63,14 @@ if not __release__: | |||
finally: | |||
del _get_git_commit_id | |||
from earwigbot import lazy | |||
importer = lazy.LazyImporter() | |||
if typing.TYPE_CHECKING: | |||
from earwigbot import ( | |||
bot, | |||
cli, | |||
commands, | |||
config, | |||
exceptions, | |||
irc, | |||
managers, | |||
tasks, | |||
wiki, | |||
) | |||
else: | |||
bot = importer.new("earwigbot.bot") | |||
cli = importer.new("earwigbot.cli") | |||
commands = importer.new("earwigbot.commands") | |||
config = importer.new("earwigbot.config") | |||
exceptions = importer.new("earwigbot.exceptions") | |||
irc = importer.new("earwigbot.irc") | |||
managers = importer.new("earwigbot.managers") | |||
tasks = importer.new("earwigbot.tasks") | |||
wiki = importer.new("earwigbot.wiki") | |||
from earwigbot import ( | |||
bot, | |||
cli, | |||
commands, | |||
config, | |||
exceptions, | |||
irc, | |||
managers, | |||
tasks, | |||
wiki, | |||
) |
@@ -22,13 +22,8 @@ import base64 | |||
import hashlib | |||
import os | |||
from earwigbot import importer | |||
from earwigbot.commands import Command | |||
fernet = importer.new("cryptography.fernet") | |||
hashes = importer.new("cryptography.hazmat.primitives.hashes") | |||
pbkdf2 = importer.new("cryptography.hazmat.primitives.kdf.pbkdf2") | |||
class Crypt(Command): | |||
"""Provides hash functions with !hash (!hash list for supported algorithms) | |||
@@ -73,6 +68,16 @@ class Crypt(Command): | |||
return | |||
try: | |||
from cryptography import fernet | |||
from cryptography.hazmat.primitives import hashes | |||
from cryptography.hazmat.primitives.kdf import pbkdf2 | |||
except ModuleNotFoundError: | |||
self.reply( | |||
data, | |||
"This command requires the 'cryptography' package: https://cryptography.io/", | |||
) | |||
try: | |||
if data.command == "encrypt": | |||
salt = os.urandom(saltlen) | |||
kdf = pbkdf2.PBKDF2HMAC( | |||
@@ -101,10 +106,5 @@ class Crypt(Command): | |||
base64.urlsafe_b64encode(kdf.derive(key.encode())) | |||
) | |||
self.reply(data, f.decrypt(ciphertext).decode()) | |||
except ImportError: | |||
self.reply( | |||
data, | |||
"This command requires the 'cryptography' package: https://cryptography.io/", | |||
) | |||
except Exception as error: | |||
self.reply(data, f"{type(error).__name__}: {str(error)}") |
@@ -1,4 +1,4 @@ | |||
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -28,18 +28,12 @@ from os import mkdir, path | |||
import yaml | |||
from earwigbot import importer | |||
from earwigbot.config.formatter import BotFormatter | |||
from earwigbot.config.node import ConfigNode | |||
from earwigbot.config.ordered_yaml import OrderedLoader | |||
from earwigbot.config.permissions import PermissionsDB | |||
from earwigbot.config.script import ConfigScript | |||
from earwigbot.exceptions import NoConfigError | |||
fernet = importer.new("cryptography.fernet") | |||
hashes = importer.new("cryptography.hazmat.primitives.hashes") | |||
pbkdf2 = importer.new("cryptography.hazmat.primitives.kdf.pbkdf2") | |||
__all__ = ["BotConfig"] | |||
@@ -128,12 +122,11 @@ class BotConfig: | |||
def _load(self): | |||
"""Load data from our JSON config file (config.yml) into self._data.""" | |||
filename = self._config_path | |||
with open(filename) as fp: | |||
with open(self._config_path) as fp: | |||
try: | |||
self._data = yaml.load(fp, OrderedLoader) | |||
self._data = yaml.load(fp, yaml.CSafeLoader) | |||
except yaml.YAMLError: | |||
print(f"Error parsing config file {filename}:") | |||
print(f"Error parsing config file {self._config_path}:") | |||
raise | |||
def _setup_logging(self): | |||
@@ -276,9 +269,7 @@ class BotConfig: | |||
if not path.exists(self._config_path): | |||
self._handle_missing_config() | |||
self._load() | |||
if not self._data: | |||
self._handle_missing_config() | |||
self._load() | |||
assert self._data is not None | |||
self.components._load(self._data.get("components", OrderedDict())) | |||
self.wiki._load(self._data.get("wiki", OrderedDict())) | |||
@@ -291,6 +282,10 @@ class BotConfig: | |||
if self.is_encrypted(): | |||
if not self._decryption_cipher: | |||
try: | |||
from cryptography import fernet | |||
from cryptography.hazmat.primitives import hashes | |||
from cryptography.hazmat.primitives.kdf import pbkdf2 | |||
salt = self.metadata["salt"] | |||
kdf = pbkdf2.PBKDF2HMAC( | |||
algorithm=hashes.SHA256(), | |||
@@ -298,7 +293,7 @@ class BotConfig: | |||
salt=salt, | |||
iterations=ConfigScript.PBKDF_ROUNDS, | |||
) | |||
except ImportError: | |||
except ModuleNotFoundError: | |||
e = "Encryption requires the 'cryptography' package: https://cryptography.io/" | |||
raise NoConfigError(e) | |||
key = getpass("Enter key to decrypt bot passwords: ") | |||
@@ -352,6 +347,7 @@ class BotConfig: | |||
"week_day": week_day, | |||
} | |||
assert self._data is not None | |||
data = self._data.get("schedule", []) | |||
for event in data: | |||
do = True | |||
@@ -19,22 +19,21 @@ | |||
# SOFTWARE. | |||
import base64 | |||
from collections import OrderedDict | |||
__all__ = ["ConfigNode"] | |||
class ConfigNode: | |||
def __init__(self): | |||
self._data = OrderedDict() | |||
self._data = {} | |||
def __repr__(self): | |||
return self._data | |||
def __repr__(self) -> str: | |||
return repr(self._data) | |||
def __bool__(self): | |||
def __bool__(self) -> bool: | |||
return bool(self._data) | |||
def __len__(self): | |||
def __len__(self) -> int: | |||
return len(self._data) | |||
def __getitem__(self, key): | |||
@@ -1,108 +0,0 @@ | |||
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
""" | |||
Based on: | |||
* https://gist.github.com/844388 | |||
* https://pyyaml.org/attachment/ticket/161/use_ordered_dict.py | |||
with modifications. | |||
""" | |||
from collections import OrderedDict | |||
import yaml | |||
__all__ = ["OrderedLoader", "OrderedDumper"] | |||
class OrderedLoader(yaml.Loader):
    """YAML loader whose mappings are built as ``OrderedDict`` instances."""

    def __init__(self, *args, **kwargs):
        """Initialise the base loader, then register the ordered map constructor.

        All positional and keyword arguments are forwarded unchanged to
        ``yaml.Loader``.
        """
        super().__init__(*args, **kwargs)
        constructor = type(self).construct_yaml_map
        # Both the plain "map" tag and the "omap" tag use the same constructor.
        for tag in ("tag:yaml.org,2002:map", "tag:yaml.org,2002:omap"):
            self.add_constructor(tag, constructor)

    def construct_yaml_map(self, node):
        """Yield an empty ``OrderedDict`` and then fill it from ``node``.

        The container is yielded before it is populated; presumably this
        follows PyYAML's generator-style constructor protocol (verify against
        the PyYAML constructor documentation).
        """
        ordered = OrderedDict()
        yield ordered
        ordered.update(self.construct_mapping(node))

    def construct_mapping(self, node, deep=False):
        """Build an ``OrderedDict`` from the key/value pairs of a mapping node.

        Raises ``yaml.constructor.ConstructorError`` when ``node`` is not a
        ``yaml.MappingNode`` or when a constructed key is not hashable.
        """
        if not isinstance(node, yaml.MappingNode):
            raise yaml.constructor.ConstructorError(
                None,
                None,
                f"expected a mapping node, but found {node.id}",
                node.start_mark,
            )
        self.flatten_mapping(node)

        result = OrderedDict()
        for key_node, value_node in node.value:
            key = self.construct_object(key_node, deep=deep)
            # Reject unhashable keys up front so the error carries YAML marks.
            try:
                hash(key)
            except TypeError as exc:
                raise yaml.constructor.ConstructorError(
                    "while constructing a mapping",
                    node.start_mark,
                    f"found unacceptable key ({exc})",
                    key_node.start_mark,
                )
            result[key] = self.construct_object(value_node, deep=deep)
        return result
class OrderedDumper(yaml.SafeDumper):
    """YAML dumper that emits ``OrderedDict`` instances as ordinary mappings."""

    def __init__(self, *args, **kwargs):
        """Initialise the base dumper and register ``OrderedDict`` as a dict.

        All positional and keyword arguments are forwarded unchanged to
        ``yaml.SafeDumper``.
        """
        super().__init__(*args, **kwargs)
        self.add_representer(OrderedDict, type(self).represent_dict)

    def represent_mapping(self, tag, mapping, flow_style=None):
        """Return a ``yaml.MappingNode`` for ``mapping``, keeping its item order.

        ``mapping`` may be anything with an ``items()`` method or an iterable
        of ``(key, value)`` pairs. When ``flow_style`` is ``None`` the node's
        flow style is taken from ``self.default_flow_style`` if that is set,
        otherwise it is true only when every key and value is a scalar node
        with no explicit style.
        """

        def is_plain_scalar(candidate):
            # A scalar node without an explicit style.
            return isinstance(candidate, yaml.ScalarNode) and not candidate.style

        pairs = []
        node = yaml.MappingNode(tag, pairs, flow_style=flow_style)
        # Record the node before representing its children, as the original
        # does, so it is already registered under the alias key by then.
        if self.alias_key is not None:
            self.represented_objects[self.alias_key] = node

        items = list(mapping.items()) if hasattr(mapping, "items") else mapping
        all_plain = True
        for item_key, item_value in items:
            key_node = self.represent_data(item_key)
            value_node = self.represent_data(item_value)
            if not is_plain_scalar(key_node) or not is_plain_scalar(value_node):
                all_plain = False
            pairs.append((key_node, value_node))

        if flow_style is None:
            default = self.default_flow_style
            node.flow_style = default if default is not None else all_plain
        return node
@@ -1,4 +1,4 @@ | |||
# Copyright (C) 2009-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -19,36 +19,40 @@ | |||
# SOFTWARE. | |||
import base64 | |||
import getpass | |||
import os | |||
import os.path | |||
import re | |||
import stat | |||
import sys | |||
from collections import OrderedDict | |||
from getpass import getpass | |||
from os import chmod, makedirs, mkdir, path | |||
from textwrap import fill, wrap | |||
import textwrap | |||
import typing | |||
from typing import Any, Literal | |||
import yaml | |||
from earwigbot import exceptions, importer | |||
from earwigbot.config.ordered_yaml import OrderedDumper | |||
fernet = importer.new("cryptography.fernet") | |||
hashes = importer.new("cryptography.hazmat.primitives.hashes") | |||
pbkdf2 = importer.new("cryptography.hazmat.primitives.kdf.pbkdf2") | |||
from earwigbot import exceptions | |||
__all__ = ["ConfigScript"] | |||
RULES_TEMPLATE = """# -*- coding: utf-8 -*- | |||
RULES_TEMPLATE = """\ | |||
from earwigbot.bot import Bot | |||
from earwigbot.irc import RC | |||
def process(bot: Bot, rc: RC): | |||
\"\"\" | |||
Return a list of channels to report this event to. | |||
def process(bot, rc): | |||
\"\"\"Given a Bot() object and an RC() object, return a list of channels | |||
to report this event to. Also, start any wiki bot tasks within this | |||
function if necessary.\"\"\" | |||
Also, start any wiki bot tasks within this function if necessary. | |||
\"\"\" | |||
pass | |||
""" | |||
class RetryError(Exception):
    """Signal that the current interactive setup step should be run again.

    Raised while configuring the wiki when the user chooses to re-enter site
    or login information; ``ConfigScript.make_new`` catches it and calls
    ``_set_wiki`` again.
    """
class ConfigScript: | |||
"""A script to guide a user through the creation of a new config file.""" | |||
@@ -58,17 +62,15 @@ class ConfigScript: | |||
def __init__(self, config): | |||
self.config = config | |||
self.data = OrderedDict( | |||
[ | |||
("metadata", OrderedDict()), | |||
("components", OrderedDict()), | |||
("wiki", OrderedDict()), | |||
("irc", OrderedDict()), | |||
("commands", OrderedDict()), | |||
("tasks", OrderedDict()), | |||
("schedule", []), | |||
] | |||
) | |||
self.data = { | |||
"metadata": {}, | |||
"components": {}, | |||
"wiki": {}, | |||
"irc": {}, | |||
"commands": {}, | |||
"tasks": {}, | |||
"schedule": [], | |||
} | |||
self._cipher = None | |||
self._wmf = False | |||
@@ -76,20 +78,28 @@ class ConfigScript: | |||
self._lang = None | |||
def _print(self, text): | |||
print(fill(re.sub(r"\s\s+", " ", text), self.WIDTH)) | |||
print(textwrap.fill(re.sub(r"\s\s+", " ", text), self.WIDTH)) | |||
def _print_no_nl(self, text): | |||
sys.stdout.write(fill(re.sub(r"\s\s+", " ", text), self.WIDTH)) | |||
sys.stdout.write(textwrap.fill(re.sub(r"\s\s+", " ", text), self.WIDTH)) | |||
sys.stdout.flush() | |||
def _pause(self): | |||
input(self.PROMPT + "Press enter to continue: ") | |||
def _ask(self, text, default=None, require=True): | |||
@typing.overload | |||
def _ask(self, text, default=None, require: Literal[True] = True) -> str: ... | |||
@typing.overload | |||
def _ask( | |||
self, text, default=None, require: Literal[False] = False | |||
) -> str | None: ... | |||
def _ask(self, text, default=None, require=True) -> str | None: | |||
text = self.PROMPT + text | |||
if default: | |||
text += f" \x1b[33m[{default}]\x1b[0m" | |||
lines = wrap(re.sub(r"\s\s+", " ", text), self.WIDTH) | |||
lines = textwrap.wrap(re.sub(r"\s\s+", " ", text), self.WIDTH) | |||
if len(lines) > 1: | |||
print("\n".join(lines[:-1])) | |||
while True: | |||
@@ -103,7 +113,7 @@ class ConfigScript: | |||
text += " \x1b[33m[Y/n]\x1b[0m" | |||
else: | |||
text += " \x1b[33m[y/N]\x1b[0m" | |||
lines = wrap(re.sub(r"\s\s+", " ", text), self.WIDTH) | |||
lines = textwrap.wrap(re.sub(r"\s\s+", " ", text), self.WIDTH) | |||
if len(lines) > 1: | |||
print("\n".join(lines[:-1])) | |||
while True: | |||
@@ -116,7 +126,7 @@ class ConfigScript: | |||
return False | |||
def _ask_pass(self, text, encrypt=True): | |||
password = getpass(self.PROMPT + text + " ") | |||
password = getpass.getpass(self.PROMPT + text + " ") | |||
if encrypt: | |||
return self._encrypt(password) | |||
return password | |||
@@ -128,7 +138,7 @@ class ConfigScript: | |||
return password | |||
def _ask_list(self, text): | |||
print(fill(re.sub(r"\s\s+", " ", self.PROMPT + text), self.WIDTH)) | |||
print(textwrap.fill(re.sub(r"\s\s+", " ", self.PROMPT + text), self.WIDTH)) | |||
print("[one item per line; blank line to end]:") | |||
result = [] | |||
while True: | |||
@@ -140,18 +150,24 @@ class ConfigScript: | |||
def _set_metadata(self): | |||
print() | |||
self.data["metadata"] = OrderedDict([("version", 1)]) | |||
self._print("""I can encrypt passwords stored in your config file in | |||
addition to preventing other users on your system from | |||
reading the file. Encryption is recommended if the bot | |||
is to run on a public server like Toolforge, but the | |||
need to enter a key every time you start the bot may be | |||
an inconvenience.""") | |||
self.data["metadata"]["encryptPasswords"] = False | |||
metadata: dict[str, Any] = {"version": 1} | |||
self.data["metadata"] = metadata | |||
self._print( | |||
"""I can encrypt passwords stored in your config file in addition to | |||
preventing other users on your system from reading the file. Encryption is | |||
recommended if the bot is to run on a public server like Toolforge, but the | |||
need to enter a key every time you start the bot may be an | |||
inconvenience.""" | |||
) | |||
metadata["encryptPasswords"] = False | |||
if self._ask_bool("Encrypt stored passwords?"): | |||
key = getpass(self.PROMPT + "Enter an encryption key: ") | |||
key = getpass.getpass(self.PROMPT + "Enter an encryption key: ") | |||
self._print_no_nl("Generating key...") | |||
try: | |||
from cryptography import fernet | |||
from cryptography.hazmat.primitives import hashes | |||
from cryptography.hazmat.primitives.kdf import pbkdf2 | |||
salt = os.urandom(16) | |||
kdf = pbkdf2.PBKDF2HMAC( | |||
algorithm=hashes.SHA256(), | |||
@@ -162,44 +178,52 @@ class ConfigScript: | |||
self._cipher = fernet.Fernet( | |||
base64.urlsafe_b64encode(kdf.derive(key.encode())) | |||
) | |||
except ImportError: | |||
except ModuleNotFoundError: | |||
print(" error!") | |||
self._print("""Encryption requires the 'cryptography' package: | |||
https://cryptography.io/""") | |||
self._print("""I will disable encryption for now; restart | |||
configuration after installing these packages if | |||
you want it.""") | |||
self._print( | |||
"Encryption requires the 'cryptography' package: https://cryptography.io/" | |||
) | |||
self._print( | |||
"""I will disable encryption for now; restart configuration after | |||
installing these packages if you want it.""" | |||
) | |||
self._pause() | |||
else: | |||
self.data["metadata"]["encryptPasswords"] = True | |||
self.data["metadata"]["salt"] = base64.b64encode(salt).decode() | |||
metadata["encryptPasswords"] = True | |||
metadata["salt"] = base64.b64encode(salt).decode() | |||
print(" done.") | |||
print() | |||
self._print("""The bot can temporarily store its logs in the logs/ | |||
subdirectory. Error logs are kept for a month whereas | |||
normal logs are kept for a week. If you disable this, | |||
the bot will still print logs to stdout.""") | |||
self._print( | |||
"""The bot can temporarily store its logs in the logs/ subdirectory. Error | |||
logs are kept for a month whereas normal logs are kept for a week. If you | |||
disable this, the bot will still print logs to stdout.""" | |||
) | |||
logging = self._ask_bool("Enable logging?") | |||
self.data["metadata"]["enableLogging"] = logging | |||
metadata["enableLogging"] = logging | |||
def _set_components(self): | |||
print() | |||
self._print("""The bot contains three separate components that can run | |||
independently of each other.""") | |||
self._print("""- The IRC front-end runs on a normal IRC server, like | |||
Libera, and expects users to interact with it through | |||
commands.""") | |||
self._print("""- The IRC watcher runs on a wiki recent-changes server, | |||
like irc.wikimedia.org, and listens for edits. Users | |||
cannot interact with this component. It can detect | |||
specific events and report them to "feed" channels on | |||
the front-end or start bot tasks.""") | |||
self._print("""- The wiki task scheduler runs wiki-editing bot tasks in | |||
separate threads at user-defined times through a | |||
cron-like interface. Tasks which are not scheduled can | |||
be started by the IRC watcher manually through the IRC | |||
front-end.""") | |||
self._print( | |||
"""The bot contains three separate components that can run independently of | |||
each other.""" | |||
) | |||
self._print( | |||
"""- The IRC front-end runs on a normal IRC server, like Libera, and | |||
expects users to interact with it through commands.""" | |||
) | |||
self._print( | |||
"""- The IRC watcher runs on a wiki recent-changes server, like | |||
irc.wikimedia.org, and listens for edits. Users cannot interact with this | |||
component. It can detect specific events and report them to "feed" channels | |||
on the front-end or start bot tasks.""" | |||
) | |||
self._print( | |||
"""- The wiki task scheduler runs wiki-editing bot tasks in separate | |||
threads at user-defined times through a cron-like interface. Tasks which | |||
are not scheduled can be started by the IRC watcher manually through the | |||
IRC front-end.""" | |||
) | |||
frontend = self._ask_bool("Enable the IRC front-end?") | |||
watcher = self._ask_bool("Enable the IRC watcher?") | |||
scheduler = self._ask_bool("Enable the wiki task scheduler?") | |||
@@ -214,17 +238,17 @@ class ConfigScript: | |||
site = self.config.bot.wiki.add_site(**kwargs) | |||
except exceptions.APIError as exc: | |||
print(" API error!") | |||
print("\x1b[31m" + exc.message + "\x1b[0m") | |||
print(f"\x1b[31m{exc}\x1b[0m") | |||
question = "Would you like to re-enter the site information?" | |||
if self._ask_bool(question): | |||
return self._set_wiki() | |||
raise RetryError() | |||
question = "This will cancel the setup process. Are you sure?" | |||
if self._ask_bool(question, default=False): | |||
raise exceptions.NoConfigError() | |||
return self._set_wiki() | |||
raise RetryError() | |||
except exceptions.LoginError as exc: | |||
print(" login error!") | |||
print("\x1b[31m" + exc.message + "\x1b[0m") | |||
print(f"\x1b[31m{exc}\x1b[0m") | |||
question = "Would you like to re-enter your login information?" | |||
if self._ask_bool(question): | |||
self.data["wiki"]["username"] = self._ask("Bot username:") | |||
@@ -235,10 +259,12 @@ class ConfigScript: | |||
password = self.data["wiki"]["password"] | |||
question = "Would you like to re-enter the site information?" | |||
if self._ask_bool(question): | |||
return self._set_wiki() | |||
raise RetryError() | |||
print() | |||
self._print("""Moving on. You can modify the login information | |||
stored in the bot's config in the future.""") | |||
self._print( | |||
"""Moving on. You can modify the login information stored in the bot's | |||
config in the future.""" | |||
) | |||
self.data["wiki"]["password"] = None # Clear so we don't login | |||
self.config.wiki._load(self.data["wiki"]) | |||
self._print_no_nl("Trying to connect to the site...") | |||
@@ -255,8 +281,9 @@ class ConfigScript: | |||
def _set_wiki(self): | |||
print() | |||
self._wmf = self._ask_bool("""Will this bot run on Wikimedia Foundation | |||
wikis, like Wikipedia?""") | |||
self._wmf = self._ask_bool( | |||
"Will this bot run on Wikimedia Foundation wikis, like Wikipedia?" | |||
) | |||
if self._wmf: | |||
msg = "Site project (e.g. 'wikipedia', 'wiktionary', 'wikimedia'):" | |||
self._proj = project = self._ask(msg, "wikipedia").lower() | |||
@@ -288,39 +315,32 @@ class ConfigScript: | |||
msg = "Will this bot run from the Wikimedia Tool Labs?" | |||
labs = self._ask_bool(msg, default=False) | |||
if labs: | |||
args = [ | |||
("host", "$1.labsdb"), | |||
("db", "$1_p"), | |||
("read_default_file", "~/replica.my.cnf"), | |||
] | |||
self.data["wiki"]["sql"] = OrderedDict(args) | |||
else: | |||
msg = "Will this bot run from the Wikimedia Toolserver?" | |||
toolserver = self._ask_bool(msg, default=False) | |||
if toolserver: | |||
args = [("host", "$1-p.rrdb.toolserver.org"), ("db", "$1_p")] | |||
self.data["wiki"]["sql"] = OrderedDict(args) | |||
self.data["wiki"]["sql"] = { | |||
"host": "$1.labsdb", | |||
"db": "$1_p", | |||
"read_default_file": "~/replica.my.cnf", | |||
} | |||
self.data["wiki"]["shutoff"] = {} | |||
msg = "Would you like to enable an automatic shutoff page for the bot?" | |||
if self._ask_bool(msg): | |||
print() | |||
self._print("""The page title can contain two wildcards: $1 will be | |||
substituted with the bot's username, and $2 with the | |||
current task number. This can be used to implement a | |||
separate shutoff page for each task.""") | |||
self._print( | |||
"""The page title can contain two wildcards: $1 will be substituted | |||
with the bot's username, and $2 with the current task number. This can | |||
be used to implement a separate shutoff page for each task.""" | |||
) | |||
page = self._ask("Page title:", "User:$1/Shutoff") | |||
msg = "Page content to indicate the bot is *not* shut off:" | |||
disabled = self._ask(msg, "run") | |||
args = [("page", page), ("disabled", disabled)] | |||
self.data["wiki"]["shutoff"] = OrderedDict(args) | |||
self.data["wiki"]["shutoff"] = {"page": page, "disabled": disabled} | |||
self.data["wiki"]["search"] = {} | |||
def _set_irc(self): | |||
if self.data["components"]["irc_frontend"]: | |||
print() | |||
frontend = self.data["irc"]["frontend"] = OrderedDict() | |||
frontend = self.data["irc"]["frontend"] = {} | |||
frontend["host"] = self._ask( | |||
"Hostname of the frontend's IRC server:", "irc.libera.chat" | |||
) | |||
@@ -339,14 +359,14 @@ class ConfigScript: | |||
chan_question = "Frontend channels to join by default:" | |||
frontend["channels"] = self._ask_list(chan_question) | |||
print() | |||
self._print("""The bot keeps a database of its admins (users who | |||
can use certain sensitive commands) and owners | |||
(users who can quit the bot and modify its access | |||
list), identified by nick, ident, and/or hostname. | |||
Hostname is the most secure option since it cannot | |||
be easily spoofed. If you have a cloak, this will | |||
probably look like 'wikipedia/Username' or | |||
'user/nickname'.""") | |||
self._print( | |||
"""The bot keeps a database of its admins (users who can use certain | |||
sensitive commands) and owners (users who can quit the bot and modify | |||
its access list), identified by nick, ident, and/or hostname. Hostname | |||
is the most secure option since it cannot be easily spoofed. If you | |||
have a cloak, this will probably look like 'wikipedia/Username' or | |||
'user/nickname'.""" | |||
) | |||
host = self._ask("Your hostname on the frontend:", require=False) | |||
if host: | |||
permdb = self.config._permissions | |||
@@ -358,7 +378,7 @@ class ConfigScript: | |||
if self.data["components"]["irc_watcher"]: | |||
print() | |||
watcher = self.data["irc"]["watcher"] = OrderedDict() | |||
watcher = self.data["irc"]["watcher"] = {} | |||
if self._wmf: | |||
watcher["host"] = "irc.wikimedia.org" | |||
watcher["port"] = 6667 | |||
@@ -386,14 +406,14 @@ class ConfigScript: | |||
chan_question = "Watcher channels to join by default:" | |||
watcher["channels"] = self._ask_list(chan_question) | |||
print() | |||
self._print("""I am now creating a blank 'rules.py' file, which | |||
will determine how the bot handles messages received | |||
from the IRC watcher. It contains a process() | |||
function that takes a Bot object (allowing you to | |||
start tasks) and an RC object (storing the message | |||
from the watcher). See the documentation for | |||
details.""") | |||
with open(path.join(self.config.root_dir, "rules.py"), "w") as fp: | |||
self._print( | |||
"""I am now creating a blank 'rules.py' file, which will determine how | |||
the bot handles messages received from the IRC watcher. It contains a | |||
process() function that takes a Bot object (allowing you to start | |||
tasks) and an RC object (storing the message from the watcher). See the | |||
documentation for details.""" | |||
) | |||
with open(os.path.join(self.config.root_dir, "rules.py"), "w") as fp: | |||
fp.write(RULES_TEMPLATE) | |||
self._pause() | |||
@@ -403,47 +423,55 @@ class ConfigScript: | |||
def _set_commands(self): | |||
print() | |||
msg = """Would you like to disable the default IRC commands? You can | |||
fine-tune which commands are disabled later on.""" | |||
msg = """Would you like to disable the default IRC commands? You can fine-tune | |||
which commands are disabled later on.""" | |||
if not self.data["components"]["irc_frontend"] or self._ask_bool( | |||
msg, default=False | |||
): | |||
self.data["commands"]["disable"] = True | |||
print() | |||
self._print("""I am now creating the 'commands/' directory, where you | |||
can place custom IRC commands and plugins. Creating your | |||
own commands is described in the documentation.""") | |||
mkdir(path.join(self.config.root_dir, "commands")) | |||
self._print( | |||
"""I am now creating the 'commands/' directory, where you can place custom | |||
IRC commands and plugins. Creating your own commands is described in the | |||
documentation.""" | |||
) | |||
os.mkdir(os.path.join(self.config.root_dir, "commands")) | |||
self._pause() | |||
def _set_tasks(self): | |||
print() | |||
self._print("""I am now creating the 'tasks/' directory, where you can | |||
place custom bot tasks and plugins. Creating your own | |||
tasks is described in the documentation.""") | |||
mkdir(path.join(self.config.root_dir, "tasks")) | |||
self._print( | |||
"""I am now creating the 'tasks/' directory, where you can place custom bot | |||
tasks and plugins. Creating your own tasks is described in the | |||
documentation.""" | |||
) | |||
os.mkdir(os.path.join(self.config.root_dir, "tasks")) | |||
self._pause() | |||
def _set_schedule(self): | |||
print() | |||
self._print("""The final section of your config file, 'schedule', is a | |||
list of bot tasks to be started by the wiki scheduler. | |||
Each entry contains cron-like time quantifiers and a | |||
list of tasks. For example, the following starts the | |||
'foobot' task every hour on the half-hour:""") | |||
self._print( | |||
"""The final section of your config file, 'schedule', is a list of bot | |||
tasks to be started by the wiki scheduler. Each entry contains cron-like | |||
time quantifiers and a list of tasks. For example, the following starts the | |||
'foobot' task every hour on the half-hour:""" | |||
) | |||
print("\x1b[33mschedule:") | |||
print(" - minute: 30") | |||
print(" tasks:") | |||
print(" - foobot\x1b[0m") | |||
self._print("""The following starts the 'barbot' task with the keyword | |||
arguments 'action="baz"' every Monday at 05:00 UTC:""") | |||
self._print( | |||
"""The following starts the 'barbot' task with the keyword arguments | |||
'action="baz"' every Monday at 05:00 UTC:""" | |||
) | |||
print("\x1b[33m - week_day: 1") | |||
print(" hour: 5") | |||
print(" tasks:") | |||
print(' - ["barbot", {"action": "baz"}]\x1b[0m') | |||
self._print("""The full list of quantifiers is minute, hour, month_day, | |||
month, and week_day. See the documentation for more | |||
information.""") | |||
self._print( | |||
"""The full list of quantifiers is minute, hour, month_day, month, and | |||
week_day. See the documentation for more information.""" | |||
) | |||
self._pause() | |||
def _save(self): | |||
@@ -451,7 +479,7 @@ class ConfigScript: | |||
yaml.dump( | |||
self.data, | |||
stream, | |||
OrderedDumper, | |||
yaml.CSafeDumper, | |||
indent=4, | |||
allow_unicode=True, | |||
default_flow_style=False, | |||
@@ -460,19 +488,24 @@ class ConfigScript: | |||
def make_new(self): | |||
"""Make a new config file based on the user's input.""" | |||
try: | |||
makedirs(path.dirname(self.config.path)) | |||
os.makedirs(os.path.dirname(self.config.path)) | |||
except OSError as exc: | |||
if exc.errno != 17: | |||
raise | |||
try: | |||
open(self.config.path, "w").close() | |||
chmod(self.config.path, stat.S_IRUSR | stat.S_IWUSR) | |||
os.chmod(self.config.path, stat.S_IRUSR | stat.S_IWUSR) | |||
except OSError: | |||
print("I can't seem to write to the config file:") | |||
raise | |||
self._set_metadata() | |||
self._set_components() | |||
self._set_wiki() | |||
while True: | |||
try: | |||
self._set_wiki() | |||
break | |||
except RetryError: | |||
continue | |||
components = self.data["components"] | |||
if components["irc_frontend"] or components["irc_watcher"]: | |||
self._set_irc() | |||
@@ -481,12 +514,12 @@ class ConfigScript: | |||
if components["wiki_scheduler"]: | |||
self._set_schedule() | |||
print() | |||
self._print("""I am now saving config.yml with your settings. YAML is a | |||
relatively straightforward format and you should be able | |||
to update these settings in the future when necessary. | |||
I will start the bot at your signal. Feel free to | |||
contact me at wikipedia.earwig@gmail.com if you have any | |||
questions.""") | |||
self._print( | |||
"""I am now saving config.yml with your settings. YAML is a relatively | |||
straightforward format and you should be able to update these settings in | |||
the future when necessary. I will start the bot at your signal. Feel free | |||
to contact me at wikipedia.earwig@gmail.com if you have any questions.""" | |||
) | |||
self._save() | |||
if not self._ask_bool("Start the bot now?"): | |||
exit() |
@@ -1,4 +1,4 @@ | |||
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -18,8 +18,10 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from earwigbot.irc.connection import * | |||
from earwigbot.irc.data import * | |||
from earwigbot.irc.frontend import * | |||
from earwigbot.irc.rc import * | |||
from earwigbot.irc.watcher import * | |||
__all__ = ["Data", "Frontend", "IRCConnection", "RC", "Watcher"] | |||
from earwigbot.irc.connection import IRCConnection | |||
from earwigbot.irc.data import Data | |||
from earwigbot.irc.frontend import Frontend | |||
from earwigbot.irc.rc import RC | |||
from earwigbot.irc.watcher import Watcher |
@@ -1,4 +1,4 @@ | |||
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -18,14 +18,14 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
__all__ = ["IRCConnection"] | |||
import socket | |||
from threading import Lock | |||
from time import sleep, time | |||
from earwigbot.exceptions import BrokenSocketError | |||
__all__ = ["IRCConnection"] | |||
class IRCConnection: | |||
"""Interface with an IRC server.""" | |||
@@ -1,4 +1,4 @@ | |||
# Copyright (C) 2009-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -18,10 +18,10 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import re | |||
__all__ = ["Data"] | |||
import re | |||
class Data: | |||
"""Store data from an individual line received on IRC.""" | |||
@@ -78,6 +78,7 @@ class Data: | |||
bot's name); self.is_command will be set to True, and self.trigger will | |||
store the trigger string. Otherwise, is_command will be set to False. | |||
""" | |||
assert self.msg is not None | |||
self._args = self.msg.strip().split() | |||
try: | |||
@@ -87,16 +88,16 @@ class Data: | |||
return | |||
# e.g. "!command>user arg1 arg2" | |||
if ">" in self.command: | |||
if ">" in self._command: | |||
command_uc, self._reply_nick = command_uc.split(">", 1) | |||
self._command = command_uc.lower() | |||
if self.command.startswith("!") or self.command.startswith("."): | |||
if self._command.startswith("!") or self._command.startswith("."): | |||
# e.g. "!command arg1 arg2" | |||
self._is_command = True | |||
self._trigger = self.command[0] | |||
self._command = self.command[1:] # Strip the "!" or "." | |||
elif re.match(rf"{re.escape(self.my_nick)}\W*?$", self.command, re.U): | |||
self._trigger = self._command[0] | |||
self._command = self._command[1:] # Strip the "!" or "." | |||
elif re.match(rf"{re.escape(self.my_nick)}\W*?$", self._command, re.U): | |||
# e.g. "EarwigBot, command arg1 arg2" | |||
self._is_command = True | |||
self._trigger = self.my_nick | |||
@@ -110,7 +111,7 @@ class Data: | |||
if self.args: | |||
self.args[-1] = self.args[-1][:-1] | |||
else: | |||
self._command = self.command[:-1] | |||
self._command = self._command[:-1] | |||
except IndexError: | |||
pass | |||
@@ -1,4 +1,4 @@ | |||
# Copyright (C) 2009-2021 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -18,11 +18,11 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from time import sleep | |||
__all__ = ["Frontend"] | |||
from earwigbot.irc import Data, IRCConnection | |||
import time | |||
__all__ = ["Frontend"] | |||
from earwigbot.irc import Data, IRCConnection | |||
class Frontend(IRCConnection): | |||
@@ -121,10 +121,11 @@ class Frontend(IRCConnection): | |||
elif line[1] == "NOTICE": | |||
data = Data(self.nick, line, msgtype="NOTICE") | |||
if self._auth_wait and data.nick == self.NICK_SERVICES: | |||
assert data.msg is not None | |||
if data.msg.startswith("This nickname is registered."): | |||
return | |||
self._auth_wait = False | |||
sleep(2) # Wait for hostname change to propagate | |||
time.sleep(2) # Wait for hostname change to propagate | |||
self._join_channels() | |||
elif line[1] == "KICK": | |||
@@ -1,4 +1,4 @@ | |||
# Copyright (C) 2009-2021 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -18,19 +18,19 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import re | |||
__all__ = ["RC"] | |||
import re | |||
class RC: | |||
"""Store data from an event received from our IRC watcher.""" | |||
re_color = re.compile("\x03([0-9]{1,2}(,[0-9]{1,2})?)?") | |||
re_edit = re.compile( | |||
"\A\[\[(.*?)\]\]\s(.*?)\s(https?://.*?)\s\*\s(.*?)\s\*\s(.*?)\Z" | |||
r"\A\[\[(.*?)\]\]\s(.*?)\s(https?://.*?)\s\*\s(.*?)\s\*\s(.*?)\Z" | |||
) | |||
re_log = re.compile("\A\[\[(.*?)\]\]\s(.*?)\s\s\*\s(.*?)\s\*\s(.*?)\Z") | |||
re_log = re.compile(r"\A\[\[(.*?)\]\]\s(.*?)\s\s\*\s(.*?)\s\*\s(.*?)\Z") | |||
pretty_edit = "\x02New {0}\x0f: \x0314[[\x0307{1}\x0314]]\x0306 * \x0303{2}\x0306 * \x0302{3}\x0306 * \x0310{4}" | |||
pretty_log = "\x02New {0}\x0f: \x0303{1}\x0306 * \x0302{2}\x0306 * \x0310{3}" | |||
@@ -1,100 +0,0 @@ | |||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
""" | |||
Implements a hierarchy of importing classes as defined in `PEP 302 | |||
<https://www.python.org/dev/peps/pep-0302/>`_ to load modules in a safe yet lazy | |||
manner, so that they can be referred to by name but are not actually loaded | |||
until they are first used (i.e. one of their attributes is read). | |||
""" | |||
import importlib | |||
import sys | |||
from threading import RLock | |||
from types import ModuleType | |||
__all__ = ["LazyImporter"] | |||
# The genuine attribute lookup for modules; used to bypass the lazy hook below.
_real_get = ModuleType.__getattribute__
# Serializes creation of placeholder modules and their sys.modules entries.
_lazy_init_lock = RLock()


def _create_failing_get(exc):
    """Return a ``__getattribute__`` replacement that always raises *exc*.

    Installed on a placeholder class after its real import failed, so that
    every later attribute read reports the same error again.
    """

    def _fail(self, attr):
        raise exc

    return _fail


def _mock_get(self, attr):
    """Load the real module on first attribute read, then return *attr*.

    This is installed as ``__getattribute__`` on each placeholder class. The
    first read performs the actual import by reloading the placeholder in
    place; once that finishes, the hook replaces itself with the normal module
    lookup and removes the per-module lock from the class.

    The lock is an :py:class:`~threading.RLock` and ``_unloaded`` is cleared
    *before* reloading, so attribute reads that happen on the same thread
    while the reload is in progress fall straight through to the real lookup
    instead of starting a second reload.

    Raises :py:exc:`ImportError` if the real module cannot be loaded; in that
    case every subsequent attribute read raises the same exception.
    """
    with _real_get(self, "_lock"):
        if _real_get(self, "_unloaded"):
            klass = type(self)
            klass._unloaded = False
            try:
                importlib.reload(self)
            except ImportError as exc:
                klass.__getattribute__ = _create_failing_get(exc)
                del klass._lock
                raise
            # Loaded successfully: stop intercepting attribute access.
            klass.__getattribute__ = _real_get
            del klass._lock
    return _real_get(self, attr)


class _LazyModule(type):
    """Metaclass-based factory for placeholder modules.

    Calling ``_LazyModule(name)`` does not return a class: it returns the
    object stored in ``sys.modules[name]``, creating a placeholder module
    there first if nothing is registered under that name yet.
    """

    def __new__(cls, name):
        with _lazy_init_lock:
            if name not in sys.modules:
                # Each placeholder gets its own one-off ModuleType subclass so
                # that the hook and its state can be swapped per module.
                attributes = {
                    "__name__": name,
                    "__getattribute__": _mock_get,
                    "_unloaded": True,
                    "_lock": RLock(),
                }
                parents = (ModuleType,)
                klass = type.__new__(cls, "module", parents, attributes)
                sys.modules[name] = klass(name)
                if "." in name:  # Also ensure the parent exists
                    _LazyModule(name.rsplit(".", 1)[0])
            return sys.modules[name]


class LazyImporter:
    r"""An importer for modules that are loaded lazily.

    This inserts itself into :py:data:`sys.meta_path`, storing a dictionary of
    :py:class:`_LazyModule`\ s (which is added to with :py:meth:`new`).
    """

    def __init__(self):
        self._modules = {}
        sys.meta_path.append(self)

    def new(self, name):
        """Register *name* for lazy loading and return its module object.

        If *name* is already present in :py:data:`sys.modules`, that existing
        module is returned (and remembered) instead of a new placeholder.
        """
        module = _LazyModule(name)
        self._modules[name] = module
        return module

    # NOTE(review): find_module()/load_module() are the legacy PEP 302 hooks.
    # Import machinery that only consults find_spec() will not call them --
    # confirm against the Python versions this package supports.
    def find_module(self, fullname, path=None):
        """Return this importer if it holds an unregistered *fullname*.

        Returns ``None`` when *fullname* was never passed to :py:meth:`new` or
        is still present in :py:data:`sys.modules`.
        """
        if fullname in self._modules and fullname not in sys.modules:
            return self
        return None

    def load_module(self, fullname):
        """Hand over (and forget) the stored module for *fullname*.

        Raises :py:exc:`KeyError` if *fullname* is not currently stored.
        """
        return self._modules.pop(fullname)
@@ -1,4 +1,4 @@ | |||
# Copyright (C) 2009-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -18,7 +18,7 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from time import sleep | |||
import time | |||
from urllib.request import build_opener | |||
from earwigbot import exceptions | |||
@@ -73,7 +73,7 @@ class CopyvioMixIn: | |||
for dep in klass.requirements(): | |||
try: | |||
__import__(dep).__name__ | |||
except (ImportError, AttributeError): | |||
except (ModuleNotFoundError, AttributeError): | |||
e = "Missing a required dependency ({}) for the {} engine" | |||
e = e.format(dep, engine) | |||
raise exceptions.UnsupportedSearchEngineError(e) | |||
@@ -173,7 +173,7 @@ class CopyvioMixIn: | |||
self._logger.debug(log.format(self.title, searcher.name, chunk)) | |||
workspace.enqueue(searcher.search(chunk)) | |||
num_queries += 1 | |||
sleep(1) | |||
time.sleep(1) | |||
workspace.wait() | |||
result = workspace.get_result(num_queries) | |||
@@ -1,4 +1,4 @@ | |||
# Copyright (C) 2009-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -19,10 +19,10 @@ | |||
# SOFTWARE. | |||
import re | |||
import sqlite3 as sqlite | |||
from threading import Lock | |||
from time import time | |||
from urllib.parse import urlparse | |||
import sqlite3 | |||
import threading | |||
import time | |||
import urllib.parse | |||
from earwigbot import exceptions | |||
@@ -60,7 +60,7 @@ class ExclusionsDB: | |||
self._sitesdb = sitesdb | |||
self._dbfile = dbfile | |||
self._logger = logger | |||
self._db_access_lock = Lock() | |||
self._db_access_lock = threading.Lock() | |||
def __repr__(self): | |||
"""Return the canonical string representation of the ExclusionsDB.""" | |||
@@ -84,7 +84,7 @@ class ExclusionsDB: | |||
for page in pages: | |||
sources.append((sitename, page)) | |||
with sqlite.connect(self._dbfile) as conn: | |||
with sqlite3.connect(self._dbfile) as conn: | |||
conn.executescript(script) | |||
conn.executemany(query, sources) | |||
@@ -139,7 +139,7 @@ class ExclusionsDB: | |||
site = self._sitesdb.get_site("enwiki") | |||
else: | |||
site = self._sitesdb.get_site(sitename) | |||
with self._db_access_lock, sqlite.connect(self._dbfile) as conn: | |||
with self._db_access_lock, sqlite3.connect(self._dbfile) as conn: | |||
urls = set() | |||
for (source,) in conn.execute(query1, (sitename,)): | |||
urls |= self._load_source(site, source) | |||
@@ -150,17 +150,17 @@ class ExclusionsDB: | |||
conn.execute(query3, (sitename, url)) | |||
conn.executemany(query4, [(sitename, url) for url in urls]) | |||
if conn.execute(query5, (sitename,)).fetchone(): | |||
conn.execute(query6, (int(time()), sitename)) | |||
conn.execute(query6, (int(time.time()), sitename)) | |||
else: | |||
conn.execute(query7, (sitename, int(time()))) | |||
conn.execute(query7, (sitename, int(time.time()))) | |||
def _get_last_update(self, sitename): | |||
"""Return the UNIX timestamp of the last time the db was updated.""" | |||
query = "SELECT update_time FROM updates WHERE update_sitename = ?" | |||
with self._db_access_lock, sqlite.connect(self._dbfile) as conn: | |||
with self._db_access_lock, sqlite3.connect(self._dbfile) as conn: | |||
try: | |||
result = conn.execute(query, (sitename,)).fetchone() | |||
except sqlite.OperationalError: | |||
except sqlite3.OperationalError: | |||
self._create() | |||
return 0 | |||
return result[0] if result else 0 | |||
@@ -174,7 +174,7 @@ class ExclusionsDB: | |||
after 12 hours. | |||
""" | |||
max_staleness = 60 * 60 * (12 if sitename == "all" else 48) | |||
time_since_update = int(time() - self._get_last_update(sitename)) | |||
time_since_update = int(time.time() - self._get_last_update(sitename)) | |||
if force or time_since_update > max_staleness: | |||
log = "Updating stale database: {0} (last updated {1} seconds ago)" | |||
self._logger.info(log.format(sitename, time_since_update)) | |||
@@ -191,10 +191,10 @@ class ExclusionsDB: | |||
Return ``True`` if the URL is in the database, or ``False`` otherwise. | |||
""" | |||
normalized = re.sub(_RE_STRIP_PREFIX, "", url.lower()) | |||
parsed = urlparse(url.lower()) | |||
parsed = urllib.parse.urlparse(url.lower()) | |||
query = """SELECT exclusion_url FROM exclusions | |||
WHERE exclusion_sitename = ? OR exclusion_sitename = ?""" | |||
with self._db_access_lock, sqlite.connect(self._dbfile) as conn: | |||
with self._db_access_lock, sqlite3.connect(self._dbfile) as conn: | |||
for (excl,) in conn.execute(query, (sitename, "all")): | |||
excl = excl.lower() | |||
if excl.startswith("*."): | |||
@@ -231,7 +231,7 @@ class ExclusionsDB: | |||
certain HTML tag attributes (``"href"`` and ``"src"``). | |||
""" | |||
site = page.site | |||
path = urlparse(page.url).path | |||
path = urllib.parse.urlparse(page.url).path | |||
roots = [site.domain] | |||
scripts = ["index.php", "load.php", "api.php"] | |||
@@ -1,4 +1,4 @@ | |||
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -18,7 +18,7 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from re import UNICODE, sub | |||
import re | |||
__all__ = ["EMPTY", "EMPTY_INTERSECTION", "MarkovChain", "MarkovChainIntersection"] | |||
@@ -38,7 +38,7 @@ class MarkovChain: | |||
def _build(self): | |||
"""Build and return the Markov chain from the input text.""" | |||
padding = self.degree - 1 | |||
words = sub(r"[^\w\s-]", "", self.text.lower(), flags=UNICODE).split() | |||
words = re.sub(r"[^\w\s-]", "", self.text.lower(), flags=re.UNICODE).split() | |||
words = ([self.START] * padding) + words + ([self.END] * padding) | |||
chain = {} | |||
@@ -1,4 +1,4 @@ | |||
# Copyright (C) 2009-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -18,24 +18,17 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import io | |||
import json | |||
import os.path | |||
import re | |||
import urllib.parse | |||
import urllib.request | |||
from io import StringIO | |||
from os import path | |||
import mwparserfromhell | |||
from earwigbot import importer | |||
from earwigbot.exceptions import ParserExclusionError, ParserRedirectError | |||
bs4 = importer.new("bs4") | |||
nltk = importer.new("nltk") | |||
converter = importer.new("pdfminer.converter") | |||
pdfinterp = importer.new("pdfminer.pdfinterp") | |||
pdfpage = importer.new("pdfminer.pdfpage") | |||
__all__ = ["ArticleTextParser", "get_parser"] | |||
@@ -101,9 +94,10 @@ class ArticleTextParser(_BaseTextParser): | |||
def _get_tokenizer(self): | |||
"""Return a NLTK punctuation tokenizer for the article's language.""" | |||
import nltk | |||
def datafile(lang): | |||
return "file:" + path.join( | |||
return "file:" + os.path.join( | |||
self._args["nltk_dir"], "tokenizers", "punkt", lang + ".pickle" | |||
) | |||
@@ -213,11 +207,11 @@ class ArticleTextParser(_BaseTextParser): | |||
elif len(chunks) % 5 == 1: | |||
chunk = sentences.pop() # Pop from end | |||
elif len(chunks) % 5 == 2: | |||
chunk = sentences.pop(len(sentences) / 2) # Pop from Q2 | |||
chunk = sentences.pop(len(sentences) // 2) # Pop from Q2 | |||
elif len(chunks) % 5 == 3: | |||
chunk = sentences.pop(len(sentences) / 4) # Pop from Q1 | |||
chunk = sentences.pop(len(sentences) // 4) # Pop from Q1 | |||
else: | |||
chunk = sentences.pop(3 * len(sentences) / 4) # Pop from Q3 | |||
chunk = sentences.pop(3 * len(sentences) // 4) # Pop from Q3 | |||
chunks.append(chunk) | |||
return chunks | |||
@@ -256,6 +250,8 @@ class _HTMLParser(_BaseTextParser): | |||
@staticmethod | |||
def _get_soup(text): | |||
"""Parse some text using BeautifulSoup.""" | |||
import bs4 | |||
try: | |||
return bs4.BeautifulSoup(text, "lxml") | |||
except ValueError: | |||
@@ -263,6 +259,7 @@ class _HTMLParser(_BaseTextParser): | |||
def _clean_soup(self, soup): | |||
"""Clean a BeautifulSoup tree of invisible tags.""" | |||
import bs4 | |||
def is_comment(text): | |||
return isinstance(text, bs4.element.Comment) | |||
@@ -353,21 +350,23 @@ class _PDFParser(_BaseTextParser): | |||
def parse(self): | |||
"""Return extracted text from the PDF.""" | |||
output = StringIO() | |||
from pdfminer import converter, pdfinterp, pdfpage | |||
output = io.StringIO() | |||
manager = pdfinterp.PDFResourceManager() | |||
conv = converter.TextConverter(manager, output) | |||
interp = pdfinterp.PDFPageInterpreter(manager, conv) | |||
try: | |||
pages = pdfpage.PDFPage.get_pages(StringIO(self.text)) | |||
pages = pdfpage.PDFPage.get_pages(io.StringIO(self.text)) | |||
for page in pages: | |||
interp.process_page(page) | |||
except Exception: # pylint: disable=broad-except | |||
return output.getvalue().decode("utf8") | |||
return output.getvalue() | |||
finally: | |||
conv.close() | |||
value = output.getvalue().decode("utf8") | |||
value = output.getvalue() | |||
for orig, new in self.substitutions: | |||
value = value.replace(orig, new) | |||
return re.sub(r"\n\n+", "\n", value).strip() | |||
@@ -380,7 +379,9 @@ class _PlainTextParser(_BaseTextParser): | |||
def parse(self): | |||
"""Unicode-ify and strip whitespace from the plain text document.""" | |||
converted = bs4.UnicodeDammit(self.text).unicode_markup | |||
from bs4.dammit import UnicodeDammit | |||
converted = UnicodeDammit(self.text).unicode_markup | |||
return converted.strip() if converted else "" | |||
@@ -1,4 +1,4 @@ | |||
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -1,4 +1,4 @@ | |||
# Copyright (C) 2009-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -18,18 +18,15 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import re | |||
from gzip import GzipFile | |||
from io import StringIO | |||
from json import loads | |||
from re import sub as re_sub | |||
from urllib.error import URLError | |||
from urllib.parse import urlencode | |||
from earwigbot import importer | |||
from earwigbot.exceptions import SearchQueryError | |||
lxml = importer.new("lxml") | |||
__all__ = [ | |||
"BingSearchEngine", | |||
"GoogleSearchEngine", | |||
@@ -104,7 +101,7 @@ class BingSearchEngine(_BaseSearchEngine): | |||
auth = (key + ":" + key).encode("base64").replace("\n", "") | |||
self.opener.addheaders.append(("Authorization", "Basic " + auth)) | |||
def search(self, query): | |||
def search(self, query: str) -> list[str]: | |||
"""Do a Bing web search for *query*. | |||
Returns a list of URLs ranked by relevance (as determined by Bing). | |||
@@ -142,7 +139,7 @@ class GoogleSearchEngine(_BaseSearchEngine): | |||
name = "Google" | |||
def search(self, query): | |||
def search(self, query: str) -> list[str]: | |||
"""Do a Google web search for *query*. | |||
Returns a list of URLs ranked by relevance (as determined by Google). | |||
@@ -153,7 +150,7 @@ class GoogleSearchEngine(_BaseSearchEngine): | |||
params = { | |||
"cx": self.cred["id"], | |||
"key": self.cred["key"], | |||
"q": '"' + query.replace('"', "").encode("utf8") + '"', | |||
"q": '"' + query.replace('"', "") + '"', | |||
"alt": "json", | |||
"num": str(self.count), | |||
"safe": "off", | |||
@@ -183,15 +180,17 @@ class YandexSearchEngine(_BaseSearchEngine): | |||
def requirements(): | |||
return ["lxml.etree"] | |||
def search(self, query): | |||
def search(self, query: str) -> list[str]: | |||
"""Do a Yandex web search for *query*. | |||
Returns a list of URLs ranked by relevance (as determined by Yandex). | |||
Raises :py:exc:`~earwigbot.exceptions.SearchQueryError` on errors. | |||
""" | |||
import lxml.etree | |||
domain = self.cred.get("proxy", "yandex.com") | |||
url = f"https://{domain}/search/xml?" | |||
query = re_sub(r"[^a-zA-Z0-9 ]", "", query).encode("utf8") | |||
query = re.sub(r"[^a-zA-Z0-9 ]", "", query) | |||
params = { | |||
"user": self.cred["user"], | |||
"key": self.cred["key"], | |||
@@ -205,7 +204,7 @@ class YandexSearchEngine(_BaseSearchEngine): | |||
result = self._open(url + urlencode(params)) | |||
try: | |||
data = lxml.etree.fromstring(result) | |||
data = lxml.etree.fromstring(result) # type: ignore | |||
return [elem.text for elem in data.xpath(".//url")] | |||
except lxml.etree.Error as exc: | |||
raise SearchQueryError("Yandex XML parse error: " + str(exc)) | |||
@@ -35,7 +35,7 @@ import requests | |||
from requests.cookies import RequestsCookieJar | |||
from requests_oauthlib import OAuth1 | |||
from earwigbot import exceptions, importer | |||
from earwigbot import exceptions | |||
from earwigbot.wiki import constants | |||
from earwigbot.wiki.category import Category | |||
from earwigbot.wiki.constants import Service | |||
@@ -47,7 +47,11 @@ if typing.TYPE_CHECKING: | |||
import pymysql.cursors | |||
from pymysql.cursors import Cursor | |||
else: | |||
pymysql = importer.new("pymysql") | |||
try: | |||
import pymysql | |||
import pymysql.cursors | |||
except ModuleNotFoundError: | |||
pymysql = None | |||
__all__ = ["Site"] | |||
@@ -711,11 +715,11 @@ class Site: | |||
if "autoreconnect" not in args: | |||
args["autoreconnect"] = True | |||
try: | |||
return pymysql.connect(**args) | |||
except ImportError: | |||
e = "SQL querying requires the 'pymysql' package: https://pymysql.readthedocs.io/" | |||
raise exceptions.SQLError(e) | |||
if pymysql is None: | |||
raise exceptions.SQLError( | |||
"SQL querying requires the 'pymysql' package: https://pymysql.readthedocs.io/" | |||
) | |||
return pymysql.connect(**args) | |||
def _get_service_order(self) -> list[Service]: | |||
""" | |||
@@ -731,6 +735,10 @@ class Site: | |||
lag is also very high. self.SERVICE_SQL will not be included in the list if we | |||
cannot form a proper SQL connection. | |||
""" | |||
if pymysql is None: | |||
self._sql_info_cache["usable"] = False | |||
return [Service.API] | |||
now = time.time() | |||
if now - self._sql_info_cache["lastcheck"] > 120: | |||
self._sql_info_cache["lastcheck"] = now | |||
@@ -739,7 +747,7 @@ class Site: | |||
self._sql_info_cache["replag"] = sqllag = self.get_replag() | |||
except pymysql.Error as exc: | |||
raise exceptions.SQLError(str(exc)) | |||
except (exceptions.SQLError, ImportError): | |||
except exceptions.SQLError: | |||
self._sql_info_cache["usable"] = False | |||
return [Service.API] | |||
self._sql_info_cache["usable"] = True | |||