A Python robot that edits Wikipedia and interacts with people over IRC https://en.wikipedia.org/wiki/User:EarwigBot
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

474 lines
21 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2009-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. import base64
  23. from collections import OrderedDict
  24. from getpass import getpass
  25. import os
  26. from os import chmod, makedirs, mkdir, path
  27. import re
  28. import stat
  29. import sys
  30. from textwrap import fill, wrap
  31. import yaml
  32. from earwigbot import exceptions, importer
  33. from earwigbot.config.ordered_yaml import OrderedDumper
# Handles for the optional 'cryptography' submodules, obtained through
# earwigbot's importer rather than a plain ``import`` statement.
# NOTE(review): presumably these are imported lazily -- ConfigScript catches
# ImportError at the point where they are first used -- confirm against
# earwigbot.importer.
fernet = importer.new("cryptography.fernet")
hashes = importer.new("cryptography.hazmat.primitives.hashes")
pbkdf2 = importer.new("cryptography.hazmat.primitives.kdf.pbkdf2")

# Only the setup script class is part of this module's public API.
__all__ = ["ConfigScript"]
  38. RULES_TEMPLATE = """# -*- coding: utf-8 -*-
  39. def process(bot, rc):
  40. \"\"\"Given a Bot() object and an RC() object, return a list of channels
  41. to report this event to. Also, start any wiki bot tasks within this
  42. function if necessary.\"\"\"
  43. pass
  44. """
  45. class ConfigScript:
  46. """A script to guide a user through the creation of a new config file."""
  47. WIDTH = 79
  48. PROMPT = "\x1b[32m> \x1b[0m"
  49. PBKDF_ROUNDS = 100000
  50. def __init__(self, config):
  51. self.config = config
  52. self.data = OrderedDict([
  53. ("metadata", OrderedDict()),
  54. ("components", OrderedDict()),
  55. ("wiki", OrderedDict()),
  56. ("irc", OrderedDict()),
  57. ("commands", OrderedDict()),
  58. ("tasks", OrderedDict()),
  59. ("schedule", [])
  60. ])
  61. self._cipher = None
  62. self._wmf = False
  63. self._proj = None
  64. self._lang = None
  65. def _print(self, text):
  66. print(fill(re.sub(r"\s\s+", " ", text), self.WIDTH))
  67. def _print_no_nl(self, text):
  68. sys.stdout.write(fill(re.sub(r"\s\s+", " ", text), self.WIDTH))
  69. sys.stdout.flush()
  70. def _pause(self):
  71. input(self.PROMPT + "Press enter to continue: ")
  72. def _ask(self, text, default=None, require=True):
  73. text = self.PROMPT + text
  74. if default:
  75. text += " \x1b[33m[{0}]\x1b[0m".format(default)
  76. lines = wrap(re.sub(r"\s\s+", " ", text), self.WIDTH)
  77. if len(lines) > 1:
  78. print("\n".join(lines[:-1]))
  79. while True:
  80. answer = input(lines[-1] + " ") or default
  81. if answer or not require:
  82. return answer
  83. def _ask_bool(self, text, default=True):
  84. text = self.PROMPT + text
  85. if default:
  86. text += " \x1b[33m[Y/n]\x1b[0m"
  87. else:
  88. text += " \x1b[33m[y/N]\x1b[0m"
  89. lines = wrap(re.sub(r"\s\s+", " ", text), self.WIDTH)
  90. if len(lines) > 1:
  91. print("\n".join(lines[:-1]))
  92. while True:
  93. answer = input(lines[-1] + " ").lower()
  94. if not answer:
  95. return default
  96. if answer.startswith("y"):
  97. return True
  98. if answer.startswith("n"):
  99. return False
  100. def _ask_pass(self, text, encrypt=True):
  101. password = getpass(self.PROMPT + text + " ")
  102. if encrypt:
  103. return self._encrypt(password)
  104. return password
  105. def _encrypt(self, password):
  106. if self._cipher:
  107. return base64.b64encode(self._cipher.encrypt(password.encode())).decode()
  108. else:
  109. return password
  110. def _ask_list(self, text):
  111. print(fill(re.sub(r"\s\s+", " ", self.PROMPT + text), self.WIDTH))
  112. print("[one item per line; blank line to end]:")
  113. result = []
  114. while True:
  115. line = input(self.PROMPT)
  116. if line:
  117. result.append(line)
  118. else:
  119. return result
  120. def _set_metadata(self):
  121. print()
  122. self.data["metadata"] = OrderedDict([("version", 1)])
  123. self._print("""I can encrypt passwords stored in your config file in
  124. addition to preventing other users on your system from
  125. reading the file. Encryption is recommended if the bot
  126. is to run on a public server like Toolforge, but the
  127. need to enter a key every time you start the bot may be
  128. an inconvenience.""")
  129. self.data["metadata"]["encryptPasswords"] = False
  130. if self._ask_bool("Encrypt stored passwords?"):
  131. key = getpass(self.PROMPT + "Enter an encryption key: ")
  132. self._print_no_nl("Generating key...")
  133. try:
  134. salt = os.urandom(16)
  135. kdf = pbkdf2.PBKDF2HMAC(
  136. algorithm=hashes.SHA256(),
  137. length=32,
  138. salt=salt,
  139. iterations=self.PBKDF_ROUNDS,
  140. )
  141. self._cipher = fernet.Fernet(base64.urlsafe_b64encode(kdf.derive(key.encode())))
  142. except ImportError:
  143. print(" error!")
  144. self._print("""Encryption requires the 'cryptography' package:
  145. https://cryptography.io/""")
  146. self._print("""I will disable encryption for now; restart
  147. configuration after installing these packages if
  148. you want it.""")
  149. self._pause()
  150. else:
  151. self.data["metadata"]["encryptPasswords"] = True
  152. self.data["metadata"]["salt"] = base64.b64encode(salt).decode()
  153. print(" done.")
  154. print()
  155. self._print("""The bot can temporarily store its logs in the logs/
  156. subdirectory. Error logs are kept for a month whereas
  157. normal logs are kept for a week. If you disable this,
  158. the bot will still print logs to stdout.""")
  159. logging = self._ask_bool("Enable logging?")
  160. self.data["metadata"]["enableLogging"] = logging
  161. def _set_components(self):
  162. print()
  163. self._print("""The bot contains three separate components that can run
  164. independently of each other.""")
  165. self._print("""- The IRC front-end runs on a normal IRC server, like
  166. freenode, and expects users to interact with it through
  167. commands.""")
  168. self._print("""- The IRC watcher runs on a wiki recent-changes server,
  169. like irc.wikimedia.org, and listens for edits. Users
  170. cannot interact with this component. It can detect
  171. specific events and report them to "feed" channels on
  172. the front-end or start bot tasks.""")
  173. self._print("""- The wiki task scheduler runs wiki-editing bot tasks in
  174. separate threads at user-defined times through a
  175. cron-like interface. Tasks which are not scheduled can
  176. be started by the IRC watcher manually through the IRC
  177. front-end.""")
  178. frontend = self._ask_bool("Enable the IRC front-end?")
  179. watcher = self._ask_bool("Enable the IRC watcher?")
  180. scheduler = self._ask_bool("Enable the wiki task scheduler?")
  181. self.data["components"]["irc_frontend"] = frontend
  182. self.data["components"]["irc_watcher"] = watcher
  183. self.data["components"]["wiki_scheduler"] = scheduler
  184. def _login(self, kwargs):
  185. self.config.wiki._load(self.data["wiki"])
  186. self._print_no_nl("Trying to connect to the site...")
  187. try:
  188. site = self.config.bot.wiki.add_site(**kwargs)
  189. except exceptions.APIError as exc:
  190. print(" API error!")
  191. print("\x1b[31m" + exc.message + "\x1b[0m")
  192. question = "Would you like to re-enter the site information?"
  193. if self._ask_bool(question):
  194. return self._set_wiki()
  195. question = "This will cancel the setup process. Are you sure?"
  196. if self._ask_bool(question, default=False):
  197. raise exceptions.NoConfigError()
  198. return self._set_wiki()
  199. except exceptions.LoginError as exc:
  200. print(" login error!")
  201. print("\x1b[31m" + exc.message + "\x1b[0m")
  202. question = "Would you like to re-enter your login information?"
  203. if self._ask_bool(question):
  204. self.data["wiki"]["username"] = self._ask("Bot username:")
  205. password = self._ask_pass("Bot password:", encrypt=False)
  206. self.data["wiki"]["password"] = password
  207. return self._login(kwargs)
  208. else:
  209. password = self.data["wiki"]["password"]
  210. question = "Would you like to re-enter the site information?"
  211. if self._ask_bool(question):
  212. return self._set_wiki()
  213. print()
  214. self._print("""Moving on. You can modify the login information
  215. stored in the bot's config in the future.""")
  216. self.data["wiki"]["password"] = None # Clear so we don't login
  217. self.config.wiki._load(self.data["wiki"])
  218. self._print_no_nl("Trying to connect to the site...")
  219. site = self.config.bot.wiki.add_site(**kwargs)
  220. print(" success.")
  221. self.data["wiki"]["password"] = password # Reset original value
  222. else:
  223. print(" success.")
  224. # Remember to store the encrypted password:
  225. password = self._encrypt(self.data["wiki"]["password"])
  226. self.data["wiki"]["password"] = password
  227. return site
  228. def _set_wiki(self):
  229. print()
  230. self._wmf = self._ask_bool("""Will this bot run on Wikimedia Foundation
  231. wikis, like Wikipedia?""")
  232. if self._wmf:
  233. msg = "Site project (e.g. 'wikipedia', 'wiktionary', 'wikimedia'):"
  234. self._proj = project = self._ask(msg, "wikipedia").lower()
  235. msg = "Site language code (e.g. 'en', 'fr', 'commons'):"
  236. self._lang = lang = self._ask(msg, "en").lower()
  237. kwargs = {"project": project, "lang": lang}
  238. else:
  239. msg = "Site base URL, without the script path and trailing slash;"
  240. msg += " can be protocol-insensitive (e.g. '//en.wikipedia.org'):"
  241. url = self._ask(msg)
  242. script = self._ask("Site script path:", "/w")
  243. kwargs = {"base_url": url, "script_path": script}
  244. self.data["wiki"]["username"] = self._ask("Bot username:")
  245. password = self._ask_pass("Bot password:", encrypt=False)
  246. self.data["wiki"]["password"] = password
  247. self.data["wiki"]["userAgent"] = "EarwigBot/$1 (Python/$2; https://github.com/earwig/earwigbot)"
  248. self.data["wiki"]["summary"] = "([[WP:BOT|Bot]]) $2"
  249. self.data["wiki"]["useHTTPS"] = True
  250. self.data["wiki"]["assert"] = "user"
  251. self.data["wiki"]["maxlag"] = 10
  252. self.data["wiki"]["waitTime"] = 2
  253. self.data["wiki"]["defaultSite"] = self._login(kwargs).name
  254. self.data["wiki"]["sql"] = {}
  255. if self._wmf:
  256. msg = "Will this bot run from the Wikimedia Tool Labs?"
  257. labs = self._ask_bool(msg, default=False)
  258. if labs:
  259. args = [("host", "$1.labsdb"), ("db", "$1_p"),
  260. ("read_default_file", "~/replica.my.cnf")]
  261. self.data["wiki"]["sql"] = OrderedDict(args)
  262. else:
  263. msg = "Will this bot run from the Wikimedia Toolserver?"
  264. toolserver = self._ask_bool(msg, default=False)
  265. if toolserver:
  266. args = [("host", "$1-p.rrdb.toolserver.org"),
  267. ("db", "$1_p")]
  268. self.data["wiki"]["sql"] = OrderedDict(args)
  269. self.data["wiki"]["shutoff"] = {}
  270. msg = "Would you like to enable an automatic shutoff page for the bot?"
  271. if self._ask_bool(msg):
  272. print()
  273. self._print("""The page title can contain two wildcards: $1 will be
  274. substituted with the bot's username, and $2 with the
  275. current task number. This can be used to implement a
  276. separate shutoff page for each task.""")
  277. page = self._ask("Page title:", "User:$1/Shutoff")
  278. msg = "Page content to indicate the bot is *not* shut off:"
  279. disabled = self._ask(msg, "run")
  280. args = [("page", page), ("disabled", disabled)]
  281. self.data["wiki"]["shutoff"] = OrderedDict(args)
  282. self.data["wiki"]["search"] = {}
  283. def _set_irc(self):
  284. if self.data["components"]["irc_frontend"]:
  285. print()
  286. frontend = self.data["irc"]["frontend"] = OrderedDict()
  287. msg = "Hostname of the frontend's IRC server, without 'irc://':"
  288. frontend["host"] = self._ask(msg, "irc.freenode.net")
  289. frontend["port"] = self._ask("Frontend port:", 6667)
  290. frontend["nick"] = self._ask("Frontend bot's nickname:")
  291. frontend["ident"] = self._ask("Frontend bot's ident:",
  292. frontend["nick"].lower())
  293. question = "Frontend bot's real name (gecos):"
  294. frontend["realname"] = self._ask(question, "EarwigBot")
  295. if self._ask_bool("Should the bot identify to NickServ?"):
  296. ns_user = self._ask("NickServ username:", frontend["nick"])
  297. ns_pass = self._ask_pass("Nickserv password:")
  298. frontend["nickservUsername"] = ns_user
  299. frontend["nickservPassword"] = ns_pass
  300. chan_question = "Frontend channels to join by default:"
  301. frontend["channels"] = self._ask_list(chan_question)
  302. print()
  303. self._print("""The bot keeps a database of its admins (users who
  304. can use certain sensitive commands) and owners
  305. (users who can quit the bot and modify its access
  306. list), identified by nick, ident, and/or hostname.
  307. Hostname is the most secure option since it cannot
  308. be easily spoofed. If you have a cloak, this will
  309. probably look like 'wikipedia/Username' or
  310. 'unaffiliated/nickname'.""")
  311. host = self._ask("Your hostname on the frontend:", require=False)
  312. if host:
  313. permdb = self.config._permissions
  314. permdb.load()
  315. permdb.add_owner(host=host)
  316. permdb.add_admin(host=host)
  317. else:
  318. frontend = {}
  319. if self.data["components"]["irc_watcher"]:
  320. print()
  321. watcher = self.data["irc"]["watcher"] = OrderedDict()
  322. if self._wmf:
  323. watcher["host"] = "irc.wikimedia.org"
  324. watcher["port"] = 6667
  325. else:
  326. msg = "Hostname of the watcher's IRC server, without 'irc://':"
  327. watcher["host"] = self._ask(msg)
  328. watcher["port"] = self._ask("Watcher port:", 6667)
  329. nick = self._ask("Watcher bot's nickname:", frontend.get("nick"))
  330. ident = self._ask("Watcher bot's ident:", nick.lower())
  331. watcher["nick"] = nick
  332. watcher["ident"] = ident
  333. question = "Watcher bot's real name (gecos):"
  334. default = frontend.get("realname", "EarwigBot")
  335. watcher["realname"] = self._ask(question, default)
  336. watcher_ns = "Should the bot identify to NickServ?"
  337. if not self._wmf and self._ask_bool(watcher_ns):
  338. ns_user = self._ask("NickServ username:", watcher["nick"])
  339. ns_pass = self._ask_pass("Nickserv password:")
  340. watcher["nickservUsername"] = ns_user
  341. watcher["nickservPassword"] = ns_pass
  342. if self._wmf:
  343. chan = "#{0}.{1}".format(self._lang, self._proj)
  344. watcher["channels"] = [chan]
  345. else:
  346. chan_question = "Watcher channels to join by default:"
  347. watcher["channels"] = self._ask_list(chan_question)
  348. print()
  349. self._print("""I am now creating a blank 'rules.py' file, which
  350. will determine how the bot handles messages received
  351. from the IRC watcher. It contains a process()
  352. function that takes a Bot object (allowing you to
  353. start tasks) and an RC object (storing the message
  354. from the watcher). See the documentation for
  355. details.""")
  356. with open(path.join(self.config.root_dir, "rules.py"), "w") as fp:
  357. fp.write(RULES_TEMPLATE)
  358. self._pause()
  359. self.data["irc"]["version"] = "EarwigBot - $1 - Python/$2 https://github.com/earwig/earwigbot"
  360. def _set_commands(self):
  361. print()
  362. msg = """Would you like to disable the default IRC commands? You can
  363. fine-tune which commands are disabled later on."""
  364. if (not self.data["components"]["irc_frontend"] or
  365. self._ask_bool(msg, default=False)):
  366. self.data["commands"]["disable"] = True
  367. print()
  368. self._print("""I am now creating the 'commands/' directory, where you
  369. can place custom IRC commands and plugins. Creating your
  370. own commands is described in the documentation.""")
  371. mkdir(path.join(self.config.root_dir, "commands"))
  372. self._pause()
  373. def _set_tasks(self):
  374. print()
  375. self._print("""I am now creating the 'tasks/' directory, where you can
  376. place custom bot tasks and plugins. Creating your own
  377. tasks is described in the documentation.""")
  378. mkdir(path.join(self.config.root_dir, "tasks"))
  379. self._pause()
  380. def _set_schedule(self):
  381. print()
  382. self._print("""The final section of your config file, 'schedule', is a
  383. list of bot tasks to be started by the wiki scheduler.
  384. Each entry contains cron-like time quantifiers and a
  385. list of tasks. For example, the following starts the
  386. 'foobot' task every hour on the half-hour:""")
  387. print("\x1b[33mschedule:")
  388. print(" - minute: 30")
  389. print(" tasks:")
  390. print(" - foobot\x1b[0m")
  391. self._print("""The following starts the 'barbot' task with the keyword
  392. arguments 'action="baz"' every Monday at 05:00 UTC:""")
  393. print("\x1b[33m - week_day: 1")
  394. print(" hour: 5")
  395. print(" tasks:")
  396. print(' - ["barbot", {"action": "baz"}]\x1b[0m')
  397. self._print("""The full list of quantifiers is minute, hour, month_day,
  398. month, and week_day. See the documentation for more
  399. information.""")
  400. self._pause()
  401. def _save(self):
  402. with open(self.config.path, "w") as stream:
  403. yaml.dump(self.data, stream, OrderedDumper, indent=4,
  404. allow_unicode=True, default_flow_style=False)
  405. def make_new(self):
  406. """Make a new config file based on the user's input."""
  407. try:
  408. makedirs(path.dirname(self.config.path))
  409. except OSError as exc:
  410. if exc.errno != 17:
  411. raise
  412. try:
  413. open(self.config.path, "w").close()
  414. chmod(self.config.path, stat.S_IRUSR|stat.S_IWUSR)
  415. except IOError:
  416. print("I can't seem to write to the config file:")
  417. raise
  418. self._set_metadata()
  419. self._set_components()
  420. self._set_wiki()
  421. components = self.data["components"]
  422. if components["irc_frontend"] or components["irc_watcher"]:
  423. self._set_irc()
  424. self._set_commands()
  425. self._set_tasks()
  426. if components["wiki_scheduler"]:
  427. self._set_schedule()
  428. print()
  429. self._print("""I am now saving config.yml with your settings. YAML is a
  430. relatively straightforward format and you should be able
  431. to update these settings in the future when necessary.
  432. I will start the bot at your signal. Feel free to
  433. contact me at wikipedia.earwig@gmail.com if you have any
  434. questions.""")
  435. self._save()
  436. if not self._ask_bool("Start the bot now?"):
  437. exit()