A Python robot that edits Wikipedia and interacts with people over IRC https://en.wikipedia.org/wiki/User:EarwigBot
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

469 lines
21 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from collections import OrderedDict
  23. from getpass import getpass
  24. from hashlib import sha256
  25. from os import chmod, mkdir, path
  26. import re
  27. import stat
  28. import sys
  29. from textwrap import fill, wrap
  30. import yaml
  31. from earwigbot import exceptions, importer
  32. from earwigbot.config.ordered_yaml import OrderedDumper
# Optional third-party modules used only for password encryption.
# NOTE(review): ConfigScript._set_metadata() wraps the first use of these
# names in ``except ImportError``, which suggests importer.new() defers the
# real import until an attribute is accessed -- confirm against
# earwigbot.importer.
Blowfish = importer.new("Crypto.Cipher.Blowfish")
bcrypt = importer.new("bcrypt")

# Names exported by ``from <this module> import *``.
__all__ = ["ConfigScript"]
  36. RULES_TEMPLATE = """# -*- coding: utf-8 -*-
  37. def process(bot, rc):
  38. \"\"\"Given a Bot() object and an RC() object, return a list of channels
  39. to report this event to. Also, start any wiki bot tasks within this
  40. function if necessary.\"\"\"
  41. pass
  42. """
  43. class ConfigScript(object):
  44. """A script to guide a user through the creation of a new config file."""
  45. WIDTH = 79
  46. PROMPT = "\x1b[32m> \x1b[0m"
  47. BCRYPT_ROUNDS = 12
  48. def __init__(self, config):
  49. self.config = config
  50. self.data = OrderedDict([
  51. ("metadata", OrderedDict()),
  52. ("components", OrderedDict()),
  53. ("wiki", OrderedDict()),
  54. ("irc", OrderedDict()),
  55. ("commands", OrderedDict()),
  56. ("tasks", OrderedDict()),
  57. ("schedule", [])
  58. ])
  59. self._cipher = None
  60. self._wmf = False
  61. self._proj = None
  62. self._lang = None
  63. def _print(self, text):
  64. print fill(re.sub("\s\s+", " ", text), self.WIDTH)
  65. def _print_no_nl(self, text):
  66. sys.stdout.write(fill(re.sub("\s\s+", " ", text), self.WIDTH))
  67. sys.stdout.flush()
  68. def _pause(self):
  69. raw_input(self.PROMPT + "Press enter to continue: ")
  70. def _ask(self, text, default=None, require=True):
  71. text = self.PROMPT + text
  72. if default:
  73. text += " \x1b[33m[{0}]\x1b[0m".format(default)
  74. lines = wrap(re.sub("\s\s+", " ", text), self.WIDTH)
  75. if len(lines) > 1:
  76. print "\n".join(lines[:-1])
  77. while True:
  78. answer = raw_input(lines[-1] + " ") or default
  79. if answer or not require:
  80. return answer
  81. def _ask_bool(self, text, default=True):
  82. text = self.PROMPT + text
  83. if default:
  84. text += " \x1b[33m[Y/n]\x1b[0m"
  85. else:
  86. text += " \x1b[33m[y/N]\x1b[0m"
  87. lines = wrap(re.sub("\s\s+", " ", text), self.WIDTH)
  88. if len(lines) > 1:
  89. print "\n".join(lines[:-1])
  90. while True:
  91. answer = raw_input(lines[-1] + " ").lower()
  92. if not answer:
  93. return default
  94. if answer.startswith("y"):
  95. return True
  96. if answer.startswith("n"):
  97. return False
  98. def _ask_pass(self, text, encrypt=True):
  99. password = getpass(self.PROMPT + text + " ")
  100. if encrypt:
  101. return self._encrypt(password)
  102. return password
  103. def _encrypt(self, password):
  104. if self._cipher:
  105. mod = len(password) % 8
  106. if mod:
  107. password = password.ljust(len(password) + (8 - mod), "\x00")
  108. return self._cipher.encrypt(password).encode("hex")
  109. else:
  110. return password
  111. def _ask_list(self, text):
  112. print fill(re.sub("\s\s+", " ", self.PROMPT + text), self.WIDTH)
  113. print "[one item per line; blank line to end]:"
  114. result = []
  115. while True:
  116. line = raw_input(self.PROMPT)
  117. if line:
  118. result.append(line)
  119. else:
  120. return result
  121. def _set_metadata(self):
  122. print
  123. self.data["metadata"] = OrderedDict([("version", 1)])
  124. self._print("""I can encrypt passwords stored in your config file in
  125. addition to preventing other users on your system from
  126. reading the file. Encryption is recommended if the bot
  127. is to run on a public computer like the Toolserver, but
  128. otherwise the need to enter a key everytime you start
  129. the bot may be annoying.""")
  130. self.data["metadata"]["encryptPasswords"] = False
  131. if self._ask_bool("Encrypt stored passwords?"):
  132. key = getpass(self.PROMPT + "Enter an encryption key: ")
  133. msg = "Running {0} rounds of bcrypt...".format(self.BCRYPT_ROUNDS)
  134. self._print_no_nl(msg)
  135. try:
  136. salt = bcrypt.gensalt(self.BCRYPT_ROUNDS)
  137. signature = bcrypt.hashpw(key, salt)
  138. self._cipher = Blowfish.new(sha256(key).digest())
  139. except ImportError:
  140. print " error!"
  141. self._print("""Encryption requires the 'py-bcrypt' and
  142. 'pycrypto' packages:""")
  143. strt, end = " * \x1b[36m", "\x1b[0m"
  144. print strt + "http://www.mindrot.org/projects/py-bcrypt/" + end
  145. print strt + "https://www.dlitz.net/software/pycrypto/" + end
  146. self._print("""I will disable encryption for now; restart
  147. configuration after installing these packages if
  148. you want it.""")
  149. self._pause()
  150. else:
  151. self.data["metadata"]["encryptPasswords"] = True
  152. self.data["metadata"]["signature"] = signature
  153. print " done."
  154. print
  155. self._print("""The bot can temporarily store its logs in the logs/
  156. subdirectory. Error logs are kept for a month whereas
  157. normal logs are kept for a week. If you disable this,
  158. the bot will still print logs to stdout.""")
  159. logging = self._ask_bool("Enable logging?")
  160. self.data["metadata"]["enableLogging"] = logging
  161. def _set_components(self):
  162. print
  163. self._print("""The bot contains three separate components that can run
  164. independently of each other.""")
  165. self._print("""- The IRC front-end runs on a normal IRC server, like
  166. freenode, and expects users to interact with it through
  167. commands.""")
  168. self._print("""- The IRC watcher runs on a wiki recent-changes server,
  169. like irc.wikimedia.org, and listens for edits. Users
  170. cannot interact with this component. It can detect
  171. specific events and report them to "feed" channels on
  172. the front-end or start bot tasks.""")
  173. self._print("""- The wiki task scheduler runs wiki-editing bot tasks in
  174. separate threads at user-defined times through a
  175. cron-like interface. Tasks which are not scheduled can
  176. be started by the IRC watcher manually through the IRC
  177. front-end.""")
  178. frontend = self._ask_bool("Enable the IRC front-end?")
  179. watcher = self._ask_bool("Enable the IRC watcher?")
  180. scheduler = self._ask_bool("Enable the wiki task scheduler?")
  181. self.data["components"]["irc_frontend"] = frontend
  182. self.data["components"]["irc_watcher"] = watcher
  183. self.data["components"]["wiki_scheduler"] = scheduler
  184. def _login(self, kwargs):
  185. self.config.wiki._load(self.data["wiki"])
  186. self._print_no_nl("Trying to connect to the site...")
  187. try:
  188. site = self.config.bot.wiki.add_site(**kwargs)
  189. except exceptions.APIError as exc:
  190. print " API error!"
  191. print "\x1b[31m" + exc.message + "\x1b[0m"
  192. question = "Would you like to re-enter the site information?"
  193. if self._ask_bool(question):
  194. return self._set_wiki()
  195. question = "This will cancel the setup process. Are you sure?"
  196. if self._ask_bool(question, default=False):
  197. raise exceptions.NoConfigError()
  198. return self._set_wiki()
  199. except exceptions.LoginError as exc:
  200. print " login error!"
  201. print "\x1b[31m" + exc.message + "\x1b[0m"
  202. question = "Would you like to re-enter your login information?"
  203. if self._ask_bool(question):
  204. self.data["wiki"]["username"] = self._ask("Bot username:")
  205. password = self._ask_pass("Bot password:", encrypt=False)
  206. self.data["wiki"]["password"] = password
  207. return self._login(kwargs)
  208. else:
  209. password = self.data["wiki"]["password"]
  210. question = "Would you like to re-enter the site information?"
  211. if self._ask_bool(question):
  212. return self._set_wiki()
  213. print
  214. self._print("""Moving on. You can modify the login information
  215. stored in the bot's config in the future.""")
  216. self.data["wiki"]["password"] = None # Clear so we don't login
  217. self.config.wiki._load(self.data["wiki"])
  218. self._print_no_nl("Trying to connect to the site...")
  219. site = self.config.bot.wiki.add_site(**kwargs)
  220. print " success."
  221. self.data["wiki"]["password"] = password # Reset original value
  222. else:
  223. print " success."
  224. # Remember to store the encrypted password:
  225. password = self._encrypt(self.data["wiki"]["password"])
  226. self.data["wiki"]["password"] = password
  227. return site
  228. def _set_wiki(self):
  229. print
  230. self._wmf = self._ask_bool("""Will this bot run on Wikimedia Foundation
  231. wikis, like Wikipedia?""")
  232. if self._wmf:
  233. msg = "Site project (e.g. 'wikipedia', 'wiktionary', 'wikimedia'):"
  234. self._proj = project = self._ask(msg, "wikipedia").lower()
  235. msg = "Site language code (e.g. 'en', 'fr', 'commons'):"
  236. self._lang = lang = self._ask(msg, "en").lower()
  237. kwargs = {"project": project, "lang": lang}
  238. else:
  239. msg = "Site base URL, without the script path and trailing slash;"
  240. msg += " can be protocol-insensitive (e.g. '//en.wikipedia.org'):"
  241. url = self._ask(msg)
  242. script = self._ask("Site script path:", "/w")
  243. kwargs = {"base_url": url, "script_path": script}
  244. self.data["wiki"]["username"] = self._ask("Bot username:")
  245. password = self._ask_pass("Bot password:", encrypt=False)
  246. self.data["wiki"]["password"] = password
  247. self.data["wiki"]["userAgent"] = "EarwigBot/$1 (Python/$2; https://github.com/earwig/earwigbot)"
  248. self.data["wiki"]["summary"] = "([[WP:BOT|Bot]]): $2"
  249. self.data["wiki"]["useHTTPS"] = True
  250. self.data["wiki"]["assert"] = "user"
  251. self.data["wiki"]["maxlag"] = 10
  252. self.data["wiki"]["waitTime"] = 2
  253. self.data["wiki"]["defaultSite"] = self._login(kwargs).name
  254. self.data["wiki"]["sql"] = {}
  255. if self._wmf:
  256. msg = "Will this bot run from the Wikimedia Tool Labs?"
  257. labs = self._ask_bool(msg, default=False)
  258. if labs:
  259. args = [("host", "$1.labsdb"), ("db", "$1_p"),
  260. ("read_default_file", "~/replica.my.cnf")]
  261. self.data["wiki"]["sql"] = OrderedDict(args)
  262. else:
  263. msg = "Will this bot run from the Wikimedia Toolserver?"
  264. toolserver = self._ask_bool(msg, default=False)
  265. if toolserver:
  266. args = [("host", "$1-p.rrdb.toolserver.org"),
  267. ("db", "$1_p")]
  268. self.data["wiki"]["sql"] = OrderedDict(args)
  269. self.data["wiki"]["shutoff"] = {}
  270. msg = "Would you like to enable an automatic shutoff page for the bot?"
  271. if self._ask_bool(msg):
  272. print
  273. self._print("""The page title can contain two wildcards: $1 will be
  274. substituted with the bot's username, and $2 with the
  275. current task number. This can be used to implement a
  276. separate shutoff page for each task.""")
  277. page = self._ask("Page title:", "User:$1/Shutoff")
  278. msg = "Page content to indicate the bot is *not* shut off:"
  279. disabled = self._ask(msg, "run")
  280. args = [("page", page), ("disabled", disabled)]
  281. self.data["wiki"]["shutoff"] = OrderedDict(args)
  282. self.data["wiki"]["search"] = {}
  283. def _set_irc(self):
  284. if self.data["components"]["irc_frontend"]:
  285. print
  286. frontend = self.data["irc"]["frontend"] = OrderedDict()
  287. msg = "Hostname of the frontend's IRC server, without 'irc://':"
  288. frontend["host"] = self._ask(msg, "irc.freenode.net")
  289. frontend["port"] = self._ask("Frontend port:", 6667)
  290. frontend["nick"] = self._ask("Frontend bot's nickname:")
  291. frontend["ident"] = self._ask("Frontend bot's ident:",
  292. frontend["nick"].lower())
  293. question = "Frontend bot's real name (gecos):"
  294. frontend["realname"] = self._ask(question, "EarwigBot")
  295. if self._ask_bool("Should the bot identify to NickServ?"):
  296. ns_user = self._ask("NickServ username:", frontend["nick"])
  297. ns_pass = self._ask_pass("Nickserv password:")
  298. frontend["nickservUsername"] = ns_user
  299. frontend["nickservPassword"] = ns_pass
  300. chan_question = "Frontend channels to join by default:"
  301. frontend["channels"] = self._ask_list(chan_question)
  302. print
  303. self._print("""The bot keeps a database of its admins (users who
  304. can use certain sensitive commands) and owners
  305. (users who can quit the bot and modify its access
  306. list), identified by nick, ident, and/or hostname.
  307. Hostname is the most secure option since it cannot
  308. be easily spoofed. If you have a cloak, this will
  309. probably look like 'wikipedia/Username' or
  310. 'unaffiliated/nickname'.""")
  311. host = self._ask("Your hostname on the frontend:", require=False)
  312. if host:
  313. permdb = self.config._permissions
  314. permdb.load()
  315. permdb.add_owner(host=host)
  316. permdb.add_admin(host=host)
  317. else:
  318. frontend = {}
  319. if self.data["components"]["irc_watcher"]:
  320. print
  321. watcher = self.data["irc"]["watcher"] = OrderedDict()
  322. if self._wmf:
  323. watcher["host"] = "irc.wikimedia.org"
  324. watcher["port"] = 6667
  325. else:
  326. msg = "Hostname of the watcher's IRC server, without 'irc://':"
  327. watcher["host"] = self._ask(msg)
  328. watcher["port"] = self._ask("Watcher port:", 6667)
  329. nick = self._ask("Watcher bot's nickname:", frontend.get("nick"))
  330. ident = self._ask("Watcher bot's ident:", nick.lower())
  331. watcher["nick"] = nick
  332. watcher["ident"] = ident
  333. question = "Watcher bot's real name (gecos):"
  334. default = frontend.get("realname", "EarwigBot")
  335. watcher["realname"] = self._ask(question, default)
  336. watcher_ns = "Should the bot identify to NickServ?"
  337. if not self._wmf and self._ask_bool(watcher_ns):
  338. ns_user = self._ask("NickServ username:", watcher["nick"])
  339. ns_pass = self._ask_pass("Nickserv password:")
  340. watcher["nickservUsername"] = ns_user
  341. watcher["nickservPassword"] = ns_pass
  342. if self._wmf:
  343. chan = "#{0}.{1}".format(self._lang, self._proj)
  344. watcher["channels"] = [chan]
  345. else:
  346. chan_question = "Watcher channels to join by default:"
  347. watcher["channels"] = self._ask_list(chan_question)
  348. print
  349. self._print("""I am now creating a blank 'rules.py' file, which
  350. will determine how the bot handles messages received
  351. from the IRC watcher. It contains a process()
  352. function that takes a Bot object (allowing you to
  353. start tasks) and an RC object (storing the message
  354. from the watcher). See the documentation for
  355. details.""")
  356. with open(path.join(self.config.root_dir, "rules.py"), "w") as fp:
  357. fp.write(RULES_TEMPLATE)
  358. self._pause()
  359. self.data["irc"]["version"] = "EarwigBot - $1 - Python/$2 https://github.com/earwig/earwigbot"
  360. def _set_commands(self):
  361. print
  362. msg = """Would you like to disable the default IRC commands? You can
  363. fine-tune which commands are disabled later on."""
  364. if (not self.data["components"]["irc_frontend"] or
  365. self._ask_bool(msg, default=False)):
  366. self.data["commands"]["disable"] = True
  367. print
  368. self._print("""I am now creating the 'commands/' directory, where you
  369. can place custom IRC commands and plugins. Creating your
  370. own commands is described in the documentation.""")
  371. mkdir(path.join(self.config.root_dir, "commands"))
  372. self._pause()
  373. def _set_tasks(self):
  374. print
  375. self._print("""I am now creating the 'tasks/' directory, where you can
  376. place custom bot tasks and plugins. Creating your own
  377. tasks is described in the documentation.""")
  378. mkdir(path.join(self.config.root_dir, "tasks"))
  379. self._pause()
  380. def _set_schedule(self):
  381. print
  382. self._print("""The final section of your config file, 'schedule', is a
  383. list of bot tasks to be started by the wiki scheduler.
  384. Each entry contains cron-like time quantifiers and a
  385. list of tasks. For example, the following starts the
  386. 'foobot' task every hour on the half-hour:""")
  387. print "\x1b[33mschedule:"
  388. print " - minute: 30"
  389. print " tasks:"
  390. print " - foobot\x1b[0m"
  391. self._print("""The following starts the 'barbot' task with the keyword
  392. arguments 'action="baz"' every Monday at 05:00 UTC:""")
  393. print "\x1b[33m - week_day: 1"
  394. print " hour: 5"
  395. print " tasks:"
  396. print ' - ["barbot", {"action": "baz"}]\x1b[0m'
  397. self._print("""The full list of quantifiers is minute, hour, month_day,
  398. month, and week_day. See the documentation for more
  399. information.""")
  400. self._pause()
  401. def _save(self):
  402. with open(self.config.path, "w") as stream:
  403. yaml.dump(self.data, stream, OrderedDumper, indent=4,
  404. allow_unicode=True, default_flow_style=False)
  405. def make_new(self):
  406. """Make a new config file based on the user's input."""
  407. try:
  408. open(self.config.path, "w").close()
  409. chmod(self.config.path, stat.S_IRUSR|stat.S_IWUSR)
  410. except IOError:
  411. print "I can't seem to write to the config file:"
  412. raise
  413. self._set_metadata()
  414. self._set_components()
  415. self._set_wiki()
  416. components = self.data["components"]
  417. if components["irc_frontend"] or components["irc_watcher"]:
  418. self._set_irc()
  419. self._set_commands()
  420. self._set_tasks()
  421. if components["wiki_scheduler"]:
  422. self._set_schedule()
  423. print
  424. self._print("""I am now saving config.yml with your settings. YAML is a
  425. relatively straightforward format and you should be able
  426. to update these settings in the future when necessary.
  427. I will start the bot at your signal. Feel free to
  428. contact me at wikipedia.earwig@gmail.com if you have any
  429. questions.""")
  430. self._save()
  431. if not self._ask_bool("Start the bot now?"):
  432. exit()