A Python robot that edits Wikipedia and interacts with people over IRC https://en.wikipedia.org/wiki/User:EarwigBot
Ви не можете вибрати більше 25 тем Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

447 рядки
20 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from collections import OrderedDict
  23. from getpass import getpass
  24. from hashlib import sha256
  25. from os import chmod, mkdir, path
  26. import re
  27. import stat
  28. import sys
  29. from textwrap import fill, wrap
  30. from Crypto.Cipher import Blowfish
  31. import bcrypt
  32. import yaml
  33. from earwigbot import exceptions
  34. from earwigbot.config.ordered_yaml import OrderedDumper
# Public API of this module: only the interactive setup script is exported.
__all__ = ["ConfigScript"]
  36. RULES_TEMPLATE = """# -*- coding: utf-8 -*-
  37. def process(bot, rc):
  38. \"\"\"Given a Bot() object and an RC() object, return a list of channels
  39. to report this event to. Also, start any wiki bot tasks within this
  40. function if necessary.\"\"\"
  41. pass
  42. """
  43. class ConfigScript(object):
  44. """A script to guide a user through the creation of a new config file."""
  45. WIDTH = 79
  46. PROMPT = "\x1b[32m> \x1b[0m"
  47. BCRYPT_ROUNDS = 12
  48. def __init__(self, config):
  49. self.config = config
  50. self.data = OrderedDict([
  51. ("metadata", OrderedDict()),
  52. ("components", OrderedDict()),
  53. ("wiki", OrderedDict()),
  54. ("irc", OrderedDict()),
  55. ("commands", OrderedDict()),
  56. ("tasks", OrderedDict()),
  57. ("schedule", [])
  58. ])
  59. self._cipher = None
  60. self._wmf = False
  61. self._proj = None
  62. self._lang = None
  63. def _print(self, text):
  64. print fill(re.sub("\s\s+", " ", text), self.WIDTH)
  65. def _print_no_nl(self, text):
  66. sys.stdout.write(fill(re.sub("\s\s+", " ", text), self.WIDTH))
  67. sys.stdout.flush()
  68. def _pause(self):
  69. raw_input(self.PROMPT + "Press enter to continue: ")
  70. def _ask(self, text, default=None, require=True):
  71. text = self.PROMPT + text
  72. if default:
  73. text += " \x1b[33m[{0}]\x1b[0m".format(default)
  74. lines = wrap(re.sub("\s\s+", " ", text), self.WIDTH)
  75. if len(lines) > 1:
  76. print "\n".join(lines[:-1])
  77. while True:
  78. answer = raw_input(lines[-1] + " ") or default
  79. if answer or not require:
  80. return answer
  81. def _ask_bool(self, text, default=True):
  82. text = self.PROMPT + text
  83. if default:
  84. text += " \x1b[33m[Y/n]\x1b[0m"
  85. else:
  86. text += " \x1b[33m[y/N]\x1b[0m"
  87. lines = wrap(re.sub("\s\s+", " ", text), self.WIDTH)
  88. if len(lines) > 1:
  89. print "\n".join(lines[:-1])
  90. while True:
  91. answer = raw_input(lines[-1] + " ").lower()
  92. if not answer:
  93. return default
  94. if answer.startswith("y"):
  95. return True
  96. if answer.startswith("n"):
  97. return False
  98. def _ask_pass(self, text, encrypt=True):
  99. password = getpass(self.PROMPT + text + " ")
  100. if encrypt:
  101. return self._encrypt(password)
  102. return password
  103. def _encrypt(self, password):
  104. if self._cipher:
  105. mod = len(password) % 8
  106. if mod:
  107. password = password.ljust(len(password) + (8 - mod), "\x00")
  108. return self._cipher.encrypt(password).encode("hex")
  109. else:
  110. return password
  111. def _ask_list(self, text):
  112. print fill(re.sub("\s\s+", " ", self.PROMPT + text), self.WIDTH)
  113. print "[one item per line; blank line to end]:"
  114. result = []
  115. while True:
  116. line = raw_input(self.PROMPT)
  117. if line:
  118. result.append(line)
  119. else:
  120. return result
  121. def _set_metadata(self):
  122. print
  123. self.data["metadata"] = OrderedDict([("version", 1)])
  124. self._print("""I can encrypt passwords stored in your config file in
  125. addition to preventing other users on your system from
  126. reading the file. Encryption is recommended if the bot
  127. is to run on a public computer like the Toolserver, but
  128. otherwise the need to enter a key everytime you start
  129. the bot may be annoying.""")
  130. if self._ask_bool("Encrypt stored passwords?"):
  131. self.data["metadata"]["encryptPasswords"] = True
  132. key = getpass(self.PROMPT + "Enter an encryption key: ")
  133. msg = "Running {0} rounds of bcrypt...".format(self.BCRYPT_ROUNDS)
  134. self._print_no_nl(msg)
  135. signature = bcrypt.hashpw(key, bcrypt.gensalt(self.BCRYPT_ROUNDS))
  136. self.data["metadata"]["signature"] = signature
  137. self._cipher = Blowfish.new(sha256(key).digest())
  138. print " done."
  139. else:
  140. self.data["metadata"]["encryptPasswords"] = False
  141. print
  142. self._print("""The bot can temporarily store its logs in the logs/
  143. subdirectory. Error logs are kept for a month whereas
  144. normal logs are kept for a week. If you disable this,
  145. the bot will still print logs to stdout.""")
  146. logging = self._ask_bool("Enable logging?")
  147. self.data["metadata"]["enableLogging"] = logging
  148. def _set_components(self):
  149. print
  150. self._print("""The bot contains three separate components that can run
  151. independently of each other.""")
  152. self._print("""- The IRC front-end runs on a normal IRC server, like
  153. freenode, and expects users to interact with it through
  154. commands.""")
  155. self._print("""- The IRC watcher runs on a wiki recent-changes server,
  156. like irc.wikimedia.org, and listens for edits. Users
  157. cannot interact with this component. It can detect
  158. specific events and report them to "feed" channels on
  159. the front-end or start bot tasks.""")
  160. self._print("""- The wiki task scheduler runs wiki-editing bot tasks in
  161. separate threads at user-defined times through a
  162. cron-like interface. Tasks which are not scheduled can
  163. be started by the IRC watcher manually through the IRC
  164. front-end.""")
  165. frontend = self._ask_bool("Enable the IRC front-end?")
  166. watcher = self._ask_bool("Enable the IRC watcher?")
  167. scheduler = self._ask_bool("Enable the wiki task scheduler?")
  168. self.data["components"]["irc_frontend"] = frontend
  169. self.data["components"]["irc_watcher"] = watcher
  170. self.data["components"]["wiki_scheduler"] = scheduler
  171. def _login(self, kwargs):
  172. self.config.wiki._load(self.data["wiki"])
  173. self._print_no_nl("Trying to connect to the site...")
  174. try:
  175. site = self.config.bot.wiki.add_site(**kwargs)
  176. except exceptions.APIError as exc:
  177. print " API error!"
  178. print "\x1b[31m" + exc.message + "\x1b[0m"
  179. question = "Would you like to re-enter the site information?"
  180. if self._ask_bool(question):
  181. return self._set_wiki()
  182. question = "This will cancel the setup process. Are you sure?"
  183. if self._ask_bool(question, default=False):
  184. raise exceptions.NoConfigError()
  185. return self._set_wiki()
  186. except exceptions.LoginError as exc:
  187. print " login error!"
  188. print "\x1b[31m" + exc.message + "\x1b[0m"
  189. question = "Would you like to re-enter your login information?"
  190. if self._ask_bool(question):
  191. self.data["wiki"]["username"] = self._ask("Bot username:")
  192. password = self._ask_pass("Bot password:", encrypt=False)
  193. self.data["wiki"]["password"] = password
  194. return self._login(kwargs)
  195. else:
  196. password = self.data["wiki"]["password"]
  197. question = "Would you like to re-enter the site information?"
  198. if self._ask_bool(question):
  199. return self._set_wiki()
  200. print
  201. self._print("""Moving on. You can modify the login information
  202. stored in the bot's config in the future.""")
  203. self.data["wiki"]["password"] = None # Clear so we don't login
  204. self.config.wiki._load(self.data["wiki"])
  205. self._print_no_nl("Trying to connect to the site...")
  206. site = self.config.bot.wiki.add_site(**kwargs)
  207. print " success."
  208. self.data["wiki"]["password"] = password # Reset original value
  209. else:
  210. print " success."
  211. # Remember to store the encrypted password:
  212. password = self._encrypt(self.data["wiki"]["password"])
  213. self.data["wiki"]["password"] = password
  214. return site
  215. def _set_wiki(self):
  216. print
  217. self._wmf = self._ask_bool("""Will this bot run on Wikimedia Foundation
  218. wikis, like Wikipedia?""")
  219. if self._wmf:
  220. msg = "Site project (e.g. 'wikipedia', 'wiktionary', 'wikimedia'):"
  221. self._proj = project = self._ask(msg, "wikipedia").lower()
  222. msg = "Site language code (e.g. 'en', 'fr', 'commons'):"
  223. self._lang = lang = self._ask(msg, "en").lower()
  224. kwargs = {"project": project, "lang": lang}
  225. else:
  226. msg = "Site base URL, without the script path and trailing slash;"
  227. msg += " can be protocol-insensitive (e.g. '//en.wikipedia.org'):"
  228. url = self._ask(msg)
  229. script = self._ask("Site script path:", "/w")
  230. kwargs = {"base_url": url, "script_path": script}
  231. self.data["wiki"]["username"] = self._ask("Bot username:")
  232. password = self._ask_pass("Bot password:", encrypt=False)
  233. self.data["wiki"]["password"] = password
  234. self.data["wiki"]["userAgent"] = "EarwigBot/$1 (Python/$2; https://github.com/earwig/earwigbot)"
  235. self.data["wiki"]["summary"] = "([[WP:BOT|Bot]]): $2"
  236. self.data["wiki"]["useHTTPS"] = True
  237. self.data["wiki"]["assert"] = "user"
  238. self.data["wiki"]["maxlag"] = 10
  239. self.data["wiki"]["waitTime"] = 2
  240. self.data["wiki"]["defaultSite"] = self._login(kwargs).name
  241. self.data["wiki"]["sql"] = {}
  242. if self._wmf:
  243. msg = "Will this bot run from the Wikimedia Toolserver?"
  244. toolserver = self._ask_bool(msg, default=False)
  245. if toolserver:
  246. args = [("host", "$1-p.rrdb.toolserver.org"), ("db", "$1_p")]
  247. self.data["wiki"]["sql"] = OrderedDict(args)
  248. self.data["wiki"]["shutoff"] = {}
  249. msg = "Would you like to enable an automatic shutoff page for the bot?"
  250. if self._ask_bool(msg):
  251. print
  252. self._print("""The page title can contain two wildcards: $1 will be
  253. substituted with the bot's username, and $2 with the
  254. current task number. This can be used to implement a
  255. separate shutoff page for each task.""")
  256. page = self._ask("Page title:", "User:$1/Shutoff")
  257. msg = "Page content to indicate the bot is *not* shut off:"
  258. disabled = self._ask(msg, "run")
  259. args = [("page", page), ("disabled", disabled)]
  260. self.data["wiki"]["shutoff"] = OrderedDict(args)
  261. self.data["wiki"]["search"] = {}
  262. def _set_irc(self):
  263. if self.data["components"]["irc_frontend"]:
  264. print
  265. frontend = self.data["irc"]["frontend"] = OrderedDict()
  266. msg = "Hostname of the frontend's IRC server, without 'irc://':"
  267. frontend["host"] = self._ask(msg, "irc.freenode.net")
  268. frontend["port"] = self._ask("Frontend port:", 6667)
  269. frontend["nick"] = self._ask("Frontend bot's nickname:")
  270. frontend["ident"] = self._ask("Frontend bot's ident:",
  271. frontend["nick"].lower())
  272. question = "Frontend bot's real name (gecos):"
  273. frontend["realname"] = self._ask(question, "EarwigBot")
  274. if self._ask_bool("Should the bot identify to NickServ?"):
  275. ns_user = self._ask("NickServ username:", frontend["nick"])
  276. ns_pass = self._ask_pass("Nickserv password:")
  277. frontend["nickservUsername"] = ns_user
  278. frontend["nickservPassword"] = ns_pass
  279. chan_question = "Frontend channels to join by default:"
  280. frontend["channels"] = self._ask_list(chan_question)
  281. print
  282. self._print("""The bot keeps a database of its admins (users who
  283. can use certain sensitive commands) and owners
  284. (users who can quit the bot and modify its access
  285. list), identified by nick, ident, and/or hostname.
  286. Hostname is the most secure option since it cannot
  287. be easily spoofed. If you have a cloak, this will
  288. probably look like 'wikipedia/Username' or
  289. 'unaffiliated/nickname'.""")
  290. host = self._ask("Your hostname on the frontend:", require=False)
  291. if host:
  292. permdb = self.config._permissions
  293. permdb.load()
  294. permdb.add_owner(host=host)
  295. permdb.add_admin(host=host)
  296. else:
  297. frontend = {}
  298. if self.data["components"]["irc_watcher"]:
  299. print
  300. watcher = self.data["irc"]["watcher"] = OrderedDict()
  301. if self._wmf:
  302. watcher["host"] = "irc.wikimedia.org"
  303. watcher["port"] = 6667
  304. else:
  305. msg = "Hostname of the watcher's IRC server, without 'irc://':"
  306. watcher["host"] = self._ask(msg)
  307. watcher["port"] = self._ask("Watcher port:", 6667)
  308. nick = self._ask("Watcher bot's nickname:", frontend.get("nick"))
  309. ident = self._ask("Watcher bot's ident:", nick.lower())
  310. watcher["nick"] = nick
  311. watcher["ident"] = ident
  312. question = "Watcher bot's real name (gecos):"
  313. default = frontend.get("realname", "EarwigBot")
  314. watcher["realname"] = self._ask(question, default)
  315. watcher_ns = "Should the bot identify to NickServ?"
  316. if not self._wmf and self._ask_bool(watcher_ns):
  317. ns_user = self._ask("NickServ username:", watcher["nick"])
  318. ns_pass = self._ask_pass("Nickserv password:")
  319. watcher["nickservUsername"] = ns_user
  320. watcher["nickservPassword"] = ns_pass
  321. if self._wmf:
  322. chan = "#{0}.{1}".format(self._lang, self._proj)
  323. watcher["channels"] = [chan]
  324. else:
  325. chan_question = "Watcher channels to join by default:"
  326. watcher["channels"] = self._ask_list(chan_question)
  327. print
  328. self._print("""I am now creating a blank 'rules.py' file, which
  329. will determine how the bot handles messages received
  330. from the IRC watcher. It contains a process()
  331. function that takes a Bot object (allowing you to
  332. start tasks) and an RC object (storing the message
  333. from the watcher). See the documentation for
  334. details.""")
  335. with open(path.join(self.config.root_dir, "rules.py"), "w") as fp:
  336. fp.write(RULES_TEMPLATE)
  337. self._pause()
  338. self.data["irc"]["version"] = "EarwigBot - $1 - Python/$2 https://github.com/earwig/earwigbot"
  339. def _set_commands(self):
  340. print
  341. msg = """Would you like to disable the default IRC commands? You can
  342. fine-tune which commands are disabled later on."""
  343. if (not self.data["components"]["irc_frontend"] or
  344. self._ask_bool(msg, default=False)):
  345. self.data["commands"]["disable"] = True
  346. print
  347. self._print("""I am now creating the 'commands/' directory, where you
  348. can place custom IRC commands and plugins. Creating your
  349. own commands is described in the documentation.""")
  350. mkdir(path.join(self.config.root_dir, "commands"))
  351. self._pause()
  352. def _set_tasks(self):
  353. print
  354. self._print("""I am now creating the 'tasks/' directory, where you can
  355. place custom bot tasks and plugins. Creating your own
  356. tasks is described in the documentation.""")
  357. mkdir(path.join(self.config.root_dir, "tasks"))
  358. self._pause()
  359. def _set_schedule(self):
  360. print
  361. self._print("""The final section of your config file, 'schedule', is a
  362. list of bot tasks to be started by the wiki scheduler.
  363. Each entry contains cron-like time quantifiers and a
  364. list of tasks. For example, the following starts the
  365. 'foobot' task every hour on the half-hour:""")
  366. print "\x1b[33mschedule:"
  367. print " - minute: 30"
  368. print " tasks:"
  369. print " - foobot\x1b[0m"
  370. self._print("""The following starts the 'barbot' task with the keyword
  371. arguments 'action="baz"' every Monday at 05:00 UTC:""")
  372. print "\x1b[33m - week_day: 1"
  373. print " hour: 5"
  374. print " tasks:"
  375. print ' - ["barbot", {"action": "baz"}]\x1b[0m'
  376. self._print("""The full list of quantifiers is minute, hour, month_day,
  377. month, and week_day. See the documentation for more
  378. information.""")
  379. self._pause()
  380. def _save(self):
  381. with open(self.config.path, "w") as stream:
  382. yaml.dump(self.data, stream, OrderedDumper, indent=4,
  383. allow_unicode=True, default_flow_style=False)
  384. def make_new(self):
  385. """Make a new config file based on the user's input."""
  386. try:
  387. open(self.config.path, "w").close()
  388. chmod(self.config.path, stat.S_IRUSR|stat.S_IWUSR)
  389. except IOError:
  390. print "I can't seem to write to the config file:"
  391. raise
  392. self._set_metadata()
  393. self._set_components()
  394. self._set_wiki()
  395. components = self.data["components"]
  396. if components["irc_frontend"] or components["irc_watcher"]:
  397. self._set_irc()
  398. self._set_commands()
  399. self._set_tasks()
  400. if components["wiki_scheduler"]:
  401. self._set_schedule()
  402. print
  403. self._print("""I am now saving config.yml with your settings. YAML is a
  404. relatively straightforward format and you should be able
  405. to update these settings in the future when necessary.
  406. I will start the bot at your signal. Feel free to
  407. contact me at wikipedia.earwig@gmail.com if you have any
  408. questions.""")
  409. self._save()
  410. if not self._ask_bool("Start the bot now?"):
  411. exit()