A Python robot that edits Wikipedia and interacts with people over IRC https://en.wikipedia.org/wiki/User:EarwigBot
選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

312 行
11 KiB

  1. # -*- coding: utf-8 -*-
  2. """
  3. EarwigBot's XML Config File Parser
  4. This handles all tasks involving reading and writing to our config file,
  5. including encrypting and decrypting passwords and making a new config file from
  6. scratch at the initial bot run.
  7. Usually you'll just want to do "from core import config" and access config data
  8. from within config's four global variables:
  9. * config.components
  10. * config.wiki
  11. * config.irc
  12. * config.schedule
  13. """
  14. from collections import defaultdict
  15. from os import makedirs, path
  16. from xml.dom import minidom
  17. from xml.parsers.expat import ExpatError
  18. from lib import blowfish
  19. script_dir = path.dirname(path.abspath(__file__))
  20. root_dir = path.split(script_dir)[0]
  21. config_path = path.join(root_dir, "config.xml")
  22. _config = None # holds the parsed DOM object for our config file
  23. # initialize our five global variables to store config data
  24. components, wiki, irc, schedule, watcher = (None, None, None, None, None)
  25. class ConfigParseError(Exception):
  26. """Base exception for when we could not parse the config file."""
  27. class TypeMismatchError(ConfigParseError):
  28. """A field does not fit to its expected type; e.g., an arbitrary string
  29. where we expected a boolean or integer."""
  30. class MissingElementError(ConfigParseError):
  31. """An element in the config file is missing a required sub-element."""
  32. class MissingAttributeError(ConfigParseError):
  33. """An element is missing a required attribute to be parsed correctly."""
  34. class Container(object):
  35. """A class to hold information in a nice, accessable manner."""
  36. def _load_config():
  37. """Load data from our XML config file (config.xml) into a DOM object."""
  38. global _config
  39. _config = minidom.parse(config_path)
  40. def verify_config():
  41. """Check to see if we have a valid config file, and if not, notify the
  42. user. If there is no config file at all, offer to make one; otherwise,
  43. exit."""
  44. if path.exists(config_path):
  45. try:
  46. _load_config()
  47. except ExpatError as error:
  48. print "Could not parse config file {0}:\n{1}".format(config_path,
  49. error)
  50. exit()
  51. else:
  52. if not _config.getElementsByTagName("config"):
  53. e = "Config file is missing a <config> tag."
  54. raise MissingElementError(e)
  55. return are_passwords_encrypted()
  56. else:
  57. print "You haven't configured the bot yet!"
  58. choice = raw_input("Would you like to do this now? [y/n] ")
  59. if choice.lower().startswith("y"):
  60. return make_new_config()
  61. else:
  62. exit()
  63. def make_new_config():
  64. """Make a new XML config file based on the user's input."""
  65. makedirs(config_dir)
  66. encrypt = raw_input("Would you like to encrypt passwords stored in " +
  67. "config.xml? [y/n] ")
  68. if encrypt.lower().startswith("y"):
  69. is_encrypted = True
  70. else:
  71. is_encrypted = False
  72. return is_encrypted
  73. def are_passwords_encrypted():
  74. """Determine if the passwords in our config file are encrypted; return
  75. either True or False, or raise an exception if there was a problem reading
  76. the config file."""
  77. element = _config.getElementsByTagName("config")[0]
  78. attribute = element.getAttribute("encrypt-passwords")
  79. if not attribute:
  80. return False
  81. return attribute_to_bool(attribute, element, "encrypt-passwords")
  82. def get_first_element(parent, tag_name):
  83. """Return the first child of the parent element with the given tag name, or
  84. return None if no child of that name exists."""
  85. try:
  86. return parent.getElementsByTagName(tag_name)[0]
  87. except IndexError:
  88. return None
  89. def get_required_element(parent, tag_name):
  90. """Return the first child of the parent element with the given tag name, or
  91. raise MissingElementError() if no child of that name exists."""
  92. element = get_first_element(parent, tag_name)
  93. if not element:
  94. e = "A <{0}> tag is missing a required <{1}> child tag.".format(
  95. parent.tagName, tag_name)
  96. raise MissingElementError(e)
  97. return element
  98. def get_required_attribute(element, attr_name):
  99. """Return the value of the attribute 'attr_name' in 'element'. If
  100. undefined, raise MissingAttributeError()."""
  101. attribute = element.getAttribute(attr_name)
  102. if not attribute:
  103. e = "A <{0}> tag is missing the required attribute '{1}'.".format(
  104. element.tagName, attr_name)
  105. raise MissingAttributeError(e)
  106. return attribute
  107. def attribute_to_bool(value, element, attr_name):
  108. """Return True if 'value' is 'true', '1', or 'on', return False if it is
  109. 'false', '0', or 'off' (regardless of capitalization), or raise
  110. TypeMismatchError() if it does match any of those. 'element' and
  111. 'attr_name' are only used to generate the error message."""
  112. lcase = value.lower()
  113. if lcase in ["true", "1", "on"]:
  114. return True
  115. elif lcase in ["false", "0", "off"]:
  116. return False
  117. else:
  118. e = ("Expected a bool in attribute '{0}' of tag '{1}', but got '{2}'."
  119. ).format(attr_name, element.tagName, value)
  120. raise TypeMismatchError(e)
  121. def attribute_to_int(value, element, attr_name):
  122. """Return 'value' after it is converted to an integer. If it could not be
  123. converted, raise TypeMismatchError() using 'element' and 'attr_name' only
  124. to give the user information about what happened."""
  125. try:
  126. return int(value)
  127. except ValueError:
  128. e = ("Expected an integer in attribute '{0}' of tag '{1}', but got " +
  129. "'{2}'.").format(attr_name, element.tagName, value)
  130. raise TypeMismatchError(e)
  131. def parse_config(key):
  132. """A thin wrapper for the actual config parser in _parse_config(): catch
  133. parsing exceptions and report them to the user cleanly."""
  134. try:
  135. _parse_config(key)
  136. except ConfigParseError as error:
  137. print "\nError parsing config file:"
  138. print error
  139. exit(1)
  140. except blowfish.BlowfishError as error:
  141. print "\nError decrypting passwords:"
  142. print "{0}: {1}.".format(error.__class__.__name__, error)
  143. exit(1)
  144. def _parse_config(key):
  145. """Parse config data from a DOM object into the four global variables that
  146. store our config info. The key is used to unencrypt passwords stored in the
  147. XML config file."""
  148. global components, wiki, irc, schedule
  149. _load_config() # we might be re-loading unnecessarily here, but no harm in
  150. # that!
  151. data = _config.getElementsByTagName("config")[0]
  152. components = parse_components(data)
  153. wiki = parse_wiki(data, key)
  154. irc = parse_irc(data, key)
  155. schedule = parse_schedule(data)
  156. def parse_components(data):
  157. """Parse everything within the <components> XML tag of our config file.
  158. The components object here will exist as config.components, and is a dict
  159. of our enabled components: components[name] = True if it is enabled, False
  160. if it is disabled."""
  161. components = defaultdict(lambda: False) # all components are disabled by
  162. # default
  163. element = get_required_element(data, "components")
  164. for component in element.getElementsByTagName("component"):
  165. name = get_required_attribute(component, "name")
  166. components[name] = True
  167. return components
  168. def parse_wiki(data, key):
  169. """Parse everything within the <wiki> tag of our XML config file."""
  170. pass
  171. def parse_irc_server(data, key):
  172. """Parse everything within a <server> tag."""
  173. server = Container()
  174. connection = get_required_element(data, "connection")
  175. server.host = get_required_attribute(connection, "host")
  176. server.port = get_required_attribute(connection, "port")
  177. server.nick = get_required_attribute(connection, "nick")
  178. server.ident = get_required_attribute(connection, "ident")
  179. server.realname = get_required_attribute(connection, "realname")
  180. # convert the port from a string to an int
  181. server.port = attribute_to_int(server.port, connection, "port")
  182. nickserv = get_first_element(data, "nickserv")
  183. if nickserv:
  184. server.nickserv = Container()
  185. server.nickserv.username = get_required_attribute(nickserv, "username")
  186. password = get_required_attribute(nickserv, "password")
  187. if are_passwords_encrypted():
  188. server.nickserv.password = blowfish.decrypt(key, password)
  189. else:
  190. server.nickserv.password = password
  191. else:
  192. server.nickserv = None
  193. server.channels = list()
  194. channels = get_first_element(data, "channels")
  195. if channels:
  196. for channel in channels.getElementsByTagName("channel"):
  197. name = get_required_attribute(channel, "name")
  198. server.channels.append(name)
  199. return server
  200. def parse_irc(data, key):
  201. """Parse everything within the <irc> tag of our XML config file."""
  202. irc = Container()
  203. element = get_first_element(data, "irc")
  204. if not element:
  205. return irc
  206. servers = get_first_element(element, "servers")
  207. if servers:
  208. for server in servers.getElementsByTagName("server"):
  209. server_name = get_required_attribute(server, "name")
  210. if server_name == "frontend":
  211. irc.frontend = parse_irc_server(server, key)
  212. elif server_name == "watcher":
  213. irc.watcher = parse_irc_server(server, key)
  214. else:
  215. print ("Warning: config file specifies a <server> with " +
  216. "unknown name '{0}'. Ignoring.").format(server_name)
  217. permissions = get_first_element(element, "permissions")
  218. if permissions:
  219. irc.permissions = dict()
  220. for group in permissions.getElementsByTagName("group"):
  221. group_name = get_required_attribute(group, "name")
  222. irc.permissions[group_name] = list()
  223. for user in group.getElementsByTagName("user"):
  224. hostname = get_required_attribute(user, "host")
  225. irc.permissions[group_name].append(hostname)
  226. return irc
  227. def parse_schedule(data):
  228. """Store the <schedule> element in schedule.data and the _schedule()
  229. function as schedule.check()."""
  230. schedule = Container()
  231. schedule.check = _schedule
  232. schedule.data = get_first_element(data, "schedule")
  233. return schedule
  234. def _schedule(minute, hour, month_day, month, week_day):
  235. """Return a list of tasks that are scheduled to run at the time specified
  236. by the function args. The schedule data comes from our config file's
  237. <schedule> tag, which is stored as schedule.data. Call this function with
  238. config.schedule.check(args)."""
  239. tasks = [] # tasks to run this turn, each as a tuple of (task_name,
  240. # kwargs), or just task_name
  241. now = {"minute": minute, "hour": hour, "month_day": month_day,
  242. "month": month, "week_day": week_day}
  243. for when in schedule.data.getElementsByTagName("when"):
  244. do = True
  245. for key, value in now.items():
  246. if when.hasAttribute(key):
  247. req = when.getAttribute(key)
  248. if attribute_to_int(req, when, key) != value:
  249. do = False
  250. break
  251. if do:
  252. for task in when.getElementsByTagName("task"):
  253. name = get_required_attribute(task, "name")
  254. args = dict()
  255. for key in task.attributes.keys():
  256. args[key] = task.getAttribute(key)
  257. del args["name"]
  258. if args:
  259. tasks.append((name, args))
  260. else:
  261. tasks.append(name)
  262. return tasks