A Python robot that edits Wikipedia and interacts with people over IRC https://en.wikipedia.org/wiki/User:EarwigBot
Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. # -*- coding: utf-8 -*-
  2. """
  3. EarwigBot's XML Config File Parser
  4. This handles all tasks involving reading and writing to our config file,
  5. including encrypting and decrypting passwords and making a new config file from
  6. scratch at the initial bot run.
  7. Usually you'll just want to do "from core import config" and access config data
  8. from within config's five global variables:
  9. * config.components
  10. * config.wiki
  11. * config.irc
  12. * config.schedule
  13. * config.watcher
  14. """
  15. from collections import defaultdict
  16. from os import makedirs, path
  17. from xml.dom import minidom
  18. from xml.parsers.expat import ExpatError
  19. from lib import blowfish
  20. script_dir = path.dirname(path.abspath(__file__))
  21. root_dir = path.split(script_dir)[0]
  22. config_path = path.join(root_dir, "config.xml")
  23. _config = None # holds the parsed DOM object for our config file
  24. # initialize our five global variables to store config data
  25. components, wiki, irc, schedule, watcher = (None, None, None, None, None)
  26. class ConfigParseError(Exception):
  27. """Base exception for when we could not parse the config file."""
  28. class TypeMismatchError(ConfigParseError):
  29. """A field does not fit to its expected type; e.g., an arbitrary string
  30. where we expected a boolean or integer."""
  31. class MissingElementError(ConfigParseError):
  32. """An element in the config file is missing a required sub-element."""
  33. class MissingAttributeError(ConfigParseError):
  34. """An element is missing a required attribute to be parsed correctly."""
  35. class Container(object):
  36. """A class to hold information in a nice, accessable manner."""
  37. def _load_config():
  38. """Load data from our XML config file (config.xml) into a DOM object."""
  39. global _config
  40. _config = minidom.parse(config_path)
  41. def verify_config():
  42. """Check to see if we have a valid config file, and if not, notify the
  43. user. If there is no config file at all, offer to make one; otherwise,
  44. exit."""
  45. if path.exists(config_path):
  46. try:
  47. _load_config()
  48. except ExpatError as error:
  49. print "Could not parse config file {0}:\n{1}".format(config_path,
  50. error)
  51. exit()
  52. else:
  53. if not _config.getElementsByTagName("config"):
  54. e = "Config file is missing a <config> tag."
  55. raise MissingElementError(e)
  56. return are_passwords_encrypted()
  57. else:
  58. print "You haven't configured the bot yet!"
  59. choice = raw_input("Would you like to do this now? [y/n] ")
  60. if choice.lower().startswith("y"):
  61. return make_new_config()
  62. else:
  63. exit()
  64. def make_new_config():
  65. """Make a new XML config file based on the user's input."""
  66. makedirs(config_dir)
  67. encrypt = raw_input("Would you like to encrypt passwords stored in " +
  68. "config.xml? [y/n] ")
  69. if encrypt.lower().startswith("y"):
  70. is_encrypted = True
  71. else:
  72. is_encrypted = False
  73. return is_encrypted
  74. def are_passwords_encrypted():
  75. """Determine if the passwords in our config file are encrypted; return
  76. either True or False, or raise an exception if there was a problem reading
  77. the config file."""
  78. element = _config.getElementsByTagName("config")[0]
  79. attribute = element.getAttribute("encrypt-passwords")
  80. if not attribute:
  81. return False
  82. return attribute_to_bool(attribute, element, "encrypt-passwords")
  83. def get_first_element(parent, tag_name):
  84. """Return the first child of the parent element with the given tag name, or
  85. return None if no child of that name exists."""
  86. try:
  87. return parent.getElementsByTagName(tag_name)[0]
  88. except IndexError:
  89. return None
  90. def get_required_element(parent, tag_name):
  91. """Return the first child of the parent element with the given tag name, or
  92. raise MissingElementError() if no child of that name exists."""
  93. element = get_first_element(parent, tag_name)
  94. if not element:
  95. e = "A <{0}> tag is missing a required <{1}> child tag.".format(
  96. parent.tagName, tag_name)
  97. raise MissingElementError(e)
  98. return element
  99. def get_required_attribute(element, attr_name):
  100. """Return the value of the attribute 'attr_name' in 'element'. If
  101. undefined, raise MissingAttributeError()."""
  102. attribute = element.getAttribute(attr_name)
  103. if not attribute:
  104. e = "A <{0}> tag is missing the required attribute '{1}'.".format(
  105. element.tagName, attr_name)
  106. raise MissingAttributeError(e)
  107. return attribute
  108. def attribute_to_bool(value, element, attr_name):
  109. """Return True if 'value' is 'true', '1', or 'on', return False if it is
  110. 'false', '0', or 'off' (regardless of capitalization), or raise
  111. TypeMismatchError() if it does match any of those. 'element' and
  112. 'attr_name' are only used to generate the error message."""
  113. lcase = value.lower()
  114. if lcase in ["true", "1", "on"]:
  115. return True
  116. elif lcase in ["false", "0", "off"]:
  117. return False
  118. else:
  119. e = ("Expected a bool in attribute '{0}' of tag '{1}', but got '{2}'."
  120. ).format(attr_name, element.tagName, value)
  121. raise TypeMismatchError(e)
  122. def attribute_to_int(value, element, attr_name):
  123. """Return 'value' after it is converted to an integer. If it could not be
  124. converted, raise TypeMismatchError() using 'element' and 'attr_name' only
  125. to give the user information about what happened."""
  126. try:
  127. return int(value)
  128. except ValueError:
  129. e = ("Expected an integer in attribute '{0}' of tag '{1}', but got " +
  130. "'{2}'.").format(attr_name, element.tagName, value)
  131. raise TypeMismatchError(e)
  132. def parse_config(key):
  133. """A thin wrapper for the actual config parser in _parse_config(): catch
  134. parsing exceptions and report them to the user cleanly."""
  135. try:
  136. _parse_config(key)
  137. except ConfigParseError as error:
  138. print "\nError parsing config file:"
  139. print error
  140. exit(1)
  141. except blowfish.BlowfishError as error:
  142. print "\nError decrypting passwords:"
  143. print "{0}: {1}.".format(error.__class__.__name__, error)
  144. exit(1)
  145. def _parse_config(key):
  146. """Parse config data from a DOM object into the five global variables that
  147. store our config info. The key is used to unencrypt passwords stored in the
  148. XML config file."""
  149. global components, wiki, irc, schedule, watcher
  150. _load_config() # we might be re-loading unnecessarily here, but no harm in
  151. # that!
  152. data = _config.getElementsByTagName("config")[0]
  153. components = parse_components(data)
  154. wiki = parse_wiki(data, key)
  155. irc = parse_irc(data, key)
  156. schedule = parse_schedule(data)
  157. watcher = parse_watcher(data)
  158. def parse_components(data):
  159. """Parse everything within the <components> XML tag of our config file.
  160. The components object here will exist as config.components, and is a dict
  161. of our enabled components: components[name] = True if it is enabled, False
  162. if it is disabled."""
  163. components = defaultdict(lambda: False) # all components are disabled by
  164. # default
  165. element = get_required_element(data, "components")
  166. for component in element.getElementsByTagName("component"):
  167. name = get_required_attribute(component, "name")
  168. components[name] = True
  169. return components
  170. def parse_wiki(data, key):
  171. """Parse everything within the <wiki> tag of our XML config file."""
  172. pass
  173. def parse_irc_server(data, key):
  174. """Parse everything within a <server> tag."""
  175. server = Container()
  176. connection = get_required_element(data, "connection")
  177. server.host = get_required_attribute(connection, "host")
  178. server.port = get_required_attribute(connection, "port")
  179. server.nick = get_required_attribute(connection, "nick")
  180. server.ident = get_required_attribute(connection, "ident")
  181. server.realname = get_required_attribute(connection, "realname")
  182. # convert the port from a string to an int
  183. server.port = attribute_to_int(server.port, connection, "port")
  184. nickserv = get_first_element(data, "nickserv")
  185. if nickserv:
  186. server.nickserv = Container()
  187. server.nickserv.username = get_required_attribute(nickserv, "username")
  188. password = get_required_attribute(nickserv, "password")
  189. if are_passwords_encrypted():
  190. server.nickserv.password = blowfish.decrypt(key, password)
  191. else:
  192. server.nickserv.password = password
  193. else:
  194. server.nickserv = None
  195. server.channels = list()
  196. channels = get_first_element(data, "channels")
  197. if channels:
  198. for channel in channels.getElementsByTagName("channel"):
  199. name = get_required_attribute(channel, "name")
  200. server.channels.append(name)
  201. return server
  202. def parse_irc(data, key):
  203. """Parse everything within the <irc> tag of our XML config file."""
  204. irc = Container()
  205. element = get_first_element(data, "irc")
  206. if not element:
  207. return irc
  208. servers = get_first_element(element, "servers")
  209. if servers:
  210. for server in servers.getElementsByTagName("server"):
  211. server_name = get_required_attribute(server, "name")
  212. if server_name == "frontend":
  213. irc.frontend = parse_irc_server(server, key)
  214. elif server_name == "watcher":
  215. irc.watcher = parse_irc_server(server, key)
  216. else:
  217. print ("Warning: config file specifies a <server> with " +
  218. "unknown name '{0}'. Ignoring.").format(server_name)
  219. permissions = get_first_element(element, "permissions")
  220. if permissions:
  221. irc.permissions = dict()
  222. for group in permissions.getElementsByTagName("group"):
  223. group_name = get_required_attribute(group, "name")
  224. irc.permissions[group_name] = list()
  225. for user in group.getElementsByTagName("user"):
  226. hostname = get_required_attribute(user, "host")
  227. irc.permissions[group_name].append(hostname)
  228. return irc
  229. def parse_schedule(data):
  230. """Parse everything within the <schedule> tag of our XML config file."""
  231. pass
  232. def parse_watcher(data):
  233. """Parse everything within the <watcher> tag of our XML config file."""
  234. pass