A Python robot that edits Wikipedia and interacts with people over IRC: https://en.wikipedia.org/wiki/User:EarwigBot

  1. # -*- coding: utf-8 -*-
  2. # Convert a Wikipedia page name into a URL.
  3. import re
  4. from irc.base_command import BaseCommand
  5. class Link(BaseCommand):
  6. def get_hooks(self):
  7. return ["msg"]
  8. def get_help(self, command):
  9. return "Convert a Wikipedia page name into a URL."
  10. def check(self, data):
  11. if ((data.is_command and data.command == "link") or
  12. (("[[" in data.msg and "]]" in data.msg) or
  13. ("{{" in data.msg and "}}" in data.msg))):
  14. return True
  15. return False
  16. def process(self, data):
  17. msg = data.msg
  18. if re.search("(\[\[(.*?)\]\])|(\{\{(.*?)\}\})", msg):
  19. links = self.parse_line(msg)
  20. links = " , ".join(links)
  21. self.connection.reply(data, links)
  22. elif data.command == "link":
  23. if not data.args:
  24. self.connection.reply(data, "what do you want me to link to?")
  25. return
  26. pagename = ' '.join(data.args)
  27. link = self.parse_link(pagename)
  28. self.connection.reply(data, link)
  29. def parse_line(self, line):
  30. results = list()
  31. line = re.sub("\{\{\{(.*?)\}\}\}", "", line) # destroy {{{template parameters}}}
  32. links = re.findall("(\[\[(.*?)(\||\]\]))", line) # find all [[links]]
  33. if links:
  34. links = map(lambda x: x[1], links) # re.findall() returns a list of tuples, but we only want the 2nd item in each tuple
  35. results.extend(map(self.parse_link, links))
  36. templates = re.findall("(\{\{(.*?)(\||\}\}))", line) # find all {{templates}}
  37. if templates:
  38. templates = map(lambda x: x[1], templates)
  39. results.extend(map(self.parse_template, templates))
  40. return results
  41. def parse_link(self, pagename):
  42. pagename = pagename.strip()
  43. link = "http://en.wikipedia.org/wiki/" + pagename
  44. link = link.replace(" ", "_")
  45. return link
  46. def parse_template(self, pagename):
  47. pagename = "Template:%s" % pagename # TODO: implement an actual namespace check
  48. link = self.parse_link(pagename)
  49. return link