A Python robot that edits Wikipedia and interacts with people over IRC https://en.wikipedia.org/wiki/User:EarwigBot
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. # -*- coding: utf-8 -*-
  2. import re
  3. from urllib import quote
  4. from classes import BaseCommand
  5. class Command(BaseCommand):
  6. """Convert a Wikipedia page name into a URL."""
  7. name = "link"
  8. def check(self, data):
  9. if ((data.is_command and data.command == "link") or
  10. (("[[" in data.msg and "]]" in data.msg) or
  11. ("{{" in data.msg and "}}" in data.msg))):
  12. return True
  13. return False
  14. def process(self, data):
  15. msg = data.msg
  16. if re.search("(\[\[(.*?)\]\])|(\{\{(.*?)\}\})", msg):
  17. links = self.parse_line(msg)
  18. links = " , ".join(links)
  19. self.connection.reply(data, links)
  20. elif data.command == "link":
  21. if not data.args:
  22. self.connection.reply(data, "what do you want me to link to?")
  23. return
  24. pagename = ' '.join(data.args)
  25. link = self.parse_link(pagename)
  26. self.connection.reply(data, link)
  27. def parse_line(self, line):
  28. results = []
  29. # Destroy {{{template parameters}}}:
  30. line = re.sub("\{\{\{(.*?)\}\}\}", "", line)
  31. # Find all [[links]]:
  32. links = re.findall("(\[\[(.*?)(\||\]\]))", line)
  33. if links:
  34. # re.findall() returns a list of tuples, but we only want the 2nd
  35. # item in each tuple:
  36. links = [i[1] for i in links]
  37. results = map(self.parse_link, links)
  38. # Find all {{templates}}
  39. templates = re.findall("(\{\{(.*?)(\||\}\}))", line)
  40. if templates:
  41. templates = [i[1] for i in templates]
  42. results.extend(map(self.parse_template, templates))
  43. return results
  44. def parse_link(self, pagename):
  45. link = quote(pagename.replace(" ", "_"), safe="/:")
  46. return "".join(("http://enwp.org/", link))
  47. def parse_template(self, pagename):
  48. pagename = "".join(("Template:", pagename))
  49. return self.parse_link(pagename)