A Python robot that edits Wikipedia and interacts with people over IRC https://en.wikipedia.org/wiki/User:EarwigBot
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

rc.py 1.4 KiB

123456789101112131415161718192021222324252627282930313233
  1. # -*- coding: utf-8 -*-
  2. # A class to store data on an individual event received from our IRC watcher.
  3. import re
  4. class RC:
  5. def __init__(self, msg):
  6. """store data on an individual event received from our IRC watcher"""
  7. self.msg = msg
  8. def parse(self):
  9. """parse recent changes log into some variables"""
  10. msg = self.msg
  11. msg = re.sub("\x03([0-9]{1,2}(,[0-9]{1,2})?)?", "", msg) # strip IRC color codes; we don't want/need 'em
  12. msg = msg.strip()
  13. self.msg = msg
  14. # page name of the modified page
  15. # 'M' for minor edit, 'B' for bot edit, 'create' for a user creation log entry...
  16. try:
  17. page, flags, url, user, comment = re.findall("\A\[\[(.*?)\]\]\s(.*?)\s(http://.*?)\s\*\s(.*?)\s\*\s(.*?)\Z", msg)[0]
  18. except IndexError: # we're probably missing the http:// part, because it's a log entry, which lacks a url
  19. page, flags, user, comment = re.findall("\A\[\[(.*?)\]\]\s(.*?)\s\*\s(.*?)\s\*\s(.*?)\Z", msg)[0]
  20. url = "http://en.wikipedia.org/wiki/%s" % page
  21. flags = flags.strip() # flag tends to have a extraneous whitespace character at the end when it's a log entry
  22. self.page, self.flags, self.url, self.user, self.comment = page, flags, url, user, comment
  23. def get_pretty(self):
  24. """make a nice, colorful message from self.msg to send to the front-end"""
  25. pretty = self.msg
  26. return pretty