A Python robot that edits Wikipedia and interacts with people over IRC: https://en.wikipedia.org/wiki/User:EarwigBot
Du kannst nicht mehr als 25 Themen auswählen. Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

190 Zeilen
7.1 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2009-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. import re
  23. from earwigbot import exceptions
  24. from earwigbot.commands import Command
  class Dictionary(Command):
      """Define words and stuff."""
      name = "dictionary"
      commands = ["dict", "dictionary", "define", "def"]

      # (abbreviation shown to the user, heading name searched for in the
      # entry) pairs. An ordered tuple rather than a dict so that definitions
      # are always reported in the order listed here.
      _PARTS_OF_SPEECH = (
          ("v.", "Verb"),
          ("n.", "Noun"),
          ("pron.", "Pronoun"),
          ("adj.", "Adjective"),
          ("adv.", "Adverb"),
          ("prep.", "Preposition"),
          ("conj.", "Conjunction"),
          ("inter.", "Interjection"),
          ("symbol", "Symbol"),
          ("suffix", "Suffix"),
          ("initialism", "Initialism"),
          ("phrase", "Phrase"),
          ("proverb", "Proverb"),
          ("prop. n.", "Proper noun"),
          ("abbr.", "Abbreviation"),
          ("punct.", "Punctuation mark"),
      )

      # Wikitext clean-ups applied, in this order, to every sense line before
      # the remaining templates are stripped. Compiled once for the class
      # instead of on every parse_body() call.
      _SUBSTITUTIONS = (
          (re.compile(r"<!--(.*?)-->"), ""),
          (re.compile(r"<ref>(.*?)</ref>"), ""),
          (re.compile(r"\[\[[^\]|]*?\|([^\]|]*?)\]\]"), r"\1"),
          (re.compile(r"\{\{unsupported\|(.*?)\}\}"), r"\1"),
          (re.compile(r"\{\{(.*?) of\|([^}|]*?)(\|(.*?))?\}\}"), r"\1 of \2."),
          (re.compile(r"\{\{w\|(.*?)\}\}"), r"\1"),
          (re.compile(r"\{\{surname(.*?)\}\}"), r"A surname."),
          (re.compile(r"\{\{given name\|([^}|]*?)(\|(.*?))?\}\}"),
           r"A \1 given name."),
      )

      # A sense line is "#" followed by anything other than ":", "*" or "#".
      _SENSE_LINE = re.compile(r"#\s*[^:*#]")

      def process(self, data):
          """Reply to *data* with the definition of the words in data.args.

          The Wiktionary language is taken from the bot's default site. An
          APIError raised while looking the term up is reported to the user
          as a missing Wiktionary for that language.
          """
          if not data.args:
              self.reply(data, "What do you want me to define?")
              return

          term = " ".join(data.args)
          lang = self.bot.wiki.get_site().lang
          try:
              defined = self.define(term, lang)
          except exceptions.APIError:
              msg = "Cannot find a {0}-language Wiktionary."
              self.reply(data, msg.format(lang))
          else:
              # The reply is sent UTF-8 encoded, as the original code did.
              self.reply(data, defined.encode("utf8"))

      def define(self, term, lang, tries=2):
          """Return a one-line definition of *term* from the *lang* Wiktionary.

          If the page cannot be fetched, the lower-cased and then the
          capitalized spelling is tried, using up one of *tries* per attempt.
          A term of the form "word#Language" restricts the answer to that
          language section; otherwise every language section found is
          reported, sorted by language name.
          """
          try:
              site = self.bot.wiki.get_site(project="wiktionary", lang=lang)
          except exceptions.SiteNotFoundError:
              site = self.bot.wiki.add_site(project="wiktionary", lang=lang)

          page = site.get_page(term, follow_redirects=True)
          try:
              entry = page.get()
          except (exceptions.PageNotFoundError, exceptions.InvalidPageError):
              if tries:
                  # Only retry with a spelling that actually differs.
                  for variant in (term.lower(), term.capitalize()):
                      if variant != term:
                          return self.define(variant, lang, tries - 1)
              return "No definition found."

          level, languages = self.get_languages(entry)
          if not languages:
              return u"Couldn't parse {0}!".format(page.url)

          if "#" in term:  # Requesting a specific language
              wanted = term.rsplit("#", 1)[1]
              if wanted not in languages:
                  resp = u"Language {0} not found in definition."
                  return resp.format(wanted)
              definition = self.get_definition(languages[wanted], level)
              return u"({0}) {1}".format(wanted, definition)

          result = []
          for language, section in sorted(languages.items()):
              definition = self.get_definition(section, level)
              result.append(u"({0}) {1}".format(language, definition))
          return u"; ".join(result)

      def get_languages(self, entry, level=2):
          """Split *entry* into language sections by heading.

          Headings at *level* ("=" repeated *level* times on each side) are
          tried first. If none are found and *level* is 2, level 3 is tried
          as a fallback.

          Returns a tuple (level, languages): the heading level that matched
          and a dict mapping heading name to the text following it. If no
          heading is found, languages is None.
          """
          template = r"(?:\A|\n){0}\s*([a-zA-Z0-9_ ]*?)\s*{0}(?:\Z|\n)"
          while True:
              split = re.split(template.format("=" * level), entry)
              # With one capture group re.split() returns
              # [before, name1, text1, name2, text2, ...], so a length of 1
              # means no heading matched at this level.
              if len(split) > 1:
                  break
              if level != 2:
                  return level, None
              level = 3

          # Later duplicates of a heading name overwrite earlier ones.
          return level, dict(zip(split[1::2], split[2::2]))

      def get_definition(self, section, level):
          """Return the definitions found in one language *section*.

          Part-of-speech headings are expected one level below the language
          heading, i.e. wrapped in level + 1 "=" characters. Each heading may
          be written plainly ("Noun") or as a template ("{{Noun}}" or
          "{{noun}}"). Results are joined with "; ", each prefixed by the
          part-of-speech abbreviation wrapped in the \\x02 ... \\x0F control
          characters.
          """
          marker = "=" * (level + 1)
          # The body runs up to, but does not consume, the next heading
          # marker, so a heading that follows immediately is still found.
          tail = r"\s*" + marker + r"(.*?)(?=" + marker + r"|\Z)"

          defs = []
          for abbrev, basename in self._PARTS_OF_SPEECH:
              headings = (
                  basename,
                  r"\{\{" + basename + r"\}\}",
                  r"\{\{" + basename.lower() + r"\}\}",
              )
              for heading in headings:
                  pattern = marker + r"\s*" + heading + tail
                  for body in re.findall(pattern, section, re.DOTALL):
                      definition = self.parse_body(body)
                      if definition:
                          msg = u"\x02{0}\x0F {1}"
                          defs.append(msg.format(abbrev, definition))
          return "; ".join(defs)

      def parse_body(self, body):
          """Turn the wikitext under one heading into a single line of senses.

          Only lines matching _SENSE_LINE are used. Markup is removed from
          each one and its first letter is upper-cased. Returns None when no
          sense is left, the sense itself when there is exactly one, and
          otherwise the senses numbered from 1 and joined with spaces.
          """
          senses = []
          for line in body.splitlines():
              line = line.strip()
              if not self._SENSE_LINE.match(line):
                  continue
              for pattern, repl in self._SUBSTITUTIONS:
                  line = pattern.sub(repl, line)
              line = self.strip_templates(line)
              # Drop the leading "#" and bold/italic quote markup.
              line = line[1:].replace("'''", "").replace("''", "")
              line = line.replace("[[", "").replace("]]", "").strip()
              if line:
                  senses.append(line[0].upper() + line[1:])

          if not senses:
              return None
          if len(senses) == 1:
              return senses[0]
          # Number the senses incrementally
          return " ".join(u"{0}. {1}".format(number, sense)
                          for number, sense in enumerate(senses, 1))

      def strip_templates(self, line):
          """Return *line* with every "{{...}}" template removed.

          Templates may be nested. The "{{" and "}}" delimiters are always
          dropped. A "}}" with no matching "{{" is dropped without
          suppressing the text that follows it.
          """
          kept = []
          depth = 0
          pos = 0
          end = len(line)
          while pos < end:
              pair = line[pos:pos + 2]
              if pair == "{{":
                  depth += 1
                  pos += 2
              elif pair == "}}":
                  # Never go below zero, or everything after a stray "}}"
                  # would be treated as template content and discarded.
                  depth = max(depth - 1, 0)
                  pos += 2
              else:
                  if depth == 0:
                      kept.append(line[pos])
                  pos += 1
          return "".join(kept)