From fc563f4ddd74f389076c047386f8261726333c71 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 9 Jul 2012 04:19:46 -0400 Subject: [PATCH] Finish !dictionary command (#31). --- earwigbot/commands/dictionary.py | 97 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 94 insertions(+), 3 deletions(-) diff --git a/earwigbot/commands/dictionary.py b/earwigbot/commands/dictionary.py index ff72a90..b6bdba7 100644 --- a/earwigbot/commands/dictionary.py +++ b/earwigbot/commands/dictionary.py @@ -20,13 +20,15 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +import re + from earwigbot import exceptions from earwigbot.commands import Command class Dictionary(Command): """Define words and stuff.""" name = "dictionary" - commands = ["dict", "dictionary"] + commands = ["dict", "dictionary", "define"] def process(self, data): if not data.args: @@ -41,7 +43,7 @@ class Dictionary(Command): msg = "cannot find a {0}-language Wiktionary." self.reply(data, msg.format(lang)) else: - self.reply(data, "{0}: {1}".format(term, defined.encode("utf8"))) + self.reply(data, defined.encode("utf8")) def define(self, term, lang): try: @@ -55,4 +57,93 @@ class Dictionary(Command): except (exceptions.PageNotFoundError, exceptions.InvalidPageError): return "no definition found." 
def get_languages(self, entry):
    """Split a Wiktionary entry's wikitext into per-language sections.

    A language section is introduced by a level-two heading on its own
    line, e.g. ``==English==``. Anything before the first such heading is
    discarded.

    :param entry: wikitext of the whole page.
    :returns: dict mapping each language name to the wikitext that follows
        its heading, up to the next level-two heading. The dict is empty
        when no heading is found; if a name occurs twice the last section
        wins.
    """
    # The name may be anything except "=" so headings such as
    # "Serbo-Croatian" are recognised; requiring a non-"=" first character
    # keeps level-three headings ("===Noun===") from matching. The trailing
    # lookahead leaves the newline unconsumed so that a heading directly
    # following another one is still found.
    heading = r"(?:\A|\n)==[ \t]*([^=\s][^=\n]*?)[ \t]*==[ \t]*(?=\n|\Z)"
    pieces = re.split(heading, entry)

    # With one capture group, re.split() yields
    # [preamble, name1, text1, name2, text2, ...].
    languages = {}
    for name, text in zip(pieces[1::2], pieces[2::2]):
        if text.startswith("\n"):
            text = text[1:]  # newline that terminated the heading line
        languages[name] = text
    return languages


def get_definition(self, section):
    """Summarise one language section, grouped by part of speech.

    For every known part-of-speech heading (``===Noun===`` etc.) present in
    *section*, the text up to the next ``===`` is handed to
    :py:meth:`parse_body`; non-empty results are prefixed with the
    abbreviated part of speech wrapped in ``\\x02``/``\\x0F`` (IRC bold /
    reset control codes).

    :param section: wikitext of a single language section.
    :returns: the per-part-of-speech summaries joined by ``"; "``, or an
        empty string if nothing could be extracted.
    """
    # A tuple rather than a dict so the output order is the same on every
    # run and every interpreter.
    parts_of_speech = (
        ("v.", "Verb"),
        ("n.", "Noun"),
        ("pron.", "Pronoun"),
        ("adj.", "Adjective"),
        ("adv.", "Adverb"),
        ("prep.", "Preposition"),
        ("conj.", "Conjunction"),
        ("inter.", "Interjection"),
        ("symbol", "Symbol"),
        ("suffix", "Suffix"),
        ("initialism", "Initialism"),
        ("phrase", "Phrase"),
        ("proverb", "Proverb"),
    )

    defs = []
    for abbrev, fullname in parts_of_speech:
        pattern = r"===\s*" + fullname + r"\s*===(.*?)(?:===|\Z)"
        found = re.search(pattern, section, re.DOTALL)
        if not found:
            continue
        definition = self.parse_body(found.group(1))
        if definition:
            # Unicode template: on Python 2 a byte-string template would
            # try to ASCII-encode a non-ASCII definition and raise.
            defs.append(u"\x02{0}\x0F {1}".format(abbrev, definition))

    return u"; ".join(defs)


def parse_body(self, body):
    """Extract the plain-text senses from a part-of-speech body.

    Only definition lines (``#``, ``##``, ...) are used; example and
    quotation lines (``#:``, ``#*``) are ignored. Links, templates and
    bold/italic quote markup are removed from each sense.

    :param body: wikitext following a part-of-speech heading.
    :returns: ``None`` if there are no senses, the sense itself if there is
        exactly one, otherwise the senses numbered from 1 and joined by
        spaces (``"1. foo 2. bar"``).
    """
    senses = []
    for line in body.splitlines():
        line = line.strip()
        if not re.match(r"#+\s*[^:*#\s]", line):
            continue
        line = line.lstrip("#")
        # [[target|label]] -> label. The character classes stop the match
        # from starting at an earlier, unpiped [[link]] and swallowing the
        # text in between.
        line = re.sub(r"\[\[[^\[\]|]*\|([^\[\]]*)\]\]", r"\1", line)
        line = self.strip_templates(line)
        line = line.replace("'''", "").replace("''", "")
        line = line.replace("[[", "").replace("]]", "")
        sense = " ".join(line.split())  # tidy gaps left by removed markup
        if sense:  # a line made only of templates leaves nothing to show
            senses.append(sense)

    if not senses:
        return None
    if len(senses) == 1:
        return senses[0]

    # Number the senses incrementally
    numbered = [u"{0}. {1}".format(i, sense)
                for i, sense in enumerate(senses, 1)]
    return u" ".join(numbered)


def strip_templates(self, line):
    """Return *line* with every ``{{...}}`` template removed.

    Nested templates are handled by tracking the brace depth. A stray
    ``}}`` with no matching opener is dropped without affecting the text
    that follows it.

    :param line: a single line of wikitext.
    :returns: the text found outside of any template.
    """
    kept = []
    depth = 0
    pos = 0
    length = len(line)
    while pos < length:
        pair = line[pos:pos + 2]
        if pair == "{{":
            depth += 1
            pos += 2
        elif pair == "}}":
            # Never go negative: an unbalanced closer must not hide the
            # remainder of the line.
            if depth:
                depth -= 1
            pos += 2
        else:
            if depth == 0:
                kept.append(line[pos])
            pos += 1
    return "".join(kept)