From d8d0bcf5fa84931ea3b5ad791d3d3b619f52541e Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Tue, 10 Jul 2012 00:38:34 -0400
Subject: [PATCH] Some smarter parsing, plus given names and surnames.

---
 earwigbot/commands/dictionary.py | 59 ++++++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 27 deletions(-)

diff --git a/earwigbot/commands/dictionary.py b/earwigbot/commands/dictionary.py
index 0e6dc3a..41cdbb5 100644
--- a/earwigbot/commands/dictionary.py
+++ b/earwigbot/commands/dictionary.py
@@ -89,35 +89,37 @@ class Dictionary(Command):
 
     def get_definition(self, section, level):
         parts_of_speech = {
-            "v.": "Verb",
-            "n.": "Noun",
-            "pron.": "Pronoun",
-            "adj.": "Adjective",
-            "adv.": "Adverb",
-            "prep.": "Preposition",
-            "conj.": "Conjunction",
-            "inter.": "Interjection",
-            "symbol": "Symbol",
-            "suffix": "Suffix",
-            "initialism": "Initialism",
-            "phrase": "Phrase",
-            "proverb": "Proverb",
-            "prop. n.": "Proper noun",
-            "abbr.": "\{\{abbreviation\}\}",
+            "v.": ["Verb"],
+            "n.": ["Noun"],
+            "pron.": ["Pronoun"],
+            "adj.": ["Adjective"],
+            "adv.": ["Adverb"],
+            "prep.": ["Preposition"],
+            "conj.": ["Conjunction"],
+            "inter.": ["Interjection"],
+            "symbol": ["Symbol"],
+            "suffix": ["Suffix"],
+            "initialism": ["Initialism"],
+            "phrase": ["Phrase"],
+            "proverb": ["Proverb"],
+            "prop. n.": ["Proper noun"],
+            "abbr.": ["Abbreviation", "\{\{abbreviation\}\}"],
         }
         blocks = "=" * (level + 1)
         defs = []
-        for part, fullname in parts_of_speech.iteritems():
-            if re.search("{0}\s*{1}\s*{0}".format(blocks, fullname), section):
-                regex = "{0}\s*{1}\s*{0}(.*?)(?:(?:{0})|\Z)"
-                regex = regex.format(blocks, fullname)
-                bodies = re.findall(regex, section, re.DOTALL)
-                if bodies:
-                    for body in bodies:
-                        definition = self.parse_body(body)
-                        if definition:
-                            msg = u"\x02{0}\x0F {1}"
-                            defs.append(msg.format(part, definition))
+        for part, fullnames in parts_of_speech.iteritems():
+            for fullname in fullnames:
+                regex = "{0}\s*{1}\s*{0}".format(blocks, fullname)
+                if re.search(regex, section):
+                    regex = "{0}\s*{1}\s*{0}(.*?)(?:(?:{0})|\Z)"
+                    regex = regex.format(blocks, fullname)
+                    bodies = re.findall(regex, section, re.DOTALL)
+                    if bodies:
+                        for body in bodies:
+                            definition = self.parse_body(body)
+                            if definition:
+                                msg = u"\x02{0}\x0F {1}"
+                                defs.append(msg.format(part, definition))
 
         return "; ".join(defs)
 
@@ -125,8 +127,11 @@ class Dictionary(Command):
         substitutions = [
             ("", ""),
             ("\[\[(.*?)\|(.*?)\]\]", r"\2"),
-            ("\{\{alternative spelling of\|(.*?)\}\}", r"Alternative spelling of \1."),
+            ("\{\{alternative spelling of\|(.*?)\}\}",
+             r"Alternative spelling of \1."),
             ("\{\{synonym of\|(.*?)\}\}", r"Synonym of \1."),
+            ("\{\{surname(\||\}\})", r"A surname."),
+            ("\{\{given name\|(.*?)(\||\}\})", r"A \1 given name."),
         ]
 
         senses = []