瀏覽代碼

Some smarter parsing, plus given names and surnames.

tags/v0.1^2
Ben Kurtovic 12 年之前
父節點
當前提交
d8d0bcf5fa
共有 1 個文件被更改,包括 32 次插入27 次删除
  1. +32
    -27
      earwigbot/commands/dictionary.py

+ 32
- 27
earwigbot/commands/dictionary.py 查看文件

@@ -89,35 +89,37 @@ class Dictionary(Command):

def get_definition(self, section, level):
    """Collect the definitions found under part-of-speech headings.

    Each known part of speech has one or more heading spellings. For
    every spelling, ``section`` is searched for a heading delimited by
    ``level + 1`` equals signs on each side; the text following each
    matching heading (up to the next run of that many equals signs, or
    the end of ``section``) is handed to ``self.parse_body``.

    Args:
        section: Wikitext to search.
        level: Heading depth; headings are matched with ``level + 1``
            ``=`` characters on either side of the name.

    Returns:
        The non-empty results of ``self.parse_body``, each prefixed with
        its abbreviated part of speech, joined with ``"; "``. An empty
        string if nothing matched.
    """
    # Abbreviation shown to the user -> heading spellings, written as
    # regex fragments (hence the escaped braces in the template form).
    parts_of_speech = {
        "v.": ["Verb"],
        "n.": ["Noun"],
        "pron.": ["Pronoun"],
        "adj.": ["Adjective"],
        "adv.": ["Adverb"],
        "prep.": ["Preposition"],
        "conj.": ["Conjunction"],
        "inter.": ["Interjection"],
        "symbol": ["Symbol"],
        "suffix": ["Suffix"],
        "initialism": ["Initialism"],
        "phrase": ["Phrase"],
        "proverb": ["Proverb"],
        "prop. n.": ["Proper noun"],
        "abbr.": ["Abbreviation", r"\{\{abbreviation\}\}"],
    }
    blocks = "=" * (level + 1)
    # Heading, then a lazy capture of everything up to the next run of
    # ``blocks`` or the end of the text. Raw string so that \s and \Z
    # reach the regex engine untouched.
    template = r"{0}\s*{1}\s*{0}(.*?)(?:(?:{0})|\Z)"
    # \x02 ... \x0F presumably are IRC bold/reset control codes -- TODO
    # confirm against the code that sends this message.
    msg = u"\x02{0}\x0F {1}"

    defs = []
    # items() yields the same pairs as iteritems() and also exists on
    # Python 3.
    for part, fullnames in parts_of_speech.items():
        for fullname in fullnames:
            regex = template.format(blocks, fullname)
            # findall() returns an empty list when the heading is
            # absent, so no separate search() pre-check is needed.
            for body in re.findall(regex, section, re.DOTALL):
                definition = self.parse_body(body)
                if definition:
                    defs.append(msg.format(part, definition))

    return "; ".join(defs)

@@ -125,8 +127,11 @@ class Dictionary(Command):
substitutions = [
("<!--(.*?)-->", ""),
("\[\[(.*?)\|(.*?)\]\]", r"\2"),
("\{\{alternative spelling of\|(.*?)\}\}", r"Alternative spelling of \1."),
("\{\{alternative spelling of\|(.*?)\}\}",
r"Alternative spelling of \1."),
("\{\{synonym of\|(.*?)\}\}", r"Synonym of \1."),
("\{\{surname(\||\}\})", r"A surname."),
("\{\{given name\|(.*?)(\||\}\})", r"A \1 given name."),
]

senses = []


Loading…
取消
儲存