From fc563f4ddd74f389076c047386f8261726333c71 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Mon, 9 Jul 2012 04:19:46 -0400
Subject: [PATCH] Finish !dictionary command (#31).

---
 earwigbot/commands/dictionary.py | 97 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 94 insertions(+), 3 deletions(-)

diff --git a/earwigbot/commands/dictionary.py b/earwigbot/commands/dictionary.py
index ff72a90..b6bdba7 100644
--- a/earwigbot/commands/dictionary.py
+++ b/earwigbot/commands/dictionary.py
@@ -20,13 +20,15 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+import re
+
 from earwigbot import exceptions
 from earwigbot.commands import Command
 
 class Dictionary(Command):
     """Define words and stuff."""
     name = "dictionary"
-    commands = ["dict", "dictionary"]
+    commands = ["dict", "dictionary", "define"]
 
     def process(self, data):
         if not data.args:
@@ -41,7 +43,7 @@ class Dictionary(Command):
             msg = "cannot find a {0}-language Wiktionary."
             self.reply(data, msg.format(lang))
         else:
-            self.reply(data, "{0}: {1}".format(term, defined.encode("utf8")))
+            self.reply(data, defined.encode("utf8"))
 
     def define(self, term, lang):
         try:
@@ -55,4 +57,93 @@ class Dictionary(Command):
         except (exceptions.PageNotFoundError, exceptions.InvalidPageError):
             return "no definition found."
 
-        return entry
+        languages = self.get_languages(entry)
+        if not languages:
+            return u"couldn't parse {0}!".format(page.url)
+
+        result = []
+        for lang, section in sorted(languages.items()):
+            this = u"({0}) {1}".format(lang, self.get_definition(section))
+            result.append(this)
+        return u"; ".join(result)
+
+    def get_languages(self, entry):
+        """Map each level-2 heading (==Language==) in entry to its section.
+
+        Returns an empty dict when entry has no such heading.
+        """
+        regex = r"(?:\A|\n)==\s*([a-zA-Z0-9_ ]*?)\s*==(?:\Z|\n)"
+        # With one capture group, re.split() always yields the preamble
+        # followed by alternating heading / section-text items, so dropping
+        # the preamble leaves an even-length list of pairs.
+        split = re.split(regex, entry)[1:]
+        return dict(zip(split[0::2], split[1::2]))
+
+    def get_definition(self, section):
+        """Return one language section's senses, grouped by part of speech."""
+        parts_of_speech = (  # (abbreviation, heading), in output order
+            ("v.", "Verb"),
+            ("n.", "Noun"),
+            ("pron.", "Pronoun"),
+            ("adj.", "Adjective"),
+            ("adv.", "Adverb"),
+            ("prep.", "Preposition"),
+            ("conj.", "Conjunction"),
+            ("inter.", "Interjection"),
+            ("symbol", "Symbol"),
+            ("suffix", "Suffix"),
+            ("initialism", "Initialism"),
+            ("phrase", "Phrase"),
+            ("proverb", "Proverb"),
+        )
+        defs = []
+        for part, fullname in parts_of_speech:
+            regex = r"===\s*" + fullname + r"\s*===(.*?)(?:===|\Z)"
+            match = re.search(regex, section, re.DOTALL)
+            definition = self.parse_body(match.group(1)) if match else None
+            if definition:
+                # Unicode template: a byte-string one fails on non-ASCII text.
+                defs.append(u"\x02{0}\x0F {1}".format(part, definition))
+
+        return u"; ".join(defs)
+
+    def parse_body(self, body):
+        """Return the '#' sense lines of body as plain text, or None."""
+        senses = []
+        for line in body.splitlines():
+            line = line.strip()
+            if re.match(r"#\s*[^:*]", line):
+                # Excluding brackets from the link target keeps a piped link
+                # from swallowing an earlier unpiped [[link]] on the line.
+                line = re.sub(r"\[\[[^\[\]|]*\|([^\[\]]*)\]\]", r"\1", line)
+                line = self.strip_templates(line)
+                line = line[1:].replace("'''", "").replace("''", "")
+                line = line.replace("[[", "").replace("]]", "")
+                senses.append(line.strip())
+
+        if not senses:
+            return None
+        if len(senses) == 1:
+            return senses[0]
+        numbered = [u"{0}. {1}".format(i, s) for i, s in enumerate(senses, 1)]
+        return u" ".join(numbered)
+
+    def strip_templates(self, line):
+        """Return line with every {{...}} template (nested ones too) removed.
+
+        An unmatched "}}" is dropped without affecting the text after it.
+        """
+        kept = []
+        depth = 0  # How many templates enclose the current position
+        pos = 0
+        while pos < len(line):
+            pair = line[pos:pos + 2]
+            if pair == "{{":
+                depth += 1
+            elif pair == "}}":
+                depth = max(depth - 1, 0)  # A stray "}}" must not go negative
+            elif depth == 0:
+                kept.append(line[pos])
+            # Template delimiters are two characters wide; skip both at once.
+            pos += 2 if pair in ("{{", "}}") else 1
+        return "".join(kept)