From e63cd89ed54c9930c73d3bc9ec27538764b6f183 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 8 Jul 2012 17:51:22 -0400 Subject: [PATCH 1/4] Starting !dictionary command (#31) --- earwigbot/commands/dictionary.py | 58 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 earwigbot/commands/dictionary.py diff --git a/earwigbot/commands/dictionary.py b/earwigbot/commands/dictionary.py new file mode 100644 index 0000000..99d0623 --- /dev/null +++ b/earwigbot/commands/dictionary.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2009-2012 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from earwigbot import exceptions +from earwigbot.commands import Command + +class Dictionary(Command): + """Define words and stuff.""" + name = "dictionary" + commands = ["dict", "dictionary"] + + def process(self, data): + if not data.args: + self.reply(data, "what do you want me to define?") + return + + term = " ".join(data.args) + lang = self.bot.wiki.get_site().lang + try: + definition = self.define(term, lang) + except exceptions.APIError: + msg = "cannot find a {0}-language Wiktionary." + self.reply(data, msg.format(lang)) + else: + self.reply(data, "{0}: {1}".format(term, definition)) + + def define(self, term, lang): + try: + site = self.bot.wiki.get_site(project="wiktionary", lang=lang) + except exceptions.SiteNotFoundError: + site = self.bot.wiki.add_site(project="wiktionary", lang=lang) + + page = site.get_page(term) + try: + entry = page.get() + except (exceptions.PageNotFoundError, exceptions.InvalidPageError): + return "no definition found." + + return entry From 3cfedde6bd91aaee978ee2bb4f657c64b00e0126 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 8 Jul 2012 18:34:13 -0400 Subject: [PATCH 2/4] A bunch of cleanup and fixes. --- earwigbot/commands/dictionary.py | 6 +++--- earwigbot/config.py | 3 ++- earwigbot/managers.py | 4 ++-- earwigbot/wiki/site.py | 16 +++++++++++----- earwigbot/wiki/sitesdb.py | 21 ++++----------------- 5 files changed, 22 insertions(+), 28 deletions(-) diff --git a/earwigbot/commands/dictionary.py b/earwigbot/commands/dictionary.py index 99d0623..ff72a90 100644 --- a/earwigbot/commands/dictionary.py +++ b/earwigbot/commands/dictionary.py @@ -30,18 +30,18 @@ class Dictionary(Command): def process(self, data): if not data.args: - self.reply(data, "what do you want me to define?") + self.reply(data, "what do you want me to define?") return term = " ".join(data.args) lang = self.bot.wiki.get_site().lang try: - definition = self.define(term, lang) + defined = self.define(term, lang) except exceptions.APIError: msg = "cannot find a {0}-language Wiktionary." self.reply(data, msg.format(lang)) else: - self.reply(data, "{0}: {1}".format(term, definition)) + self.reply(data, "{0}: {1}".format(term, defined.encode("utf8"))) def define(self, term, lang): try: diff --git a/earwigbot/config.py b/earwigbot/config.py index 6076edf..ffec3ec 100644 --- a/earwigbot/config.py +++ b/earwigbot/config.py @@ -274,7 +274,8 @@ class BotConfig(object): key = getpass("Enter key to decrypt bot passwords: ") self._decryption_cipher = Blowfish.new(sha256(key).digest()) signature = self.metadata["signature"] - assert bcrypt.hashpw(key, signature) == signature + if bcrypt.hashpw(key, signature) != signature: + raise RuntimeError("Incorrect password.") for node, nodes in self._decryptable_nodes: self._decrypt(node, nodes) diff --git a/earwigbot/managers.py b/earwigbot/managers.py index 9455d2f..55636e4 100644 --- a/earwigbot/managers.py +++ b/earwigbot/managers.py @@ -78,7 +78,7 @@ class _ResourceManager(object): try: resource = klass(self.bot) # Create instance of resource except Exception: - e = "Error instantiating {0} class in {1} (from {2})" + e = "Error instantiating {0} class in '{1}' (from {2})" self.logger.exception(e.format(res_type, name, path)) else: self._resources[resource.name] = resource @@ -98,7 +98,7 @@ class _ResourceManager(object): try: module = imp.load_module(name, f, path, desc) except Exception: - e = "Couldn't load module {0} (from {1})" + e = "Couldn't load module '{0}' (from {1})" self.logger.exception(e.format(name, path)) return finally: diff --git a/earwigbot/wiki/site.py b/earwigbot/wiki/site.py index eaec2a0..93f636d 100644 --- a/earwigbot/wiki/site.py +++ b/earwigbot/wiki/site.py @@ -131,13 +131,19 @@ class Site(object): self._api_info_cache = {"maxlag": 0, "lastcheck": 0} # Attributes used for SQL queries: - self._sql_data = sql + if sql: + self._sql_data = sql + else: + self._sql_data = {} self._sql_conn = None self._sql_lock = Lock() self._sql_info_cache = {"replag": 0, "lastcheck": 0, "usable": None} # Attribute used in copyright violation checks (see CopyrightMixIn): - self._search_config = search_config + if search_config: + self._search_config = search_config + else: + self._search_config = {} # Set up cookiejar and URL opener for making API queries: if cookiejar: @@ -150,9 +156,6 @@ class Site(object): self._opener.addheaders = [("User-Agent", user_agent), ("Accept-Encoding", "gzip")] - # Get all of the above attributes that were not specified as arguments: - self._load_attributes() - # Set up our internal logger: if logger: self._logger = logger @@ -160,6 +163,9 @@ class Site(object): self._logger = getLogger("earwigbot.wiki") self._logger.addHandler(NullHandler()) + # Get all of the above attributes that were not specified as arguments: + self._load_attributes() + # If we have a name/pass and the API says we're not logged in, log in: self._login_info = name, password = login if name and password: diff --git a/earwigbot/wiki/sitesdb.py b/earwigbot/wiki/sitesdb.py index 9e6e296..7852e70 100644 --- a/earwigbot/wiki/sitesdb.py +++ b/earwigbot/wiki/sitesdb.py @@ -278,6 +278,7 @@ class SitesDB(object): else: conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,)) conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,)) + self._logger.info("Removed site '{0}'".format(name)) return True def get_site(self, name=None, project=None, lang=None): @@ -376,34 +377,20 @@ class SitesDB(object): assert_edit = config.wiki.get("assert") maxlag = config.wiki.get("maxlag") wait_between_queries = config.wiki.get("waitTime", 3) - logger = self._logger.getChild(name) - search_config = config.wiki.get("search") if user_agent: user_agent = user_agent.replace("$1", __version__) user_agent = user_agent.replace("$2", python_version()) - if search_config: - nltk_dir = path.join(self.config.root_dir, ".nltk") - search_config["nltk_dir"] = nltk_dir - search_config["exclusions_db"] = self._exclusions_db - - if not sql: - sql = config.wiki.get("sql", {}) - for key, value in sql.iteritems(): - if "$1" in value: - sql[key] = value.replace("$1", name) - # Create a Site object to log in and load the other attributes: site = Site(base_url=base_url, script_path=script_path, sql=sql, login=login, cookiejar=cookiejar, user_agent=user_agent, use_https=use_https, assert_edit=assert_edit, - maxlag=maxlag, wait_between_queries=wait_between_queries, - logger=logger, search_config=search_config) + maxlag=maxlag, wait_between_queries=wait_between_queries) + self._logger.info("Added site '{0}'".format(site.name)) self._add_site_to_sitesdb(site) - self._sites[site.name] = site - return site + return self._get_site_object(site.name) def remove_site(self, name=None, project=None, lang=None): """Remove a site from the sitesdb. From fb31aa73c87f3cc2ba0cb78c0ef4d5049ee911e4 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 9 Jul 2012 01:32:28 -0400 Subject: [PATCH 3/4] Proper handling of unicode in some commands. --- earwigbot/commands/afc_report.py | 5 +++-- earwigbot/commands/afc_submissions.py | 2 +- earwigbot/commands/link.py | 9 ++++----- earwigbot/tasks/afc_statistics.py | 3 ++- earwigbot/wiki/page.py | 4 ++-- 5 files changed, 12 insertions(+), 11 deletions(-) diff --git a/earwigbot/commands/afc_report.py b/earwigbot/commands/afc_report.py index cb33c5f..d26f44b 100644 --- a/earwigbot/commands/afc_report.py +++ b/earwigbot/commands/afc_report.py @@ -74,12 +74,13 @@ class AFCReport(Command): return page def report(self, page): - url = page.url.replace("en.wikipedia.org/wiki", "enwp.org") + url = page.url.encode("utf8") + url = url.replace("en.wikipedia.org/wiki", "enwp.org") short = self.statistics.get_short_title(page.title) status = self.get_status(page) user = page.get_creator() user_name = user.name - user_url = user.get_talkpage().url + user_url = user.get_talkpage().url.encode("utf8") msg1 = "AfC submission report for \x0302{0}\x0F ({1}):" msg2 = "Status: \x0303{0}\x0F" diff --git a/earwigbot/commands/afc_submissions.py b/earwigbot/commands/afc_submissions.py index a7144b2..16530d3 100644 --- a/earwigbot/commands/afc_submissions.py +++ b/earwigbot/commands/afc_submissions.py @@ -54,6 +54,6 @@ class AFCSubmissions(Command): site = self.bot.wiki.get_site() category = site.get_category("Pending AfC submissions") members = category.get_members(limit=number + len(self.ignore_list)) - urls = [member.url for member in members if member.title not in self.ignore_list] + urls = [member.url.encode("utf8") for member in members if member.title not in self.ignore_list] pages = ", ".join(urls[:number]) self.reply(data, "{0} pending AfC subs: {1}".format(number, pages)) diff --git a/earwigbot/commands/link.py b/earwigbot/commands/link.py index c087600..aafb114 100644 --- a/earwigbot/commands/link.py +++ b/earwigbot/commands/link.py @@ -35,15 +35,15 @@ class Link(Command): if re.search("(\[\[(.*?)\]\])|(\{\{(.*?)\}\})", msg): links = self.parse_line(msg) - links = " , ".join(links) - self.reply(data, links) + links = u" , ".join(links) + self.reply(data, links.encode("utf8")) elif data.command == "link": if not data.args: self.reply(data, "what do you want me to link to?") return pagename = " ".join(data.args) - link = self.site.get_page(pagename).url + link = self.site.get_page(pagename).url.encode("utf8") self.reply(data, link) def parse_line(self, line): @@ -68,5 +68,4 @@ class Link(Command): return results def parse_template(self, pagename): - pagename = "".join(("Template:", pagename)) - return self.site.get_page(pagename).url + return self.site.get_page("Template:" + pagename).url diff --git a/earwigbot/tasks/afc_statistics.py b/earwigbot/tasks/afc_statistics.py index 7b852f7..c3ddfe2 100644 --- a/earwigbot/tasks/afc_statistics.py +++ b/earwigbot/tasks/afc_statistics.py @@ -269,7 +269,8 @@ class AFCStatistics(Task): tracked = [i[0] for i in cursor.fetchall()] category = self.site.get_category(self.pending_cat) - for title, pageid in category.get_members(): + for page in category.get_members(): + title, pageid = page.title, page.pageid if title in self.ignore_list: continue if pageid not in tracked: diff --git a/earwigbot/wiki/page.py b/earwigbot/wiki/page.py index 3f00b71..71479b3 100644 --- a/earwigbot/wiki/page.py +++ b/earwigbot/wiki/page.py @@ -513,9 +513,9 @@ class Page(CopyvioMixIn): return self._fullurl else: encoded = self._title.encode("utf8").replace(" ", "_") - slug = quote(encoded, safe="/:") + slug = quote(encoded, safe="/:").decode("utf8") path = self.site._article_path.replace("$1", slug) - return ''.join((self.site.url, path)) + return u"".join((self.site.url, path)) @property def namespace(self): From fc563f4ddd74f389076c047386f8261726333c71 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 9 Jul 2012 04:19:46 -0400 Subject: [PATCH 4/4] Finish !dictionary command (#31). --- earwigbot/commands/dictionary.py | 97 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 94 insertions(+), 3 deletions(-) diff --git a/earwigbot/commands/dictionary.py b/earwigbot/commands/dictionary.py index ff72a90..b6bdba7 100644 --- a/earwigbot/commands/dictionary.py +++ b/earwigbot/commands/dictionary.py @@ -20,13 +20,15 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +import re + from earwigbot import exceptions from earwigbot.commands import Command class Dictionary(Command): """Define words and stuff.""" name = "dictionary" - commands = ["dict", "dictionary"] + commands = ["dict", "dictionary", "define"] def process(self, data): if not data.args: @@ -41,7 +43,7 @@ class Dictionary(Command): msg = "cannot find a {0}-language Wiktionary." self.reply(data, msg.format(lang)) else: - self.reply(data, "{0}: {1}".format(term, defined.encode("utf8"))) + self.reply(data, defined.encode("utf8")) def define(self, term, lang): try: @@ -55,4 +57,93 @@ class Dictionary(Command): except (exceptions.PageNotFoundError, exceptions.InvalidPageError): return "no definition found." - return entry + languages = self.get_languages(entry) + if not languages: + return u"couldn't parse {0}!".format(page.url) + + result = [] + for lang, section in sorted(languages.items()): + this = u"({0}) {1}".format(lang, self.get_definition(section)) + result.append(this) + return u"; ".join(result) + + def get_languages(self, entry): + regex = r"(?:\A|\n)==\s*([a-zA-Z0-9_ ]*?)\s*==(?:\Z|\n)" + split = re.split(regex, entry) + if len(split) % 2 == 0: + return None + + split.pop(0) + languages = {} + for i in xrange(0, len(split), 2): + languages[split[i]] = split[i + 1] + return languages + + def get_definition(self, section): + parts_of_speech = { + "v.": "Verb", + "n.": "Noun", + "pron.": "Pronoun", + "adj.": "Adjective", + "adv.": "Adverb", + "prep.": "Preposition", + "conj.": "Conjunction", + "inter.": "Interjection", + "symbol": "Symbol", + "suffix": "Suffix", + "initialism": "Initialism", + "phrase": "Phrase", + "proverb": "Proverb", + } + defs = [] + for part, fullname in parts_of_speech.iteritems(): + if re.search("===\s*" + fullname + "\s*===", section): + regex = "===\s*" + fullname + "\s*===(.*?)(?:(?:===)|\Z)" + body = re.findall(regex, section, re.DOTALL) + if body: + definition = self.parse_body(body[0]) + if definition: + defs.append("\x02{0}\x0F {1}".format(part, definition)) + + return "; ".join(defs) + + def parse_body(self, body): + senses = [] + for line in body.splitlines(): + line = line.strip() + if re.match("#\s*[^:*]", line): + line = re.sub("\[\[(.*?)\|(.*?)\]\]", r"\2", line) + line = self.strip_templates(line) + line = line[1:].replace("'''", "").replace("''", "") + line = line.replace("[[", "").replace("]]", "") + senses.append(line.strip()) + + if not senses: + return None + if len(senses) == 1: + return senses[0] + + result = [] # Number the senses incrementally + for i, sense in enumerate(senses): + result.append(u"{0}. {1}".format(i + 1, sense)) + return " ".join(result) + + def strip_templates(self, line): + line = list(line) + stripped = "" + depth = 0 + while line: + this = line.pop(0) + if line: + next = line[0] + else: + next = "" + if this == "{" and next == "{": + line.pop(0) + depth += 1 + elif this == "}" and next == "}": + line.pop(0) + depth -= 1 + elif depth == 0: + stripped += this + return stripped