Browse Source

Merge branch 'feature/dictionary' into develop

tags/v0.1^2
Ben Kurtovic 12 years ago
parent
commit
f1e0a6f4de
10 changed files with 180 additions and 36 deletions
  1. +3
    -2
      earwigbot/commands/afc_report.py
  2. +1
    -1
      earwigbot/commands/afc_submissions.py
  3. +149
    -0
      earwigbot/commands/dictionary.py
  4. +4
    -5
      earwigbot/commands/link.py
  5. +2
    -1
      earwigbot/config.py
  6. +2
    -2
      earwigbot/managers.py
  7. +2
    -1
      earwigbot/tasks/afc_statistics.py
  8. +2
    -2
      earwigbot/wiki/page.py
  9. +11
    -5
      earwigbot/wiki/site.py
  10. +4
    -17
      earwigbot/wiki/sitesdb.py

+ 3
- 2
earwigbot/commands/afc_report.py View File

@@ -74,12 +74,13 @@ class AFCReport(Command):
return page

def report(self, page):
url = page.url.replace("en.wikipedia.org/wiki", "enwp.org")
url = page.url.encode("utf8")
url = url.replace("en.wikipedia.org/wiki", "enwp.org")
short = self.statistics.get_short_title(page.title)
status = self.get_status(page)
user = page.get_creator()
user_name = user.name
user_url = user.get_talkpage().url
user_url = user.get_talkpage().url.encode("utf8")

msg1 = "AfC submission report for \x0302{0}\x0F ({1}):"
msg2 = "Status: \x0303{0}\x0F"


+ 1
- 1
earwigbot/commands/afc_submissions.py View File

@@ -54,6 +54,6 @@ class AFCSubmissions(Command):
site = self.bot.wiki.get_site()
category = site.get_category("Pending AfC submissions")
members = category.get_members(limit=number + len(self.ignore_list))
urls = [member.url for member in members if member.title not in self.ignore_list]
urls = [member.url.encode("utf8") for member in members if member.title not in self.ignore_list]
pages = ", ".join(urls[:number])
self.reply(data, "{0} pending AfC subs: {1}".format(number, pages))

+ 149
- 0
earwigbot/commands/dictionary.py View File

@@ -0,0 +1,149 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import re

from earwigbot import exceptions
from earwigbot.commands import Command

class Dictionary(Command):
"""Define words and stuff."""
name = "dictionary"
commands = ["dict", "dictionary", "define"]

def process(self, data):
    """Look up the term given in *data* and reply with its definition.

    The words in ``data.args`` are joined with spaces to form the term, and
    the language is taken from the bot's default site. If ``define`` raises
    ``APIError``, the user is told that no Wiktionary exists for that
    language; with no arguments at all, the user is asked for a term.
    """
    if data.args:
        term = " ".join(data.args)
        lang = self.bot.wiki.get_site().lang
        try:
            answer = self.define(term, lang)
        except exceptions.APIError:
            template = "cannot find a {0}-language Wiktionary."
            self.reply(data, template.format(lang))
            return
        # Encode only on the way out; define() may hand back unicode text.
        self.reply(data, answer.encode("utf8"))
    else:
        self.reply(data, "what do you want me to define?")

def define(self, term, lang):
    """Return a one-line definition of *term* from the *lang* Wiktionary.

    The Wiktionary site is fetched from the bot's wiki toolset, and added
    first if it is not known yet. The result is one "(Language) definition"
    chunk per language section of the entry, sorted by language name and
    joined with "; ". A fixed message is returned instead when the page is
    missing or invalid, or when no language sections could be found.
    """
    try:
        site = self.bot.wiki.get_site(project="wiktionary", lang=lang)
    except exceptions.SiteNotFoundError:
        site = self.bot.wiki.add_site(project="wiktionary", lang=lang)

    page = site.get_page(term)
    missing = (exceptions.PageNotFoundError, exceptions.InvalidPageError)
    try:
        text = page.get()
    except missing:
        return "no definition found."

    sections = self.get_languages(text)
    if not sections:
        return u"couldn't parse {0}!".format(page.url)

    chunks = [
        u"({0}) {1}".format(name, self.get_definition(body))
        for name, body in sorted(sections.items())
    ]
    return u"; ".join(chunks)

def get_languages(self, entry):
    """Split the wikitext of an entry into its level-2 language sections.

    A section header is a line of the form ``==Name==``. The name may be
    any text without ``=`` or a newline, so hyphenated or non-ASCII language
    names (e.g. "Serbo-Croatian") are recognised. Deeper headers such as
    ``===Noun===`` are not treated as language headers.

    Returns a dict mapping each language name to the text of its section.
    The dict is empty when the entry contains no language headers. Text
    before the first header is discarded.
    """
    regex = r"(?:\A|\n)==[ \t]*([^=\n]*?)[ \t]*==(?:\Z|\n)"
    # With one capture group, re.split() yields
    # [preamble, name1, body1, name2, body2, ...].
    split = re.split(regex, entry)
    names = split[1::2]
    bodies = split[2::2]
    return dict(zip(names, bodies))

def get_definition(self, section):
    """Summarise one language section as "<abbr> <definition>" chunks.

    For every known part of speech whose ``===Name===`` header occurs in
    *section*, the text up to the next ``===`` (or the end of the section)
    is handed to ``parse_body``. Each non-empty result is prefixed with the
    part-of-speech abbreviation wrapped in ``\\x02`` ... ``\\x0F`` control
    codes (presumably IRC bold/reset), and the chunks are joined with "; ".

    Returns an empty string when nothing could be extracted.
    """
    # A tuple of pairs, not a dict, so the output order is deterministic.
    parts_of_speech = (
        ("v.", "Verb"),
        ("n.", "Noun"),
        ("pron.", "Pronoun"),
        ("adj.", "Adjective"),
        ("adv.", "Adverb"),
        ("prep.", "Preposition"),
        ("conj.", "Conjunction"),
        ("inter.", "Interjection"),
        ("symbol", "Symbol"),
        ("suffix", "Suffix"),
        ("initialism", "Initialism"),
        ("phrase", "Phrase"),
        ("proverb", "Proverb"),
    )
    defs = []
    for part, fullname in parts_of_speech:
        regex = r"===\s*" + fullname + r"\s*===(.*?)(?:===|\Z)"
        match = re.search(regex, section, re.DOTALL)
        if not match:
            continue
        definition = self.parse_body(match.group(1))
        if definition:
            # Unicode template: formatting non-ASCII text into a byte
            # string raises UnicodeEncodeError on Python 2.
            defs.append(u"\x02{0}\x0F {1}".format(part, definition))

    return u"; ".join(defs)

def parse_body(self, body):
    """Extract the numbered senses from a part-of-speech section body.

    Only definition lines are used: lines starting with one or more ``#``
    whose first following character is not ``:`` or ``*``, so example and
    quotation lines are skipped. From each such line, piped links are
    reduced to their label, templates are removed via ``strip_templates``,
    and the leading ``#`` markers, bold/italic quotes and remaining link
    brackets are dropped. Lines left empty by this are ignored.

    Returns None when there are no senses, the bare text when there is
    exactly one, and otherwise "1. first 2. second ...".
    """
    senses = []
    for line in body.splitlines():
        line = line.strip()
        if not re.match(r"#+\s*[^#:*\s]", line):
            continue
        # The target and label may not contain brackets, so a match cannot
        # start in one plain [[link]] and swallow text up to a later pipe.
        line = re.sub(r"\[\[[^\[\]|]*\|([^\[\]]*)\]\]", r"\1", line)
        line = self.strip_templates(line)
        # lstrip rather than [1:] so "##" sub-senses lose every marker.
        line = line.lstrip("#").replace("'''", "").replace("''", "")
        line = line.replace("[[", "").replace("]]", "").strip()
        if line:
            senses.append(line)

    if not senses:
        return None
    if len(senses) == 1:
        return senses[0]

    # Number the senses incrementally, starting at 1.
    numbered = [u"{0}. {1}".format(i, sense)
                for i, sense in enumerate(senses, 1)]
    return u" ".join(numbered)

def strip_templates(self, line):
    """Return *line* with every ``{{...}}`` template removed.

    Nested templates are handled by tracking brace depth: text is kept only
    while the depth is zero. The ``{{`` and ``}}`` markers themselves are
    never kept. A stray ``}}`` with no matching opener is dropped without
    affecting the depth, so the text after it is still returned. Text after
    an unclosed ``{{`` is discarded.
    """
    kept = []
    depth = 0
    pos = 0
    length = len(line)
    while pos < length:
        pair = line[pos:pos + 2]
        if pair == "{{":
            depth += 1
            pos += 2
        elif pair == "}}":
            # Never go below zero, or an unbalanced closer would hide the
            # rest of the line.
            if depth > 0:
                depth -= 1
            pos += 2
        else:
            if depth == 0:
                kept.append(line[pos])
            pos += 1
    return "".join(kept)

+ 4
- 5
earwigbot/commands/link.py View File

@@ -35,15 +35,15 @@ class Link(Command):

if re.search("(\[\[(.*?)\]\])|(\{\{(.*?)\}\})", msg):
links = self.parse_line(msg)
links = " , ".join(links)
self.reply(data, links)
links = u" , ".join(links)
self.reply(data, links.encode("utf8"))

elif data.command == "link":
if not data.args:
self.reply(data, "what do you want me to link to?")
return
pagename = " ".join(data.args)
link = self.site.get_page(pagename).url
link = self.site.get_page(pagename).url.encode("utf8")
self.reply(data, link)

def parse_line(self, line):
@@ -68,5 +68,4 @@ class Link(Command):
return results

def parse_template(self, pagename):
pagename = "".join(("Template:", pagename))
return self.site.get_page(pagename).url
return self.site.get_page("Template:" + pagename).url

+ 2
- 1
earwigbot/config.py View File

@@ -274,7 +274,8 @@ class BotConfig(object):
key = getpass("Enter key to decrypt bot passwords: ")
self._decryption_cipher = Blowfish.new(sha256(key).digest())
signature = self.metadata["signature"]
assert bcrypt.hashpw(key, signature) == signature
if bcrypt.hashpw(key, signature) != signature:
raise RuntimeError("Incorrect password.")
for node, nodes in self._decryptable_nodes:
self._decrypt(node, nodes)



+ 2
- 2
earwigbot/managers.py View File

@@ -78,7 +78,7 @@ class _ResourceManager(object):
try:
resource = klass(self.bot) # Create instance of resource
except Exception:
e = "Error instantiating {0} class in {1} (from {2})"
e = "Error instantiating {0} class in '{1}' (from {2})"
self.logger.exception(e.format(res_type, name, path))
else:
self._resources[resource.name] = resource
@@ -98,7 +98,7 @@ class _ResourceManager(object):
try:
module = imp.load_module(name, f, path, desc)
except Exception:
e = "Couldn't load module {0} (from {1})"
e = "Couldn't load module '{0}' (from {1})"
self.logger.exception(e.format(name, path))
return
finally:


+ 2
- 1
earwigbot/tasks/afc_statistics.py View File

@@ -269,7 +269,8 @@ class AFCStatistics(Task):
tracked = [i[0] for i in cursor.fetchall()]

category = self.site.get_category(self.pending_cat)
for title, pageid in category.get_members():
for page in category.get_members():
title, pageid = page.title, page.pageid
if title in self.ignore_list:
continue
if pageid not in tracked:


+ 2
- 2
earwigbot/wiki/page.py View File

@@ -513,9 +513,9 @@ class Page(CopyvioMixIn):
return self._fullurl
else:
encoded = self._title.encode("utf8").replace(" ", "_")
slug = quote(encoded, safe="/:")
slug = quote(encoded, safe="/:").decode("utf8")
path = self.site._article_path.replace("$1", slug)
return ''.join((self.site.url, path))
return u"".join((self.site.url, path))

@property
def namespace(self):


+ 11
- 5
earwigbot/wiki/site.py View File

@@ -131,13 +131,19 @@ class Site(object):
self._api_info_cache = {"maxlag": 0, "lastcheck": 0}

# Attributes used for SQL queries:
self._sql_data = sql
if sql:
self._sql_data = sql
else:
self._sql_data = {}
self._sql_conn = None
self._sql_lock = Lock()
self._sql_info_cache = {"replag": 0, "lastcheck": 0, "usable": None}

# Attribute used in copyright violation checks (see CopyrightMixIn):
self._search_config = search_config
if search_config:
self._search_config = search_config
else:
self._search_config = {}

# Set up cookiejar and URL opener for making API queries:
if cookiejar:
@@ -150,9 +156,6 @@ class Site(object):
self._opener.addheaders = [("User-Agent", user_agent),
("Accept-Encoding", "gzip")]

# Get all of the above attributes that were not specified as arguments:
self._load_attributes()

# Set up our internal logger:
if logger:
self._logger = logger
@@ -160,6 +163,9 @@ class Site(object):
self._logger = getLogger("earwigbot.wiki")
self._logger.addHandler(NullHandler())

# Get all of the above attributes that were not specified as arguments:
self._load_attributes()

# If we have a name/pass and the API says we're not logged in, log in:
self._login_info = name, password = login
if name and password:


+ 4
- 17
earwigbot/wiki/sitesdb.py View File

@@ -278,6 +278,7 @@ class SitesDB(object):
else:
conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
self._logger.info("Removed site '{0}'".format(name))
return True

def get_site(self, name=None, project=None, lang=None):
@@ -376,34 +377,20 @@ class SitesDB(object):
assert_edit = config.wiki.get("assert")
maxlag = config.wiki.get("maxlag")
wait_between_queries = config.wiki.get("waitTime", 3)
logger = self._logger.getChild(name)
search_config = config.wiki.get("search")

if user_agent:
user_agent = user_agent.replace("$1", __version__)
user_agent = user_agent.replace("$2", python_version())

if search_config:
nltk_dir = path.join(self.config.root_dir, ".nltk")
search_config["nltk_dir"] = nltk_dir
search_config["exclusions_db"] = self._exclusions_db

if not sql:
sql = config.wiki.get("sql", {})
for key, value in sql.iteritems():
if "$1" in value:
sql[key] = value.replace("$1", name)

# Create a Site object to log in and load the other attributes:
site = Site(base_url=base_url, script_path=script_path, sql=sql,
login=login, cookiejar=cookiejar, user_agent=user_agent,
use_https=use_https, assert_edit=assert_edit,
maxlag=maxlag, wait_between_queries=wait_between_queries,
logger=logger, search_config=search_config)
maxlag=maxlag, wait_between_queries=wait_between_queries)

self._logger.info("Added site '{0}'".format(site.name))
self._add_site_to_sitesdb(site)
self._sites[site.name] = site
return site
return self._get_site_object(site.name)

def remove_site(self, name=None, project=None, lang=None):
"""Remove a site from the sitesdb.


Loading…
Cancel
Save