@@ -74,12 +74,13 @@ class AFCReport(Command): | |||||
return page | return page | ||||
def report(self, page): | def report(self, page): | ||||
url = page.url.replace("en.wikipedia.org/wiki", "enwp.org") | |||||
url = page.url.encode("utf8") | |||||
url = url.replace("en.wikipedia.org/wiki", "enwp.org") | |||||
short = self.statistics.get_short_title(page.title) | short = self.statistics.get_short_title(page.title) | ||||
status = self.get_status(page) | status = self.get_status(page) | ||||
user = page.get_creator() | user = page.get_creator() | ||||
user_name = user.name | user_name = user.name | ||||
user_url = user.get_talkpage().url | |||||
user_url = user.get_talkpage().url.encode("utf8") | |||||
msg1 = "AfC submission report for \x0302{0}\x0F ({1}):" | msg1 = "AfC submission report for \x0302{0}\x0F ({1}):" | ||||
msg2 = "Status: \x0303{0}\x0F" | msg2 = "Status: \x0303{0}\x0F" | ||||
@@ -54,6 +54,6 @@ class AFCSubmissions(Command): | |||||
site = self.bot.wiki.get_site() | site = self.bot.wiki.get_site() | ||||
category = site.get_category("Pending AfC submissions") | category = site.get_category("Pending AfC submissions") | ||||
members = category.get_members(limit=number + len(self.ignore_list)) | members = category.get_members(limit=number + len(self.ignore_list)) | ||||
urls = [member.url for member in members if member.title not in self.ignore_list] | |||||
urls = [member.url.encode("utf8") for member in members if member.title not in self.ignore_list] | |||||
pages = ", ".join(urls[:number]) | pages = ", ".join(urls[:number]) | ||||
self.reply(data, "{0} pending AfC subs: {1}".format(number, pages)) | self.reply(data, "{0} pending AfC subs: {1}".format(number, pages)) |
@@ -0,0 +1,149 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
import re | |||||
from earwigbot import exceptions | |||||
from earwigbot.commands import Command | |||||
class Dictionary(Command):
    """Define words and stuff."""
    name = "dictionary"
    commands = ["dict", "dictionary", "define"]

    def process(self, data):
        """Reply to an IRC command with Wiktionary definitions of a term.

        The term is the command's argument string; the lookup language is
        taken from the default wiki site's language.
        """
        if not data.args:
            self.reply(data, "what do you want me to define?")
            return
        term = " ".join(data.args)
        lang = self.bot.wiki.get_site().lang
        try:
            defined = self.define(term, lang)
        except exceptions.APIError:
            # No Wiktionary exists for this language (add_site failed).
            msg = "cannot find a {0}-language Wiktionary."
            self.reply(data, msg.format(lang))
        else:
            # IRC wants raw bytes; definitions may contain non-ASCII text.
            self.reply(data, defined.encode("utf8"))

    def define(self, term, lang):
        """Return a one-line unicode summary of *term*'s definitions.

        Looks *term* up on the *lang* Wiktionary, splitting the entry by
        language heading and summarizing each language's definitions.
        Raises earwigbot.exceptions.APIError if the site cannot be added.
        """
        try:
            site = self.bot.wiki.get_site(project="wiktionary", lang=lang)
        except exceptions.SiteNotFoundError:
            site = self.bot.wiki.add_site(project="wiktionary", lang=lang)
        page = site.get_page(term)
        try:
            entry = page.get()
        except (exceptions.PageNotFoundError, exceptions.InvalidPageError):
            return u"no definition found."
        languages = self.get_languages(entry)
        if not languages:
            return u"couldn't parse {0}!".format(page.url)
        results = [
            u"({0}) {1}".format(lang_name, self.get_definition(section))
            for lang_name, section in sorted(languages.items())
        ]
        return u"; ".join(results)

    def get_languages(self, entry):
        """Split a Wiktionary entry into a {language: section text} dict.

        Splits on level-2 headings (``== Language ==``). Returns None when
        the split produces an even-length list, which indicates the entry
        doesn't follow the expected heading structure.
        """
        regex = r"(?:\A|\n)==\s*([a-zA-Z0-9_ ]*?)\s*==(?:\Z|\n)"
        split = re.split(regex, entry)
        if len(split) % 2 == 0:
            return None
        split.pop(0)  # Drop any text preceding the first language heading
        # Remaining items alternate: heading, section body, heading, ...
        return dict(zip(split[::2], split[1::2]))

    def get_definition(self, section):
        """Return a compact definition string for one language section.

        Scans the section for known part-of-speech headings (level 3) and
        joins the parsed senses of each with semicolons.
        """
        parts_of_speech = {
            "v.": "Verb",
            "n.": "Noun",
            "pron.": "Pronoun",
            "adj.": "Adjective",
            "adv.": "Adverb",
            "prep.": "Preposition",
            "conj.": "Conjunction",
            "inter.": "Interjection",
            "symbol": "Symbol",
            "suffix": "Suffix",
            "initialism": "Initialism",
            "phrase": "Phrase",
            "proverb": "Proverb",
        }
        defs = []
        for part, fullname in parts_of_speech.items():
            # Grab everything between this heading and the next one (or EOF):
            regex = r"===\s*" + fullname + r"\s*===(.*?)(?:(?:===)|\Z)"
            body = re.findall(regex, section, re.DOTALL)
            if body:
                definition = self.parse_body(body[0])
                if definition:
                    # \x02...\x0F is IRC bold formatting for the POS label.
                    defs.append("\x02{0}\x0F {1}".format(part, definition))
        return "; ".join(defs)

    def parse_body(self, body):
        """Extract and clean the numbered senses from a section body.

        Only ``#``-prefixed definition lines are kept (``#:`` examples and
        ``#*`` quotations are skipped). Wiki markup is stripped. Returns
        None if no senses are found, the bare sense if there is exactly
        one, or the senses numbered ``1. ... 2. ...`` otherwise.
        """
        senses = []
        for line in body.splitlines():
            line = line.strip()
            if re.match(r"#\s*[^:*]", line):
                # [[target|label]] -> label; then drop templates and quotes
                line = re.sub(r"\[\[(.*?)\|(.*?)\]\]", r"\2", line)
                line = self.strip_templates(line)
                line = line[1:].replace("'''", "").replace("''", "")
                line = line.replace("[[", "").replace("]]", "")
                senses.append(line.strip())
        if not senses:
            return None
        if len(senses) == 1:
            return senses[0]
        # Number the senses incrementally
        return u" ".join(
            u"{0}. {1}".format(i, sense) for i, sense in enumerate(senses, 1))

    def strip_templates(self, line):
        """Return *line* with all ``{{...}}`` templates removed.

        Handles nested templates by tracking brace depth; only characters
        at depth zero are kept. A single linear scan (the previous
        implementation's repeated ``pop(0)`` was quadratic).
        """
        kept = []
        depth = 0
        i = 0
        end = len(line)
        while i < end:
            pair = line[i:i + 2]
            if pair == "{{":
                depth += 1
                i += 2
            elif pair == "}}":
                depth -= 1
                i += 2
            else:
                if depth == 0:
                    kept.append(line[i])
                i += 1
        return "".join(kept)
@@ -35,15 +35,15 @@ class Link(Command): | |||||
if re.search("(\[\[(.*?)\]\])|(\{\{(.*?)\}\})", msg): | if re.search("(\[\[(.*?)\]\])|(\{\{(.*?)\}\})", msg): | ||||
links = self.parse_line(msg) | links = self.parse_line(msg) | ||||
links = " , ".join(links) | |||||
self.reply(data, links) | |||||
links = u" , ".join(links) | |||||
self.reply(data, links.encode("utf8")) | |||||
elif data.command == "link": | elif data.command == "link": | ||||
if not data.args: | if not data.args: | ||||
self.reply(data, "what do you want me to link to?") | self.reply(data, "what do you want me to link to?") | ||||
return | return | ||||
pagename = " ".join(data.args) | pagename = " ".join(data.args) | ||||
link = self.site.get_page(pagename).url | |||||
link = self.site.get_page(pagename).url.encode("utf8") | |||||
self.reply(data, link) | self.reply(data, link) | ||||
def parse_line(self, line): | def parse_line(self, line): | ||||
@@ -68,5 +68,4 @@ class Link(Command): | |||||
return results | return results | ||||
def parse_template(self, pagename): | def parse_template(self, pagename): | ||||
pagename = "".join(("Template:", pagename)) | |||||
return self.site.get_page(pagename).url | |||||
return self.site.get_page("Template:" + pagename).url |
@@ -274,7 +274,8 @@ class BotConfig(object): | |||||
key = getpass("Enter key to decrypt bot passwords: ") | key = getpass("Enter key to decrypt bot passwords: ") | ||||
self._decryption_cipher = Blowfish.new(sha256(key).digest()) | self._decryption_cipher = Blowfish.new(sha256(key).digest()) | ||||
signature = self.metadata["signature"] | signature = self.metadata["signature"] | ||||
assert bcrypt.hashpw(key, signature) == signature | |||||
if bcrypt.hashpw(key, signature) != signature: | |||||
raise RuntimeError("Incorrect password.") | |||||
for node, nodes in self._decryptable_nodes: | for node, nodes in self._decryptable_nodes: | ||||
self._decrypt(node, nodes) | self._decrypt(node, nodes) | ||||
@@ -78,7 +78,7 @@ class _ResourceManager(object): | |||||
try: | try: | ||||
resource = klass(self.bot) # Create instance of resource | resource = klass(self.bot) # Create instance of resource | ||||
except Exception: | except Exception: | ||||
e = "Error instantiating {0} class in {1} (from {2})" | |||||
e = "Error instantiating {0} class in '{1}' (from {2})" | |||||
self.logger.exception(e.format(res_type, name, path)) | self.logger.exception(e.format(res_type, name, path)) | ||||
else: | else: | ||||
self._resources[resource.name] = resource | self._resources[resource.name] = resource | ||||
@@ -98,7 +98,7 @@ class _ResourceManager(object): | |||||
try: | try: | ||||
module = imp.load_module(name, f, path, desc) | module = imp.load_module(name, f, path, desc) | ||||
except Exception: | except Exception: | ||||
e = "Couldn't load module {0} (from {1})" | |||||
e = "Couldn't load module '{0}' (from {1})" | |||||
self.logger.exception(e.format(name, path)) | self.logger.exception(e.format(name, path)) | ||||
return | return | ||||
finally: | finally: | ||||
@@ -269,7 +269,8 @@ class AFCStatistics(Task): | |||||
tracked = [i[0] for i in cursor.fetchall()] | tracked = [i[0] for i in cursor.fetchall()] | ||||
category = self.site.get_category(self.pending_cat) | category = self.site.get_category(self.pending_cat) | ||||
for title, pageid in category.get_members(): | |||||
for page in category.get_members(): | |||||
title, pageid = page.title, page.pageid | |||||
if title in self.ignore_list: | if title in self.ignore_list: | ||||
continue | continue | ||||
if pageid not in tracked: | if pageid not in tracked: | ||||
@@ -513,9 +513,9 @@ class Page(CopyvioMixIn): | |||||
return self._fullurl | return self._fullurl | ||||
else: | else: | ||||
encoded = self._title.encode("utf8").replace(" ", "_") | encoded = self._title.encode("utf8").replace(" ", "_") | ||||
slug = quote(encoded, safe="/:") | |||||
slug = quote(encoded, safe="/:").decode("utf8") | |||||
path = self.site._article_path.replace("$1", slug) | path = self.site._article_path.replace("$1", slug) | ||||
return ''.join((self.site.url, path)) | |||||
return u"".join((self.site.url, path)) | |||||
@property | @property | ||||
def namespace(self): | def namespace(self): | ||||
@@ -131,13 +131,19 @@ class Site(object): | |||||
self._api_info_cache = {"maxlag": 0, "lastcheck": 0} | self._api_info_cache = {"maxlag": 0, "lastcheck": 0} | ||||
# Attributes used for SQL queries: | # Attributes used for SQL queries: | ||||
self._sql_data = sql | |||||
if sql: | |||||
self._sql_data = sql | |||||
else: | |||||
self._sql_data = {} | |||||
self._sql_conn = None | self._sql_conn = None | ||||
self._sql_lock = Lock() | self._sql_lock = Lock() | ||||
self._sql_info_cache = {"replag": 0, "lastcheck": 0, "usable": None} | self._sql_info_cache = {"replag": 0, "lastcheck": 0, "usable": None} | ||||
# Attribute used in copyright violation checks (see CopyrightMixIn): | # Attribute used in copyright violation checks (see CopyrightMixIn): | ||||
self._search_config = search_config | |||||
if search_config: | |||||
self._search_config = search_config | |||||
else: | |||||
self._search_config = {} | |||||
# Set up cookiejar and URL opener for making API queries: | # Set up cookiejar and URL opener for making API queries: | ||||
if cookiejar: | if cookiejar: | ||||
@@ -150,9 +156,6 @@ class Site(object): | |||||
self._opener.addheaders = [("User-Agent", user_agent), | self._opener.addheaders = [("User-Agent", user_agent), | ||||
("Accept-Encoding", "gzip")] | ("Accept-Encoding", "gzip")] | ||||
# Get all of the above attributes that were not specified as arguments: | |||||
self._load_attributes() | |||||
# Set up our internal logger: | # Set up our internal logger: | ||||
if logger: | if logger: | ||||
self._logger = logger | self._logger = logger | ||||
@@ -160,6 +163,9 @@ class Site(object): | |||||
self._logger = getLogger("earwigbot.wiki") | self._logger = getLogger("earwigbot.wiki") | ||||
self._logger.addHandler(NullHandler()) | self._logger.addHandler(NullHandler()) | ||||
# Get all of the above attributes that were not specified as arguments: | |||||
self._load_attributes() | |||||
# If we have a name/pass and the API says we're not logged in, log in: | # If we have a name/pass and the API says we're not logged in, log in: | ||||
self._login_info = name, password = login | self._login_info = name, password = login | ||||
if name and password: | if name and password: | ||||
@@ -278,6 +278,7 @@ class SitesDB(object): | |||||
else: | else: | ||||
conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,)) | conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,)) | ||||
conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,)) | conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,)) | ||||
self._logger.info("Removed site '{0}'".format(name)) | |||||
return True | return True | ||||
def get_site(self, name=None, project=None, lang=None): | def get_site(self, name=None, project=None, lang=None): | ||||
@@ -376,34 +377,20 @@ class SitesDB(object): | |||||
assert_edit = config.wiki.get("assert") | assert_edit = config.wiki.get("assert") | ||||
maxlag = config.wiki.get("maxlag") | maxlag = config.wiki.get("maxlag") | ||||
wait_between_queries = config.wiki.get("waitTime", 3) | wait_between_queries = config.wiki.get("waitTime", 3) | ||||
logger = self._logger.getChild(name) | |||||
search_config = config.wiki.get("search") | |||||
if user_agent: | if user_agent: | ||||
user_agent = user_agent.replace("$1", __version__) | user_agent = user_agent.replace("$1", __version__) | ||||
user_agent = user_agent.replace("$2", python_version()) | user_agent = user_agent.replace("$2", python_version()) | ||||
if search_config: | |||||
nltk_dir = path.join(self.config.root_dir, ".nltk") | |||||
search_config["nltk_dir"] = nltk_dir | |||||
search_config["exclusions_db"] = self._exclusions_db | |||||
if not sql: | |||||
sql = config.wiki.get("sql", {}) | |||||
for key, value in sql.iteritems(): | |||||
if "$1" in value: | |||||
sql[key] = value.replace("$1", name) | |||||
# Create a Site object to log in and load the other attributes: | # Create a Site object to log in and load the other attributes: | ||||
site = Site(base_url=base_url, script_path=script_path, sql=sql, | site = Site(base_url=base_url, script_path=script_path, sql=sql, | ||||
login=login, cookiejar=cookiejar, user_agent=user_agent, | login=login, cookiejar=cookiejar, user_agent=user_agent, | ||||
use_https=use_https, assert_edit=assert_edit, | use_https=use_https, assert_edit=assert_edit, | ||||
maxlag=maxlag, wait_between_queries=wait_between_queries, | |||||
logger=logger, search_config=search_config) | |||||
maxlag=maxlag, wait_between_queries=wait_between_queries) | |||||
self._logger.info("Added site '{0}'".format(site.name)) | |||||
self._add_site_to_sitesdb(site) | self._add_site_to_sitesdb(site) | ||||
self._sites[site.name] = site | |||||
return site | |||||
return self._get_site_object(site.name) | |||||
def remove_site(self, name=None, project=None, lang=None): | def remove_site(self, name=None, project=None, lang=None): | ||||
"""Remove a site from the sitesdb. | """Remove a site from the sitesdb. | ||||