@@ -74,12 +74,13 @@ class AFCReport(Command):
         return page

     def report(self, page):
-        url = page.url.replace("en.wikipedia.org/wiki", "enwp.org")
+        url = page.url.encode("utf8")
+        url = url.replace("en.wikipedia.org/wiki", "enwp.org")
         short = self.statistics.get_short_title(page.title)
         status = self.get_status(page)
         user = page.get_creator()
         user_name = user.name
-        user_url = user.get_talkpage().url
+        user_url = user.get_talkpage().url.encode("utf8")

         msg1 = "AfC submission report for \x0302{0}\x0F ({1}):"
         msg2 = "Status: \x0303{0}\x0F"
@@ -54,6 +54,6 @@ class AFCSubmissions(Command):
         site = self.bot.wiki.get_site()
         category = site.get_category("Pending AfC submissions")
         members = category.get_members(limit=number + len(self.ignore_list))
-        urls = [member.url for member in members if member.title not in self.ignore_list]
+        urls = [member.url.encode("utf8") for member in members if member.title not in self.ignore_list]
         pages = ", ".join(urls[:number])
         self.reply(data, "{0} pending AfC subs: {1}".format(number, pages))
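The two hunks above apply the same fix: Page.url is now a unicode string, and handing unicode to the byte-oriented IRC layer breaks under Python 2 as soon as a title contains a non-ASCII character. A minimal sketch of the failure mode and the explicit encode (send_to_irc is a hypothetical stand-in for the bot's reply path):

    # -*- coding: utf-8 -*-
    # Python 2 sketch: why these hunks add .encode("utf8").

    def send_to_irc(line):
        # Hypothetical reply path; the real one writes bytes to a socket.
        if not isinstance(line, str):
            raise TypeError("IRC layer wants bytes, got unicode")

    url = u"https://enwp.org/S\xe3o_Paulo"  # what Page.url now returns

    try:
        send_to_irc("link: " + url)  # str + unicode promotes to unicode
    except TypeError:
        pass  # this is the crash the diff prevents

    send_to_irc("link: " + url.encode("utf8"))  # explicit UTF-8 bytes: fine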
@@ -0,0 +1,149 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+import re
+
+from earwigbot import exceptions
+from earwigbot.commands import Command
+
+class Dictionary(Command):
+    """Define words and stuff."""
+    name = "dictionary"
+    commands = ["dict", "dictionary", "define"]
+
+    def process(self, data):
+        if not data.args:
+            self.reply(data, "what do you want me to define?")
+            return
+
+        term = " ".join(data.args)
+        lang = self.bot.wiki.get_site().lang
+        try:
+            defined = self.define(term, lang)
+        except exceptions.APIError:
+            msg = "cannot find a {0}-language Wiktionary."
+            self.reply(data, msg.format(lang))
+        else:
+            self.reply(data, defined.encode("utf8"))
+
+    def define(self, term, lang):
+        try:
+            site = self.bot.wiki.get_site(project="wiktionary", lang=lang)
+        except exceptions.SiteNotFoundError:
+            site = self.bot.wiki.add_site(project="wiktionary", lang=lang)
+
+        page = site.get_page(term)
+        try:
+            entry = page.get()
+        except (exceptions.PageNotFoundError, exceptions.InvalidPageError):
+            return "no definition found."
+
+        languages = self.get_languages(entry)
+        if not languages:
+            return u"couldn't parse {0}!".format(page.url)
+
+        result = []
+        for lang, section in sorted(languages.items()):
+            this = u"({0}) {1}".format(lang, self.get_definition(section))
+            result.append(this)
+        return u"; ".join(result)
+
+    def get_languages(self, entry):
+        regex = r"(?:\A|\n)==\s*([a-zA-Z0-9_ ]*?)\s*==(?:\Z|\n)"
+        split = re.split(regex, entry)
+        if len(split) % 2 == 0:
+            return None
+
+        split.pop(0)
+        languages = {}
+        for i in xrange(0, len(split), 2):
+            languages[split[i]] = split[i + 1]
+        return languages
+
+    def get_definition(self, section):
+        parts_of_speech = {
+            "v.": "Verb",
+            "n.": "Noun",
+            "pron.": "Pronoun",
+            "adj.": "Adjective",
+            "adv.": "Adverb",
+            "prep.": "Preposition",
+            "conj.": "Conjunction",
+            "inter.": "Interjection",
+            "symbol": "Symbol",
+            "suffix": "Suffix",
+            "initialism": "Initialism",
+            "phrase": "Phrase",
+            "proverb": "Proverb",
+        }
+        defs = []
+        for part, fullname in parts_of_speech.iteritems():
+            if re.search("===\s*" + fullname + "\s*===", section):
+                regex = "===\s*" + fullname + "\s*===(.*?)(?:(?:===)|\Z)"
+                body = re.findall(regex, section, re.DOTALL)
+                if body:
+                    definition = self.parse_body(body[0])
+                    if definition:
+                        defs.append("\x02{0}\x0F {1}".format(part, definition))
+        return "; ".join(defs)
+
+    def parse_body(self, body):
+        senses = []
+        for line in body.splitlines():
+            line = line.strip()
+            if re.match("#\s*[^:*]", line):
+                line = re.sub("\[\[(.*?)\|(.*?)\]\]", r"\2", line)
+                line = self.strip_templates(line)
+                line = line[1:].replace("'''", "").replace("''", "")
+                line = line.replace("[[", "").replace("]]", "")
+                senses.append(line.strip())
+
+        if not senses:
+            return None
+        if len(senses) == 1:
+            return senses[0]
+
+        result = []  # Number the senses incrementally
+        for i, sense in enumerate(senses):
+            result.append(u"{0}. {1}".format(i + 1, sense))
+        return " ".join(result)
+
+    def strip_templates(self, line):
+        line = list(line)
+        stripped = ""
+        depth = 0
+        while line:
+            this = line.pop(0)
+            if line:
+                next = line[0]
+            else:
+                next = ""
+            if this == "{" and next == "{":
+                line.pop(0)
+                depth += 1
+            elif this == "}" and next == "}":
+                line.pop(0)
+                depth -= 1
+            elif depth == 0:
+                stripped += this
+        return stripped
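The depth-tracking scanner at the bottom of the new file is the interesting part: it walks the line character by character, counts {{ }} nesting, and keeps only depth-zero text. A standalone mirror of that logic, for illustration only:

    # Standalone mirror of Dictionary.strip_templates (illustrative):
    # track {{ }} nesting depth and keep only the depth-zero characters.

    def strip_templates(line):
        stripped, depth, i = "", 0, 0
        while i < len(line):
            if line[i:i + 2] == "{{":
                depth += 1
                i += 2
            elif line[i:i + 2] == "}}":
                depth -= 1
                i += 2
            else:
                if depth == 0:
                    stripped += line[i]
                i += 1
        return stripped

    print strip_templates("# {{context|transitive|lang=en}} To [[define]] a word.")
    # -> "#  To [[define]] a word."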
@@ -35,15 +35,15 @@ class Link(Command):
         if re.search("(\[\[(.*?)\]\])|(\{\{(.*?)\}\})", msg):
             links = self.parse_line(msg)
-            links = " , ".join(links)
-            self.reply(data, links)
+            links = u" , ".join(links)
+            self.reply(data, links.encode("utf8"))

         elif data.command == "link":
             if not data.args:
                 self.reply(data, "what do you want me to link to?")
                 return
             pagename = " ".join(data.args)
-            link = self.site.get_page(pagename).url
+            link = self.site.get_page(pagename).url.encode("utf8")
             self.reply(data, link)

     def parse_line(self, line):
@@ -68,5 +68,4 @@ class Link(Command):
         return results

     def parse_template(self, pagename):
-        pagename = "".join(("Template:", pagename))
-        return self.site.get_page(pagename).url
+        return self.site.get_page("Template:" + pagename).url
@@ -274,7 +274,8 @@ class BotConfig(object):
             key = getpass("Enter key to decrypt bot passwords: ")
             self._decryption_cipher = Blowfish.new(sha256(key).digest())
             signature = self.metadata["signature"]
-            assert bcrypt.hashpw(key, signature) == signature
+            if bcrypt.hashpw(key, signature) != signature:
+                raise RuntimeError("Incorrect password.")

         for node, nodes in self._decryptable_nodes:
             self._decrypt(node, nodes)
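This swaps a bare assert, which silently disappears under python -O, for an explicit check that always runs. The comparison itself is the standard py-bcrypt verification idiom: hashing the candidate key with the stored hash as the salt reproduces the stored hash only when the key is right. A small sketch:

    import bcrypt

    stored = bcrypt.hashpw("hunter2", bcrypt.gensalt())  # created once, at setup

    def check_key(key, stored):
        # py-bcrypt idiom: the stored hash doubles as the salt, so only
        # the correct key hashes back to the identical string.
        if bcrypt.hashpw(key, stored) != stored:
            raise RuntimeError("Incorrect password.")

    check_key("hunter2", stored)  # passes; a wrong key would raise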
@@ -78,7 +78,7 @@ class _ResourceManager(object):
         try:
             resource = klass(self.bot)  # Create instance of resource
         except Exception:
-            e = "Error instantiating {0} class in {1} (from {2})"
+            e = "Error instantiating {0} class in '{1}' (from {2})"
             self.logger.exception(e.format(res_type, name, path))
         else:
             self._resources[resource.name] = resource
@@ -98,7 +98,7 @@ class _ResourceManager(object):
         try:
             module = imp.load_module(name, f, path, desc)
         except Exception:
-            e = "Couldn't load module {0} (from {1})"
+            e = "Couldn't load module '{0}' (from {1})"
             self.logger.exception(e.format(name, path))
             return
         finally:
@@ -269,7 +269,8 @@ class AFCStatistics(Task):
         tracked = [i[0] for i in cursor.fetchall()]

         category = self.site.get_category(self.pending_cat)
-        for title, pageid in category.get_members():
+        for page in category.get_members():
+            title, pageid = page.title, page.pageid
             if title in self.ignore_list:
                 continue
             if pageid not in tracked:
@@ -513,9 +513,9 @@ class Page(CopyvioMixIn):
             return self._fullurl
         else:
             encoded = self._title.encode("utf8").replace(" ", "_")
-            slug = quote(encoded, safe="/:")
+            slug = quote(encoded, safe="/:").decode("utf8")
             path = self.site._article_path.replace("$1", slug)
-            return ''.join((self.site.url, path))
+            return u"".join((self.site.url, path))

     @property
     def namespace(self):
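The url property now percent-encodes the title's UTF-8 bytes and then decodes the (pure-ASCII) result back to unicode, so Page.url is consistently one unicode string. The round-trip under Python 2's urllib.quote:

    from urllib import quote

    title = u"S\xe3o Paulo"                            # u"São Paulo"
    encoded = title.encode("utf8").replace(" ", "_")   # UTF-8 bytes
    slug = quote(encoded, safe="/:")                   # 'S%C3%A3o_Paulo' (str)
    slug = slug.decode("utf8")                         # back to unicode; all ASCII
    print u"".join((u"//en.wikipedia.org", u"/wiki/" + slug))
    # //en.wikipedia.org/wiki/S%C3%A3o_Paulo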
@@ -131,13 +131,19 @@ class Site(object):
         self._api_info_cache = {"maxlag": 0, "lastcheck": 0}

         # Attributes used for SQL queries:
-        self._sql_data = sql
+        if sql:
+            self._sql_data = sql
+        else:
+            self._sql_data = {}
         self._sql_conn = None
         self._sql_lock = Lock()
         self._sql_info_cache = {"replag": 0, "lastcheck": 0, "usable": None}

         # Attribute used in copyright violation checks (see CopyrightMixIn):
-        self._search_config = search_config
+        if search_config:
+            self._search_config = search_config
+        else:
+            self._search_config = {}

         # Set up cookiejar and URL opener for making API queries:
         if cookiejar:
@@ -150,9 +156,6 @@ class Site(object):
         self._opener.addheaders = [("User-Agent", user_agent),
                                    ("Accept-Encoding", "gzip")]

-        # Get all of the above attributes that were not specified as arguments:
-        self._load_attributes()
-
         # Set up our internal logger:
         if logger:
             self._logger = logger
@@ -160,6 +163,9 @@ class Site(object):
             self._logger = getLogger("earwigbot.wiki")
             self._logger.addHandler(NullHandler())

+        # Get all of the above attributes that were not specified as arguments:
+        self._load_attributes()
+
         # If we have a name/pass and the API says we're not logged in, log in:
         self._login_info = name, password = login
         if name and password:
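Two things happen in this file: falsy sql/search_config arguments are normalized to fresh empty dicts, and _load_attributes() moves below the logger setup so that anything it does can already be logged. The dict normalization is the usual Python 2 defence against handing one mutable default to every instance; an illustrative sketch of that pitfall (not from the diff):

    class Bad(object):
        def __init__(self, opts={}):          # one dict shared by all instances
            self.opts = opts

    class Good(object):
        def __init__(self, opts=None):
            self.opts = opts if opts else {}  # fresh dict per instance

    a, b = Bad(), Bad()
    a.opts["x"] = 1
    print b.opts   # {'x': 1}, state leaked across instances

    c, d = Good(), Good()
    c.opts["x"] = 1
    print d.opts   # {}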
@@ -278,6 +278,7 @@ class SitesDB(object):
         else:
             conn.execute("DELETE FROM sql_data WHERE sql_site = ?", (name,))
             conn.execute("DELETE FROM namespaces WHERE ns_site = ?", (name,))
+            self._logger.info("Removed site '{0}'".format(name))
         return True

     def get_site(self, name=None, project=None, lang=None):
@@ -376,34 +377,20 @@ class SitesDB(object):
         assert_edit = config.wiki.get("assert")
         maxlag = config.wiki.get("maxlag")
         wait_between_queries = config.wiki.get("waitTime", 3)
-        logger = self._logger.getChild(name)
-        search_config = config.wiki.get("search")

         if user_agent:
             user_agent = user_agent.replace("$1", __version__)
             user_agent = user_agent.replace("$2", python_version())

-        if search_config:
-            nltk_dir = path.join(self.config.root_dir, ".nltk")
-            search_config["nltk_dir"] = nltk_dir
-            search_config["exclusions_db"] = self._exclusions_db
-
-        if not sql:
-            sql = config.wiki.get("sql", {})
-            for key, value in sql.iteritems():
-                if "$1" in value:
-                    sql[key] = value.replace("$1", name)
-
         # Create a Site object to log in and load the other attributes:
         site = Site(base_url=base_url, script_path=script_path, sql=sql,
                     login=login, cookiejar=cookiejar, user_agent=user_agent,
                     use_https=use_https, assert_edit=assert_edit,
-                    maxlag=maxlag, wait_between_queries=wait_between_queries,
-                    logger=logger, search_config=search_config)
+                    maxlag=maxlag, wait_between_queries=wait_between_queries)
+        self._logger.info("Added site '{0}'".format(site.name))

         self._add_site_to_sitesdb(site)
-        self._sites[site.name] = site
-        return site
+        return self._get_site_object(site.name)

     def remove_site(self, name=None, project=None, lang=None):
         """Remove a site from the sitesdb.