Explorar el Código

Basic SQL querying support with oursql.

tags/v0.1^2
Ben Kurtovic hace 12 años
padre
commit
3205fa3215
Se han modificado 4 ficheros con 76 adiciones y 16 borrados
  1. +9
    -0
      README.md
  2. +3
    -0
      bot/wiki/exceptions.py
  3. +12
    -13
      bot/wiki/functions.py
  4. +52
    -3
      bot/wiki/site.py

+ 9
- 0
README.md Ver fichero

@@ -19,3 +19,12 @@ made over 45,000 edits.
A project to rewrite it from scratch began in early April 2011, thus moving
away from the Pywikipedia framework and allowing for less overall code, better
integration between bot parts, and easier maintenance.

# Installation

## Dependencies

EarwigBot uses the MySQL library
[oursql](http://packages.python.org/oursql/) (>= 0.9.2) for communicating with
MediaWiki databases, and some tasks use their own tables for storage. oursql is
optional: the bot runs without it, but any attempt to make an SQL query will
raise an error.

+ 3
- 0
bot/wiki/exceptions.py Ver fichero

@@ -64,3 +64,6 @@ class SpamDetectedError(EditError):

class FilteredError(EditError):
"""The edit filter refused our edit."""

class SQLError(WikiToolsetError):
    """Some error involving SQL querying occurred.

    For example, Site._sql_connect() raises this when the optional "oursql"
    module could not be imported.
    """

+ 12
- 13
bot/wiki/functions.py Ver fichero

@@ -32,7 +32,7 @@ def _load_config():
earwigbot.py or core/main.py will already call these functions.
"""
is_encrypted = config.load()
if is_encrypted: # passwords in the config file are encrypted
if is_encrypted: # Passwords in the config file are encrypted
key = getpass("Enter key to unencrypt bot passwords: ")
config.decrypt(key)

@@ -60,12 +60,11 @@ def _get_cookiejar():
try:
_cookiejar.load()
except LoadError:
# file contains bad data, so ignore it completely
pass
pass # File contains bad data, so ignore it completely
except IOError as e:
if e.errno == errno.ENOENT: # "No such file or directory"
# create the file and restrict reading/writing only to the owner,
# so others can't peek at our cookies
# Create the file and restrict reading/writing only to the owner,
# so others can't peek at our cookies:
open(cookie_file, "w").close()
chmod(cookie_file, stat.S_IRUSR|stat.S_IWUSR)
else:
@@ -82,7 +81,7 @@ def _get_site_object_from_dict(name, d):
base_url = d.get("baseURL")
article_path = d.get("articlePath")
script_path = d.get("scriptPath")
sql = (d.get("sqlServer"), d.get("sqlDB"))
sql = d.get("sql", {})
namespaces = d.get("namespaces", {})
login = (config.wiki.get("username"), config.wiki.get("password"))
cookiejar = _get_cookiejar()
@@ -129,18 +128,18 @@ def get_site(name=None, project=None, lang=None):
then `project` and `lang`. If, with any number of args, a site cannot be
found in the config, SiteNotFoundError is raised.
"""
# check if config has been loaded, and load it if it hasn't
# Check if config has been loaded, and load it if it hasn't:
if not config.is_loaded():
_load_config()

# someone specified a project without a lang (or a lang without a project)!
# Someone specified a project without a lang (or a lang without a project)!
if (project is None and lang is not None) or (project is not None and
lang is None):
e = "Keyword arguments 'lang' and 'project' must be specified together."
raise TypeError(e)

# no args given, so return our default site (project is None implies lang
# is None, so we don't need to add that in)
# No args given, so return our default site (project is None implies lang
# is None, so we don't need to add that in):
if name is None and project is None:
try:
default = config.wiki["defaultSite"]
@@ -154,12 +153,12 @@ def get_site(name=None, project=None, lang=None):
raise SiteNotFoundError(e)
return _get_site_object_from_dict(default, site)

# name arg given, but don't look at others unless `name` isn't found
# Name arg given, but don't look at others unless `name` isn't found:
if name is not None:
try:
site = config.wiki["sites"][name]
except KeyError:
if project is None: # implies lang is None, so only name was given
if project is None: # Implies lang is None, so only name was given
e = "Site '{0}' not found in config.".format(name)
raise SiteNotFoundError(e)
for sitename, site in config.wiki["sites"].items():
@@ -171,7 +170,7 @@ def get_site(name=None, project=None, lang=None):
else:
return _get_site_object_from_dict(name, site)

# if we end up here, then project and lang are both not None
# If we end up here, then project and lang are both not None:
for sitename, site in config.wiki["sites"].items():
if site["project"] == project and site["lang"] == lang:
return _get_site_object_from_dict(sitename, site)


+ 52
- 3
bot/wiki/site.py Ver fichero

@@ -10,6 +10,11 @@ from urllib import unquote_plus, urlencode
from urllib2 import build_opener, HTTPCookieProcessor, URLError
from urlparse import urlparse

try:
from oursql import connect
except ImportError:
connect = None

from wiki.category import Category
from wiki.constants import *
from wiki.exceptions import *
@@ -40,7 +45,7 @@ class Site(object):
"""

def __init__(self, name=None, project=None, lang=None, base_url=None,
article_path=None, script_path=None, sql=(None, None),
article_path=None, script_path=None, sql=None,
namespaces=None, login=(None, None), cookiejar=None,
user_agent=None, assert_edit=None, maxlag=None):
"""Constructor for new Site instances.
@@ -67,14 +72,17 @@ class Site(object):
self._base_url = base_url
self._article_path = article_path
self._script_path = script_path
self._sql = sql
self._namespaces = namespaces

# Attributes used when querying the API:
# Attributes used for API queries:
self._assert_edit = assert_edit
self._maxlag = maxlag
self._max_retries = 5

# Attributes used for SQL queries:
self._sql_data = sql
self._sql_conn = None

# Set up cookiejar and URL opener for making API queries:
if cookiejar is not None:
self._cookiejar = cookiejar
@@ -416,6 +424,24 @@ class Site(object):
self._cookiejar.clear()
self._save_cookiejar()

def _sql_connect(self, **kwargs):
    """Attempt to establish a connection with this site's SQL database.

    The connection arguments are the site's stored SQL data
    (`self._sql_data`), overridden by any `kwargs` given here. They are
    passed to oursql's connect() and the resulting connection is stored in
    `self._sql_conn`. If none of "read_default_file", "user", or "passwd" is
    supplied, "read_default_file" defaults to "~/.my.cnf".

    Will raise SQLError() if the module "oursql" is not available.
    """
    if not connect:
        e = "Module 'oursql' is required for SQL queries."
        raise SQLError(e)

    # Build a fresh dict rather than aliasing self._sql_data, so per-call
    # overrides and the default below never leak into the site's stored SQL
    # data (which may be shared with whoever constructed the Site). This
    # also tolerates a Site that was created with sql=None:
    args = dict(self._sql_data or {})
    args.update(kwargs)

    credential_keys = ("read_default_file", "user", "passwd")
    if not any(key in args for key in credential_keys):
        args["read_default_file"] = "~/.my.cnf"

    self._sql_conn = connect(**args)

def api_query(self, **kwargs):
"""Do an API query with `kwargs` as the parameters.

@@ -423,6 +449,29 @@ class Site(object):
"""
return self._api_query(kwargs)

def sql_query(self, query, params=(), plain_query=False, cursor_class=None,
              show_table=False):
    """Run an SQL query against this site's database, yielding each row.

    A connection is opened on first use via _sql_connect(). `query`,
    `params`, and `plain_query` are handed to the cursor's execute(), while
    `cursor_class` and `show_table` are handed to the connection's cursor().

    Example:
    >>> query = "SELECT user_name, user_registration FROM user WHERE user_name IN (?, ?)"
    >>> for row in site.sql_query(query, ("EarwigBot", "The Earwig")):
    ...     print row
    ('EarwigBot', '20090428220032')
    ('The Earwig', '20080703215134')

    This is a generator, so nothing is executed until it is iterated. May
    raise SQLError() or one of oursql's exceptions
    (oursql.ProgrammingError, oursql.InterfaceError, ...) if there were
    problems with the query.
    """
    # Connect lazily; _sql_connect() is responsible for setting _sql_conn:
    if not self._sql_conn:
        self._sql_connect()

    cursor = self._sql_conn.cursor(cursor_class, show_table=show_table)
    with cursor as rows:
        rows.execute(query, params, plain_query)
        for row in rows:
            yield row

def name(self):
    """Return the Site's name (or "wikiid" in the API), like "enwiki".

    This is a plain accessor for the private `_name` attribute.
    """
    return self._name


Cargando…
Cancelar
Guardar