Bladeren bron

Merge branch 'feature/properties' into develop

tags/v0.1^2
Ben Kurtovic 12 jaren geleden
bovenliggende
commit
93c85b8f01
17 gewijzigde bestanden met toevoegingen van 583 en 498 verwijderingen
  1. +0
    -8
      docs/api/earwigbot.wiki.rst
  2. +4
    -2
      docs/toolset.rst
  3. +7
    -7
      earwigbot/commands/afc_report.py
  4. +2
    -2
      earwigbot/commands/editcount.py
  5. +4
    -5
      earwigbot/commands/registration.py
  6. +1
    -1
      earwigbot/commands/rights.py
  7. +1
    -1
      earwigbot/tasks/__init__.py
  8. +3
    -3
      earwigbot/tasks/afc_copyvios.py
  9. +5
    -5
      earwigbot/tasks/afc_history.py
  10. +4
    -4
      earwigbot/tasks/afc_statistics.py
  11. +17
    -9
      earwigbot/wiki/__init__.py
  12. +24
    -19
      earwigbot/wiki/category.py
  13. +7
    -6
      earwigbot/wiki/constants.py
  14. +195
    -173
      earwigbot/wiki/page.py
  15. +150
    -119
      earwigbot/wiki/site.py
  16. +55
    -47
      earwigbot/wiki/sitesdb.py
  17. +104
    -87
      earwigbot/wiki/user.py

+ 0
- 8
docs/api/earwigbot.wiki.rst Bestand weergeven

@@ -7,7 +7,6 @@ wiki Package
.. automodule:: earwigbot.wiki
:members:
:undoc-members:
:show-inheritance:

:mod:`category` Module
----------------------
@@ -15,7 +14,6 @@ wiki Package
.. automodule:: earwigbot.wiki.category
:members:
:undoc-members:
:show-inheritance:

:mod:`constants` Module
-----------------------
@@ -23,7 +21,6 @@ wiki Package
.. automodule:: earwigbot.wiki.constants
:members:
:undoc-members:
:show-inheritance:

:mod:`copyright` Module
-----------------------
@@ -31,7 +28,6 @@ wiki Package
.. automodule:: earwigbot.wiki.copyright
:members:
:undoc-members:
:show-inheritance:

:mod:`page` Module
------------------
@@ -47,7 +43,6 @@ wiki Package
.. automodule:: earwigbot.wiki.site
:members:
:undoc-members:
:show-inheritance:

:mod:`sitesdb` Module
---------------------
@@ -55,7 +50,6 @@ wiki Package
.. automodule:: earwigbot.wiki.sitesdb
:members:
:undoc-members:
:show-inheritance:

:mod:`user` Module
------------------
@@ -63,5 +57,3 @@ wiki Package
.. automodule:: earwigbot.wiki.user
:members:
:undoc-members:
:show-inheritance:


+ 4
- 2
docs/toolset.rst Bestand weergeven

@@ -117,6 +117,8 @@ Create :py:class:`earwigbot.wiki.Page <earwigbot.wiki.page.Page>` objects with
:py:meth:`user.get_talkpage() <earwigbot.wiki.user.User.get_talkpage>`. They
provide the following attributes:

- :py:attr:`~earwigbot.wiki.page.Page.site`: the page's corresponding
:py:class:`~earwigbot.wiki.site.Site` object
- :py:attr:`~earwigbot.wiki.page.Page.title`: the page's title, or pagename
- :py:attr:`~earwigbot.wiki.page.Page.exists`: whether the page exists
- :py:attr:`~earwigbot.wiki.page.Page.pageid`: an integer ID representing the
@@ -133,7 +135,7 @@ provide the following attributes:

and the following methods:

- :py:meth:`~earwigbot.wiki.page.Page.reload`: forcibly reload the page's
- :py:meth:`~earwigbot.wiki.page.Page.reload`: forcibly reloads the page's
attributes (emphasis on *reload* - this is only necessary if there is reason
to believe they have changed)
- :py:meth:`toggle_talk(...) <earwigbot.wiki.page.Page.toggle_talk>`: returns a
@@ -198,7 +200,7 @@ provide the following attributes:

and the following methods:

- :py:meth:`~earwigbot.wiki.user.User.reload`: forcibly reload the user's
- :py:meth:`~earwigbot.wiki.user.User.reload`: forcibly reloads the user's
attributes (emphasis on *reload* - this is only necessary if there is reason
to believe they have changed)
- :py:meth:`~earwigbot.wiki.user.User.get_userpage`: returns a


+ 7
- 7
earwigbot/commands/afc_report.py Bestand weergeven

@@ -70,16 +70,16 @@ class Command(BaseCommand):

def get_page(self, title):
page = self.site.get_page(title, follow_redirects=False)
if page.exists()[0]:
if page.exists[0]:
return page

def report(self, page):
url = page.url().replace("en.wikipedia.org/wiki", "enwp.org")
short = self.statistics.get_short_title(page.title())
url = page.url.replace("en.wikipedia.org/wiki", "enwp.org")
short = self.statistics.get_short_title(page.title)
status = self.get_status(page)
user = self.site.get_user(page.creator())
user_name = user.name()
user_url = user.get_talkpage().url()
user_name = user.name
user_url = user.get_talkpage().url

msg1 = "AfC submission report for \x0302{0}\x0301 ({1}):"
msg2 = "Status: \x0303{0}\x0301"
@@ -92,9 +92,9 @@ class Command(BaseCommand):
self.say(self.data.chan, msg3.format(user_name, user_url))

def get_status(self, page):
if page.is_redirect():
if page.is_redirect:
target = page.get_redirect_target()
if self.site.get_page(target).namespace() == wiki.NS_MAIN:
if self.site.get_page(target).namespace == wiki.NS_MAIN:
return "accepted"
return "redirect"



+ 2
- 2
earwigbot/commands/editcount.py Bestand weergeven

@@ -45,13 +45,13 @@ class Command(BaseCommand):
user = site.get_user(name)

try:
count = user.editcount()
count = user.editcount
except wiki.UserNotFoundError:
msg = "the user \x0302{0}\x0301 does not exist."
self.reply(data, msg.format(name))
return

safe = quote_plus(user.name())
safe = quote_plus(user.name)
url = "http://toolserver.org/~tparis/pcount/index.php?name={0}&lang=en&wiki=wikipedia"
msg = "\x0302{0}\x0301 has {1} edits ({2})."
self.reply(data, msg.format(name, count, url.format(safe)))

+ 4
- 5
earwigbot/commands/registration.py Bestand weergeven

@@ -45,7 +45,7 @@ class Command(BaseCommand):
user = site.get_user(name)

try:
reg = user.registration()
reg = user.registration
except wiki.UserNotFoundError:
msg = "the user \x0302{0}\x0301 does not exist."
self.reply(data, msg.format(name))
@@ -54,14 +54,13 @@ class Command(BaseCommand):
date = time.strftime("%b %d, %Y at %H:%M:%S UTC", reg)
age = self.get_diff(time.mktime(reg), time.mktime(time.gmtime()))

g = user.gender()
if g == "male":
if user.gender == "male":
gender = "He's"
elif g == "female":
elif user.gender == "female":
gender = "She's"
else:
gender = "They're"
msg = "\x0302{0}\x0301 registered on {1}. {2} {3} old."
self.reply(data, msg.format(name, date, gender, age))



+ 1
- 1
earwigbot/commands/rights.py Bestand weergeven

@@ -43,7 +43,7 @@ class Command(BaseCommand):
user = site.get_user(name)

try:
rights = user.groups()
rights = user.groups
except wiki.UserNotFoundError:
msg = "the user \x0302{0}\x0301 does not exist."
self.reply(data, msg.format(name))


+ 1
- 1
earwigbot/tasks/__init__.py Bestand weergeven

@@ -116,7 +116,7 @@ class BaseTask(object):
except KeyError:
return False
title = cfg.get("page", "User:$1/Shutoff/Task $2")
username = site.get_user().name()
username = site.get_user().name
title = title.replace("$1", username).replace("$2", str(self.number))
page = site.get_page(title)



+ 3
- 3
earwigbot/tasks/afc_copyvios.py Bestand weergeven

@@ -70,13 +70,13 @@ class Task(BaseTask):

def process(self, page):
"""Detect copyvios in 'page' and add a note if any are found."""
title = page.title()
title = page.title
if title in self.ignore_list:
msg = "Skipping page in ignore list: [[{0}]]"
self.logger.info(msg.format(title))
return

pageid = page.pageid()
pageid = page.pageid
if self.has_been_processed(pageid):
msg = "Skipping check on already processed page [[{0}]]"
self.logger.info(msg.format(title))
@@ -143,7 +143,7 @@ class Task(BaseTask):
This will only be called if "cache_results" == True in the task's
config, which is False by default.
"""
pageid = page.pageid()
pageid = page.pageid
hash = sha256(page.get()).hexdigest()
query1 = "SELECT 1 FROM cache WHERE cache_id = ?"
query2 = "DELETE FROM cache WHERE cache_id = ?"


+ 5
- 5
earwigbot/tasks/afc_history.py Bestand weergeven

@@ -93,7 +93,7 @@ class Task(BaseTask):
generator = self.backwards_cat_iterator()
for d in xrange(num_days):
category = generator.next()
date = category.title().split("/")[-1]
date = category.title.split("/")[-1]
self.update_date(date, category)
sleep(10)
self.logger.info("Update complete")
@@ -104,7 +104,7 @@ class Task(BaseTask):
generator = self.backwards_cat_iterator()
for d in xrange(num_days):
category = generator.next()
date = category.title().split("/")[-1]
date = category.title.split("/")[-1]
data[date] = self.get_date_counts(date)

data = OrderedDict(reversed(data.items())) # Oldest to most recent
@@ -123,7 +123,7 @@ class Task(BaseTask):
current -= timedelta(1) # Subtract one day from date

def update_date(self, date, category):
msg = "Updating {0} ([[{1}]])".format(date, category.title())
msg = "Updating {0} ([[{1}]])".format(date, category.title)
self.logger.debug(msg)

q_select = "SELECT page_date, page_status FROM page WHERE page_id = ?"
@@ -153,7 +153,7 @@ class Task(BaseTask):

def get_status(self, title, pageid):
page = self.site.get_page(title)
ns = page.namespace()
ns = page.namespace

if ns == wiki.NS_FILE_TALK: # Ignore accepted FFU requests
return self.STATUS_NONE
@@ -161,7 +161,7 @@ class Task(BaseTask):
if ns == wiki.NS_TALK:
new_page = page.toggle_talk()
sleep(2)
if new_page.is_redirect():
if new_page.is_redirect:
return self.STATUS_NONE # Ignore accepted AFC/R requests
return self.STATUS_ACCEPT



+ 4
- 4
earwigbot/tasks/afc_statistics.py Bestand weergeven

@@ -129,7 +129,7 @@ class Task(BaseTask):
"<!-- sig begin -->~~~ at ~~~~~<!-- sig end -->",
newtext)
page.edit(newtext, summary, minor=True, bot=True)
self.logger.info(u"Chart saved to [[{0}]]".format(page.title()))
self.logger.info(u"Chart saved to [[{0}]]".format(page.title))

def compile_charts(self):
"""Compile and return all statistics information from our local db."""
@@ -332,7 +332,7 @@ class Task(BaseTask):
self.logger.error(msg)
return

namespace = self.site.get_page(title).namespace()
namespace = self.site.get_page(title).namespace
status, chart = self.get_status_and_chart(content, namespace)
if chart == self.CHART_NONE:
msg = u"Could not find a status for [[{0}]]".format(title)
@@ -364,7 +364,7 @@ class Task(BaseTask):
self.logger.error(msg)
return

namespace = self.site.get_page(title).namespace()
namespace = self.site.get_page(title).namespace
status, chart = self.get_status_and_chart(content, namespace)
if chart == self.CHART_NONE:
self.untrack_page(cursor, pageid)
@@ -718,7 +718,7 @@ class Task(BaseTask):
if chart in [self.CHART_PEND, self.CHART_DRAFT] and s_user:
submitter = self.site.get_user(s_user)
try:
if submitter.blockinfo():
if submitter.blockinfo:
notes += "|nb=1" # Submitter is blocked
except wiki.UserNotFoundError: # Likely an IP
pass


+ 17
- 9
earwigbot/wiki/__init__.py Bestand weergeven

@@ -21,18 +21,26 @@
# SOFTWARE.

"""
EarwigBot's Wiki Toolset
**EarwigBot's Wiki Toolset**

This is a collection of classes and functions to read from and write to
Wikipedia and other wiki sites. No connection whatsoever to python-wikitools
written by Mr.Z-man, other than a similar purpose. We share no code.
Wikipedia and other wiki sites. No connection whatsoever to `python-wikitools
<http://code.google.com/p/python-wikitools/>`_ written by `Mr.Z-man
<http://en.wikipedia.org/wiki/User:Mr.Z-man>`_, other than a similar purpose.
We share no code.

Import the toolset directly with `from earwigbot import wiki`. If using the
built-in integration with the rest of the bot, Bot() objects contain a `wiki`
attribute, which is a SitesDB object tied to the sites.db file located in the
same directory as config.yml. That object has the principal methods get_site,
add_site, and remove_site that should handle all of your Site (and thus, Page,
Category, and User) needs.
Import the toolset directly with ``from earwigbot import wiki``. If using the
built-in integration with the rest of the bot, :py:class:`~earwigbot.bot.Bot`
objects contain a :py:attr:`~earwigbot.bot.Bot.wiki` attribute, which is a
:py:class:`~earwigbot.wiki.sitesdb.SitesDB` object tied to the :file:`sites.db`
file located in the same directory as :file:`config.yml`. That object has the
principal methods :py:meth:`~earwigbot.wiki.sitesdb.SitesDB.get_site`,
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.add_site`, and
:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.remove_site` that should handle all
of your :py:class:`~earwigbot.wiki.site.Site` (and thus,
:py:class:`~earwigbot.wiki.page.Page`,
:py:class:`~earwigbot.wiki.category.Category`, and
:py:class:`~earwigbot.wiki.user.User`) needs.
"""

from earwigbot.wiki.category import *


+ 24
- 19
earwigbot/wiki/category.py Bestand weergeven

@@ -26,33 +26,37 @@ __all__ = ["Category"]

class Category(Page):
"""
EarwigBot's Wiki Toolset: Category Class
**EarwigBot's Wiki Toolset: Category Class**

Represents a Category on a given Site, a subclass of Page. Provides
additional methods, but Page's own methods should work fine on Category
objects. Site.get_page() will return a Category instead of a Page if the
given title is in the category namespace; get_category() is shorthand,
because it accepts category names without the namespace prefix.
Represents a category on a given :py:class:`~earwigbot.wiki.site.Site`, a
subclass of :py:class:`~earwigbot.wiki.page.Page`. Provides additional
methods, but :py:class:`~earwigbot.wiki.page.Page`'s own methods should
work fine on :py:class:`Category` objects. :py:meth:`site.get_page()
<earwigbot.wiki.site.Site.get_page>` will return a :py:class:`Category`
instead of a :py:class:`~earwigbot.wiki.page.Page` if the given title is in
the category namespace; :py:meth:`~earwigbot.wiki.site.Site.get_category`
is shorthand, accepting category names without the namespace prefix.

Public methods:
get_members -- returns a list of page titles in the category
*Public methods:*

- :py:meth:`get_members`: returns a list of page titles in the category
"""

def __repr__(self):
"""Returns the canonical string representation of the Category."""
"""Return the canonical string representation of the Category."""
res = "Category(title={0!r}, follow_redirects={1!r}, site={2!r})"
return res.format(self._title, self._follow_redirects, self._site)

def __str__(self):
"""Returns a nice string representation of the Category."""
return '<Category "{0}" of {1}>'.format(self.title(), str(self._site))
"""Return a nice string representation of the Category."""
return '<Category "{0}" of {1}>'.format(self.title, str(self._site))

def _get_members_via_sql(self, limit):
"""Return a list of tuples of (title, pageid) in the category."""
query = """SELECT page_title, page_namespace, page_id FROM page
JOIN categorylinks ON page_id = cl_from
WHERE cl_to = ?"""
title = self.title().replace(" ", "_").split(":", 1)[1]
title = self.title.replace(" ", "_").split(":", 1)[1]

if limit:
query += " LIMIT ?"
@@ -83,16 +87,17 @@ class Category(Page):
return [member["title"] for member in members]

def get_members(self, use_sql=False, limit=None):
"""Returns a list of page titles in the category.
"""Return a list of page titles in the category.

If `use_sql` is True, we will use a SQL query instead of the API. Pages
will be returned as tuples of (title, pageid) instead of just titles.
If *use_sql* is ``True``, we will use a SQL query instead of the API.
Pages will be returned as tuples of ``(title, pageid)`` instead of just
titles.

If `limit` is provided, we will provide this many titles, or less if
the category is smaller. `limit` defaults to 50 for API queries; normal
If *limit* is provided, we will provide this many titles, or fewer if
the category is smaller. It defaults to 50 for API queries; normal
users can go up to 500, and bots can go up to 5,000 on a single API
query. If we're using SQL, the limit is None by default (returning all
pages in the category), but an arbitrary limit can still be chosen.
query. If we're using SQL, the limit is ``None`` by default (returning
all pages in the category), but an arbitrary limit can still be chosen.
"""
if use_sql:
return self._get_members_via_sql(limit)


+ 7
- 6
earwigbot/wiki/constants.py Bestand weergeven

@@ -21,15 +21,16 @@
# SOFTWARE.

"""
EarwigBot's Wiki Toolset: Constants
**EarwigBot's Wiki Toolset: Constants**

This module defines some useful constants:
* USER_AGENT - our default User Agent when making API queries
* NS_* - default namespace IDs for easy lookup

Import directly with `from earwigbot.wiki import constants` or
`from earwigbot.wiki.constants import *`. These are also available from
earwigbot.wiki (e.g. `earwigbot.wiki.USER_AGENT`).
- :py:const:`USER_AGENT`: our default User Agent when making API queries
- :py:const:`NS_*`: default namespace IDs for easy lookup

Import directly with ``from earwigbot.wiki import constants`` or
``from earwigbot.wiki.constants import *``. These are also available from
:py:mod:`earwigbot.wiki` directly (e.g. ``earwigbot.wiki.USER_AGENT``).
"""

# Default User Agent when making API queries:


+ 195
- 173
earwigbot/wiki/page.py Bestand weergeven

@@ -32,32 +32,42 @@ __all__ = ["Page"]

class Page(CopyrightMixin):
"""
EarwigBot's Wiki Toolset: Page Class

Represents a Page on a given Site. Has methods for getting information
about the page, getting page content, and so on. Category is a subclass of
Page with additional methods.

Attributes:
title -- the page's title, or pagename
exists -- whether the page exists
pageid -- an integer ID representing the page
url -- the page's URL
namespace -- the page's namespace as an integer
protection -- the page's current protection status
is_talkpage -- True if the page is a talkpage, else False
is_redirect -- True if the page is a redirect, else False

Public methods:
reload -- forcibly reload the page's attributes
toggle_talk -- returns a content page's talk page, or vice versa
get -- returns page content
get_redirect_target -- if the page is a redirect, returns its destination
get_creator -- returns a User object representing the first person
to edit the page
edit -- replaces the page's content or creates a new page
add_section -- adds a new section at the bottom of the page
copyvio_check -- checks the page for copyright violations
**EarwigBot's Wiki Toolset: Page Class**

Represents a page on a given :py:class:`~earwigbot.wiki.site.Site`. Has
methods for getting information about the page, getting page content, and
so on. :py:class:`~earwigbot.wiki.category.Category` is a subclass of
:py:class:`Page` with additional methods.

*Attributes:*

- :py:attr:`site`: the page's corresponding Site object
- :py:attr:`title`: the page's title, or pagename
- :py:attr:`exists`: whether the page exists
- :py:attr:`pageid`: an integer ID representing the page
- :py:attr:`url`: the page's URL
- :py:attr:`namespace`: the page's namespace as an integer
- :py:attr:`protection`: the page's current protection status
- :py:attr:`is_talkpage`: ``True`` if this is a talkpage, else ``False``
- :py:attr:`is_redirect`: ``True`` if this is a redirect, else ``False``

*Public methods:*

- :py:meth:`reload`: forcibly reloads the page's attributes
- :py:meth:`toggle_talk`: returns a content page's talk page, or vice versa
- :py:meth:`get`: returns the page's content
- :py:meth:`get_redirect_target`: returns the page's destination if it is a
redirect
- :py:meth:`get_creator`: returns a User object representing the first
person to edit the page
- :py:meth:`edit`: replaces the page's content or creates a new page
- :py:meth:`add_section`: adds a new section at the bottom of the page

- :py:meth:`~earwigbot.wiki.copyvios.CopyrightMixin.copyvio_check`:
checks the page for copyright violations
- :py:meth:`~earwigbot.wiki.copyvios.CopyrightMixin.copyvio_compare`:
checks the page like :py:meth:`copyvio_check`, but against a specific
URL
"""

re_redirect = "^\s*\#\s*redirect\s*\[\[(.*?)\]\]"
@@ -69,10 +79,10 @@ class Page(CopyrightMixin):
and whether or not to follow redirects (optional, defaults to False).

As with User, site.get_page() is preferred. Site's method has support
for a default `follow_redirects` value in our config, while __init__
for a default *follow_redirects* value in our config, while __init__()
always defaults to False.

__init__ will not do any API queries, but it will use basic namespace
__init__() will not do any API queries, but it will use basic namespace
logic to determine our namespace ID and if we are a talkpage.
"""
super(Page, self).__init__(site)
@@ -113,15 +123,15 @@ class Page(CopyrightMixin):
self._is_talkpage = self._namespace % 2 == 1

def __repr__(self):
"""Returns the canonical string representation of the Page."""
"""Return the canonical string representation of the Page."""
res = "Page(title={0!r}, follow_redirects={1!r}, site={2!r})"
return res.format(self._title, self._follow_redirects, self._site)

def __str__(self):
"""Returns a nice string representation of the Page."""
return '<Page "{0}" of {1}>'.format(self.title(), str(self._site))
"""Return a nice string representation of the Page."""
return '<Page "{0}" of {1}>'.format(self.title, str(self._site))

def _force_validity(self):
def _assert_validity(self):
"""Used to ensure that our page's title is valid.

If this method is called when our page is not valid (and after
@@ -134,20 +144,20 @@ class Page(CopyrightMixin):
e = "Page '{0}' is invalid.".format(self._title)
raise exceptions.InvalidPageError(e)

def _force_existence(self):
def _assert_existence(self):
"""Used to ensure that our page exists.

If this method is called when our page doesn't exist (and after
_load_attributes() has been called), PageNotFoundError will be raised.
It will also call _force_validity() beforehand.
It will also call _assert_validity() beforehand.
"""
self._force_validity()
self._assert_validity()
if self._exists == 2:
e = "Page '{0}' does not exist.".format(self._title)
raise exceptions.PageNotFoundError(e)

def _load_wrapper(self):
"""Calls _load_attributes() and follows redirects if we're supposed to.
def _load(self):
"""Call _load_attributes() and follow redirects if we're supposed to.

This method will only follow redirects if follow_redirects=True was
passed to __init__() (perhaps indirectly passed by site.get_page()).
@@ -170,13 +180,13 @@ class Page(CopyrightMixin):
self._load_attributes()

def _load_attributes(self, result=None):
"""Loads various data from the API in a single query.
"""Load various data from the API in a single query.

Loads self._title, ._exists, ._is_redirect, ._pageid, ._fullurl,
._protection, ._namespace, ._is_talkpage, ._creator, ._lastrevid,
._token, and ._starttimestamp using the API. It will do a query of
its own unless `result` is provided, in which case we'll pretend
`result` is what the query returned.
its own unless *result* is provided, in which case we'll pretend
*result* is what the query returned.

Assuming the API is sound, this should not raise any exceptions.
"""
@@ -235,14 +245,14 @@ class Page(CopyrightMixin):
pass

def _load_content(self, result=None):
"""Loads current page content from the API.
"""Load current page content from the API.

If `result` is provided, we'll pretend that is the result of an API
If *result* is provided, we'll pretend that is the result of an API
query and try to get content from that. Otherwise, we'll do an API
query on our own.

Don't call this directly, ever - use .get(force=True) if you want to
force content reloading.
Don't call this directly, ever; use reload() followed by get() if you
want to force content reloading.
"""
if not result:
params = {"action": "query", "prop": "revisions", "rvlimit": 1,
@@ -258,14 +268,14 @@ class Page(CopyrightMixin):
# self._load_attributes(). In that case, some of our attributes are
# outdated, so force another self._load_attributes():
self._load_attributes()
self._force_existence()
self._assert_existence()

def _edit(self, params=None, text=None, summary=None, minor=None, bot=None,
force=None, section=None, captcha_id=None, captcha_word=None,
tries=0):
"""Edit the page!

If `params` is given, we'll use it as our API query parameters.
If *params* is given, we'll use it as our API query parameters.
Otherwise, we'll build params using the given kwargs via
_build_edit_params().
@@ -281,7 +291,7 @@ class Page(CopyrightMixin):
raise exceptions.PermissionsError(e)

# Weed out invalid pages before we get too far:
self._force_validity()
self._assert_validity()

# Build our API query string:
if not params:
@@ -420,28 +430,33 @@ class Page(CopyrightMixin):
e = "AssertEdit: assertion '{0}' failed.".format(assertion)
raise exceptions.PermissionsError(e)

def title(self, force=False):
"""Returns the Page's title, or pagename.
@property
def site(self):
"""The Page's corresponding Site object."""
return self._site

This won't do any API queries on its own unless force is True, in which
case the title will be forcibly reloaded from the API (normalizing it,
and following redirects if follow_redirects=True was passed to
__init__()). Any other methods that do API queries will reload title on
their own, however, like exists() and get().
@property
def title(self):
"""The Page's title, or "pagename".

This won't do any API queries on its own. Any other attributes or
methods that do API queries will reload the title, however, like
:py:attr:`exists` and :py:meth:`get`, potentially "normalizing" it or
following redirects if :py:attr:`self._follow_redirects` is ``True``.
"""
if force:
self._load_wrapper()
return self._title

def exists(self, force=False):
"""Returns information about whether the Page exists or not.
@property
def exists(self):
"""Information about whether the Page exists or not.

The returned "information" is a tuple with two items. The first is a
bool, either True if the page exists or False if it does not. The
second is a string giving more information, either "invalid", (title
is invalid, e.g. it contains "["), "missing", or "exists".
The "information" is a tuple with two items. The first is a bool,
either ``True`` if the page exists or ``False`` if it does not. The
second is a string giving more information, either ``"invalid"``
(title is invalid, e.g. it contains ``"["``), ``"missing"``, or
``"exists"``.

Makes an API query if force is True or if we haven't already made one.
Makes an API query only if we haven't already made one.
"""
cases = {
0: (None, "unknown"),
@@ -449,32 +464,33 @@ class Page(CopyrightMixin):
2: (False, "missing"),
3: (True, "exists"),
}
if self._exists == 0 or force:
self._load_wrapper()
if self._exists == 0:
self._load()
return cases[self._exists]

def pageid(self, force=False):
"""Returns an integer ID representing the Page.
@property
def pageid(self):
"""An integer ID representing the Page.

Makes an API query if force is True or if we haven't already made one.
Makes an API query only if we haven't already made one.

Raises InvalidPageError or PageNotFoundError if the page name is
Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
invalid or the page does not exist, respectively.
"""
if self._exists == 0 or force:
self._load_wrapper()
self._force_existence() # missing pages do not have IDs
if self._exists == 0:
self._load()
self._assert_existence() # Missing pages do not have IDs
return self._pageid

def url(self, force=False):
"""Returns the page's URL.
@property
def url(self):
"""The page's URL.

Like title(), this won't do any API queries on its own unless force is
True. If the API was never queried for this page, we will attempt to
determine the URL ourselves based on the title.
Like :py:attr:`title`, this won't do any API queries on its own. If the
API was never queried for this page, we will attempt to determine the
URL ourselves based on the title.
"""
if force:
self._load_wrapper()
if self._fullurl:
return self._fullurl
else:
@@ -482,91 +498,80 @@ class Page(CopyrightMixin):
path = self._site._article_path.replace("$1", slug)
return ''.join((self._site._base_url, path))

def namespace(self, force=False):
"""Returns the page's namespace ID (an integer).
@property
def namespace(self):
"""The page's namespace ID (an integer).

Like title(), this won't do any API queries on its own unless force is
True. If the API was never queried for this page, we will attempt to
determine the namespace ourselves based on the title.
Like :py:attr:`title`, this won't do any API queries on its own. If the
API was never queried for this page, we will attempt to determine the
namespace ourselves based on the title.
"""
if force:
self._load_wrapper()
return self._namespace

def protection(self, force=False):
"""Returns the page's current protection status.
@property
def protection(self):
"""The page's current protection status.

Makes an API query if force is True or if we haven't already made one.
Makes an API query only if we haven't already made one.

Raises InvalidPageError if the page name is invalid. Will not raise an
error if the page is missing because those can still be protected.
Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` if the page
name is invalid. Won't raise an error if the page is missing because
those can still be create-protected.
"""
if self._exists == 0 or force:
self._load_wrapper()
self._force_validity() # invalid pages cannot be protected
if self._exists == 0:
self._load()
self._assert_validity() # Invalid pages cannot be protected
return self._protection

def creator(self, force=False):
"""Returns the page's creator (i.e., the first user to edit the page).

Makes an API query if force is True or if we haven't already made one.
Normally, we can get the creator along with everything else (except
content) in self._load_attributes(). However, due to a limitation in
the API (can't get the editor of one revision and the content of
another at both ends of the history), if our other attributes were only
loaded from get(), we'll have to do another API query. This is done
by calling ourselves again with force=True.
@property
def is_talkpage(self):
"""``True`` if the page is a talkpage, otherwise ``False``.

Raises InvalidPageError or PageNotFoundError if the page name is
invalid or the page does not exist, respectively.
Like :py:attr:`title`, this won't do any API queries on its own. If the
API was never queried for this page, we will attempt to determine
whether it is a talkpage ourselves based on its namespace.
"""
if self._exists == 0 or force:
self._load_wrapper()
self._force_existence()
if not self._creator and not force:
self.creator(force=True)
return self._creator

def is_talkpage(self, force=False):
"""Returns True if the page is a talkpage, else False.

Like title(), this won't do any API queries on its own unless force is
True. If the API was never queried for this page, we will attempt to
determine the talkpage status ourselves based on its namespace ID.
"""
if force:
self._load_wrapper()
return self._is_talkpage

def is_redirect(self, force=False):
"""Returns True if the page is a redirect, else False.
@property
def is_redirect(self):
"""``True`` if the page is a redirect, otherwise ``False``.

Makes an API query if force is True or if we haven't already made one.
Makes an API query only if we haven't already made one.

We will return False even if the page does not exist or is invalid.
We will return ``False`` even if the page does not exist or is invalid.
"""
if self._exists == 0 or force:
self._load_wrapper()
if self._exists == 0:
self._load()
return self._is_redirect

def toggle_talk(self, force=False, follow_redirects=None):
"""Returns a content page's talk page, or vice versa.
def reload(self):
"""Forcibly reload the page's attributes.

Emphasis on *reload*: this is only necessary if there is reason to
believe they have changed.
"""
self._load()
if self._content is not None:
# Only reload content if it has already been loaded:
self._load_content()

def toggle_talk(self, follow_redirects=None):
"""Return a content page's talk page, or vice versa.

The title of the new page is determined by namespace logic, not API
queries. We won't make any API queries on our own unless force is True,
and the only reason then would be to forcibly update the title or
follow redirects if we haven't already made an API query.
queries. We won't make any API queries on our own.

If `follow_redirects` is anything other than None (the default), it
will be passed to the new Page's __init__(). Otherwise, we'll use the
value passed to our own __init__().
If *follow_redirects* is anything other than ``None`` (the default), it
will be passed to the new :py:class:`~earwigbot.wiki.page.Page`
object's :py:meth:`__init__`. Otherwise, we'll use the value passed to
our own :py:meth:`__init__`.

Will raise InvalidPageError if we try to get the talk page of a special
page (in the Special: or Media: namespaces), but we won't raise an
exception if our page is otherwise missing or invalid.
Will raise :py:exc:`~earwigbot.exceptions.InvalidPageError` if we try
to get the talk page of a special page (in the ``Special:`` or
``Media:`` namespaces), but we won't raise an exception if our page is
otherwise missing or invalid.
"""
if force:
self._load_wrapper()
if self._namespace < 0:
ns = self._site.namespace_id_to_name(self._namespace)
e = "Pages in the {0} namespace can't have talk pages.".format(ns)
@@ -587,7 +592,7 @@ class Page(CopyrightMixin):
# If the new page is in namespace 0, don't do ":Title" (it's correct,
# but unnecessary), just do "Title":
if new_prefix:
new_title = ':'.join((new_prefix, body))
new_title = u":".join((new_prefix, body))
else:
new_title = body

@@ -595,17 +600,13 @@ class Page(CopyrightMixin):
follow_redirects = self._follow_redirects
return Page(self._site, new_title, follow_redirects)

def get(self, force=False):
"""Returns page content, which is cached if you try to call get again.

Use `force` to forcibly reload page content even if we've already
loaded some. This is good if you want to edit a page multiple times,
and you want to get updated content before you make your second edit.
def get(self):
"""Return page content, which is cached if you try to call get again.

Raises InvalidPageError or PageNotFoundError if the page name is
invalid or the page does not exist, respectively.
"""
if force or self._exists == 0:
if self._exists == 0:
# Kill two birds with one stone by doing an API query for both our
# attributes and our page content:
params = {"action": "query", "rvlimit": 1, "titles": self._title,
@@ -613,54 +614,75 @@ class Page(CopyrightMixin):
"intoken": "edit", "rvprop": "content|timestamp"}
result = self._site._api_query(params)
self._load_attributes(result=result)
self._force_existence()
self._assert_existence()
self._load_content(result=result)

# Follow redirects if we're told to:
if self._keep_following and self._is_redirect:
self._title = self.get_redirect_target()
self._keep_following = False # don't follow double redirects
self._content = None # reset the content we just loaded
self.get(force=True)
self._keep_following = False # Don't follow double redirects
self._exists = 0 # Force another API query
self.get()

return self._content

# Make sure we're dealing with a real page here. This may be outdated
# if the page was deleted since we last called self._load_attributes(),
# but self._load_content() can handle that:
self._force_existence()
self._assert_existence()

if self._content is None:
self._load_content()

return self._content

def get_redirect_target(self, force=False):
"""If the page is a redirect, returns its destination.

Use `force` to forcibly reload content even if we've already loaded
some before. Note that this method calls get() for page content.
def get_redirect_target(self):
"""If the page is a redirect, return its destination.

Raises InvalidPageError or PageNotFoundError if the page name is
invalid or the page does not exist, respectively. Raises RedirectError
if the page is not a redirect.
Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
invalid or the page does not exist, respectively. Raises
:py:exc:`~earwigbot.exceptions.RedirectError` if the page is not a
redirect.
"""
content = self.get(force)
content = self.get()
try:
return re.findall(self.re_redirect, content, flags=re.I)[0]
except IndexError:
e = "The page does not appear to have a redirect target."
raise exceptions.RedirectError(e)

def get_creator(self):
"""Return the User object for the first person to edit the page.

Makes an API query only if we haven't already made one. Normally, we
can get the creator along with everything else (except content) in
:py:meth:`_load_attributes`. However, due to a limitation in the API
(can't get the editor of one revision and the content of another at
both ends of the history), if our other attributes were only loaded
through :py:meth:`get`, we'll have to do another API query.

Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
invalid or the page does not exist, respectively.
"""
if self._exists == 0:
self._load()
self._assert_existence()
if not self._creator:
self._load()
self._assert_existence()
return self._site.get_user(self._creator)

def edit(self, text, summary, minor=False, bot=True, force=False):
"""Replaces the page's content or creates a new page.
"""Replace the page's content or create a new page.

`text` is the new page content, with `summary` as the edit summary.
If `minor` is True, the edit will be marked as minor. If `bot` is true,
the edit will be marked as a bot edit, but only if we actually have a
bot flag.
*text* is the new page content, with *summary* as the edit summary.
If *minor* is ``True``, the edit will be marked as minor. If *bot* is
``True``, the edit will be marked as a bot edit, but only if we
actually have a bot flag.

Use `force` to push the new content even if there's an edit conflict or
Use *force* to push the new content even if there's an edit conflict or
the page was deleted/recreated between getting our edit token and
editing our page. Be careful with this!
"""
@@ -668,12 +690,12 @@ class Page(CopyrightMixin):
force=force)

def add_section(self, text, title, minor=False, bot=True, force=False):
"""Adds a new section to the bottom of the page.
"""Add a new section to the bottom of the page.

The arguments for this are the same as those for edit(), but instead of
providing a summary, you provide a section title.
The arguments for this are the same as those for :py:meth:`edit`, but
instead of providing a summary, you provide a section title.

Likewise, raised exceptions are the same as edit()'s.
Likewise, raised exceptions are the same as :py:meth:`edit`'s.

This should create the page if it does not already exist, with just the
new section as content.


+ 150
- 119
earwigbot/wiki/site.py Bestand weergeven

@@ -48,29 +48,38 @@ __all__ = ["Site"]

class Site(object):
"""
EarwigBot's Wiki Toolset: Site Class

Represents a Site, with support for API queries and returning Pages, Users,
and Categories. The constructor takes a bunch of arguments and you probably
won't need to call it directly, rather tools.get_site() for returning Site
instances, tools.add_site() for adding new ones to config, and
tools.del_site() for removing old ones from config, should suffice.

Attributes:
name -- the site's name (or "wikiid"), like "enwiki"
project -- the site's project name, like "wikipedia"
lang -- the site's language code, like "en"
domain -- the site's web domain, like "en.wikipedia.org"

Public methods:
api_query -- does an API query with the given kwargs as params
sql_query -- does an SQL query and yields its results
get_replag -- returns the estimated database replication lag
namespace_id_to_name -- given a namespace ID, returns associated name(s)
namespace_name_to_id -- given a namespace name, returns the associated ID
get_page -- returns a Page object for the given title
get_category -- returns a Category object for the given title
get_user -- returns a User object for the given username
**EarwigBot's Wiki Toolset: Site Class**

Represents a site, with support for API queries and returning
:py:class:`~earwigbot.wiki.page.Page`,
:py:class:`~earwigbot.wiki.user.User`,
and :py:class:`~earwigbot.wiki.category.Category` objects. The constructor
takes a bunch of arguments and you probably won't need to call it directly,
rather :py:meth:`wiki.get_site() <earwigbot.wiki.sitesdb.SitesDB.get_site>`
for returning :py:class:`Site`
instances, :py:meth:`wiki.add_site()
<earwigbot.wiki.sitesdb.SitesDB.add_site>` for adding new ones to our
database, and :py:meth:`wiki.remove_site()
<earwigbot.wiki.sitesdb.SitesDB.remove_site>` for removing old ones from
our database, should suffice.

*Attributes:*

- :py:attr:`name`: the site's name (or "wikiid"), like ``"enwiki"``
- :py:attr:`project`: the site's project name, like ``"wikipedia"``
- :py:attr:`lang`: the site's language code, like ``"en"``
- :py:attr:`domain`: the site's web domain, like ``"en.wikipedia.org"``

*Public methods:*

- :py:meth:`api_query`: does an API query with kwargs as params
- :py:meth:`sql_query`: does an SQL query and yields its results
- :py:meth:`get_replag`: estimates the database replication lag
- :py:meth:`namespace_id_to_name`: returns names associated with an NS id
- :py:meth:`namespace_name_to_id`: returns the ID associated with a NS name
- :py:meth:`get_page`: returns a Page for the given title
- :py:meth:`get_category`: returns a Category for the given title
- :py:meth:`get_user`: returns a User object for the given name
"""

def __init__(self, name=None, project=None, lang=None, base_url=None,
@@ -83,11 +92,11 @@ class Site(object):

This probably isn't necessary to call yourself unless you're building a
Site that's not in your config and you don't want to add it - normally
all you need is tools.get_site(name), which creates the Site for you
all you need is wiki.get_site(name), which creates the Site for you
based on your config file and the sites database. We accept a bunch of
kwargs, but the only ones you really "need" are `base_url` and
`script_path` - this is enough to figure out an API url. `login`, a
tuple of (username, password), is highly recommended. `cookiejar` will
kwargs, but the only ones you really "need" are *base_url* and
*script_path*; this is enough to figure out an API url. *login*, a
tuple of (username, password), is highly recommended. *cookiejar* will
be used to store cookies, and we'll use a normal CookieJar if none is
given.

@@ -151,7 +160,7 @@ class Site(object):
self._login(login)

def __repr__(self):
"""Returns the canonical string representation of the Site."""
"""Return the canonical string representation of the Site."""
res = ", ".join((
"Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}",
"base_url={_base_url!r}", "article_path={_article_path!r}",
@@ -170,13 +179,12 @@ class Site(object):
return res.format(login, cookies, agent, **self.__dict__)

def __str__(self):
"""Returns a nice string representation of the Site."""
"""Return a nice string representation of the Site."""
res = "<Site {0} ({1}:{2}) at {3}>"
return res.format(self.name(), self.project(), self.lang(),
self.domain())
return res.format(self.name, self.project, self.lang, self.domain)

def _urlencode_utf8(self, params):
"""Implement urllib.urlencode(params) with support for unicode input."""
"""Implement urllib.urlencode() with support for unicode input."""
enc = lambda s: s.encode("utf8") if isinstance(s, unicode) else str(s)
args = []
for key, val in params.iteritems():
@@ -186,30 +194,10 @@ class Site(object):
return "&".join(args)

def _api_query(self, params, tries=0, wait=5):
"""Do an API query with `params` as a dict of parameters.
"""Do an API query with *params* as a dict of parameters.

This will first attempt to construct an API url from self._base_url and
self._script_path. We need both of these, or else we'll raise
SiteAPIError. If self._base_url is protocol-relative (introduced in
MediaWiki 1.18), we'll choose HTTPS if self._user_https is True,
otherwise HTTP.

We'll encode the given params, adding format=json along the way, as
well as &assert= and &maxlag= based on self._assert_edit and _maxlag.
Additionally, we'll sleep a bit if the last query was made less than
self._wait_between_queries seconds ago. The request is made through
self._opener, which has cookie support (self._cookiejar), a User-Agent
(wiki.constants.USER_AGENT), and Accept-Encoding set to "gzip".

Assuming everything went well, we'll gunzip the data (if compressed),
load it as a JSON object, and return it.

If our request failed for some reason, we'll raise SiteAPIError with
details. If that reason was due to maxlag, we'll sleep for a bit and
then repeat the query until we exceed self._max_retries.

There's helpful MediaWiki API documentation at
<http://www.mediawiki.org/wiki/API>.
See the documentation for :py:meth:`api_query` for full implementation
details.
"""
since_last_query = time() - self._last_query_time # Throttling support
if since_last_query < self._wait_between_queries:
@@ -301,8 +289,8 @@ class Site(object):
was not given as a keyword argument. We'll do an API query to get the
missing data, but only if there actually *is* missing data.

Additionally, you can call this with `force=True` to forcibly reload
all attributes.
Additionally, you can call this with *force* set to True to forcibly
reload all attributes.
"""
# All attributes to be loaded, except _namespaces, which is a special
# case because it requires additional params in the API query:
@@ -332,7 +320,7 @@ class Site(object):
def _load_namespaces(self, result):
"""Fill self._namespaces with a dict of namespace IDs and names.

Called by _load_attributes() with API data as `result` when
Called by _load_attributes() with API data as *result* when
self._namespaces was not given as a kwarg to __init__().
"""
self._namespaces = {}
@@ -381,13 +369,12 @@ class Site(object):
(for that, we'd do self._login_info[0]), but rather to get our current
username without an unnecessary ?action=query&meta=userinfo API query.
"""
domain = self.domain()
name = ''.join((self._name, "Token"))
cookie = self._get_cookie(name, domain)
cookie = self._get_cookie(name, self.domain)

if cookie:
name = ''.join((self._name, "UserName"))
user_name = self._get_cookie(name, domain)
user_name = self._get_cookie(name, self.domain)
if user_name:
return user_name.value

@@ -399,7 +386,7 @@ class Site(object):
continue
# Build a regex that will match domains this cookie affects:
search = ''.join(("(.*?)", re_escape(cookie.domain)))
if re_match(search, domain): # Test it against our site
if re_match(search, self.domain): # Test it against our site
user_name = self._get_cookie("centralauth_User", cookie.domain)
if user_name:
return user_name.value
@@ -464,8 +451,8 @@ class Site(object):
Raises LoginError on login errors (duh), like bad passwords and
nonexistent usernames.

`login` is a (username, password) tuple. `token` is the token returned
from our first request, and `attempt` is to prevent getting stuck in a
*login* is a (username, password) tuple. *token* is the token returned
from our first request, and *attempt* is to prevent getting stuck in a
loop if MediaWiki isn't acting right.
"""
name, password = login
@@ -535,26 +522,57 @@ class Site(object):

self._sql_conn = oursql.connect(**args)

@property
def name(self):
"""Returns the Site's name (or "wikiid" in the API), like "enwiki"."""
"""The Site's name (or "wikiid" in the API), like ``"enwiki"``."""
return self._name

@property
def project(self):
"""Returns the Site's project name in lowercase, like "wikipedia"."""
"""The Site's project name in lowercase, like ``"wikipedia"``."""
return self._project

@property
def lang(self):
"""Returns the Site's language code, like "en" or "es"."""
"""The Site's language code, like ``"en"`` or ``"es"``."""
return self._lang

@property
def domain(self):
"""Returns the Site's web domain, like "en.wikipedia.org"."""
"""The Site's web domain, like ``"en.wikipedia.org"``."""
return urlparse(self._base_url).netloc

def api_query(self, **kwargs):
"""Do an API query with *kwargs* as the parameters.

See _api_query()'s documentation for details.
This will first attempt to construct an API url from
:py:attr:`self._base_url` and :py:attr:`self._script_path`. We need
both of these, or else we'll raise
:py:exc:`~earwigbot.exceptions.SiteAPIError`. If
:py:attr:`self._base_url` is protocol-relative (introduced in MediaWiki
1.18), we'll choose HTTPS only if :py:attr:`self._use_https` is
``True``, otherwise HTTP.

We'll encode the given params, adding ``format=json`` along the way, as
well as ``&assert=`` and ``&maxlag=`` based on
:py:attr:`self._assert_edit` and :py:attr:`_maxlag` respectively.
Additionally, we'll sleep a bit if the last query was made fewer than
:py:attr:`self._wait_between_queries` seconds ago. The request is made
through :py:attr:`self._opener`, which has cookie support
(:py:attr:`self._cookiejar`), a ``User-Agent``
(:py:const:`earwigbot.wiki.constants.USER_AGENT`), and
``Accept-Encoding`` set to ``"gzip"``.

Assuming everything went well, we'll gunzip the data (if compressed),
load it as a JSON object, and return it.

If our request failed for some reason, we'll raise
:py:exc:`~earwigbot.exceptions.SiteAPIError` with details. If that
reason was due to maxlag, we'll sleep for a bit and then repeat the
query until we exceed :py:attr:`self._max_retries`.

There is helpful MediaWiki API documentation at `MediaWiki.org
<http://www.mediawiki.org/wiki/API>`_.
"""
return self._api_query(kwargs)

@@ -562,34 +580,33 @@ class Site(object):
cursor_class=None, show_table=False):
"""Do an SQL query and yield its results.

If `plain_query` is True, we will force an unparameterized query.
Specifying both params and plain_query will cause an error.

If `dict_cursor` is True, we will use oursql.DictCursor as our cursor,
otherwise the default oursql.Cursor. If `cursor_class` is given, it
will override this option.

If `show_table` is True, the name of the table will be prepended to the
name of the column. This will mainly affect a DictCursor.

Example:
>>> query = "SELECT user_id, user_registration FROM user WHERE user_name = ?"
>>> params = ("The Earwig",)
>>> result1 = site.sql_query(query, params)
>>> result2 = site.sql_query(query, params, dict_cursor=True)
>>> for row in result1: print row
(7418060L, '20080703215134')
>>> for row in result2: print row
{'user_id': 7418060L, 'user_registration': '20080703215134'}

See _sql_connect() for information on how a connection is acquired.

<http://packages.python.org/oursql> has helpful documentation on the
oursql module.

This may raise SQLError() or one of oursql's exceptions
(oursql.ProgrammingError, oursql.InterfaceError, ...) if there were
problems with the query.
If *plain_query* is ``True``, we will force an unparameterized query.
Specifying both *params* and *plain_query* will cause an error. If
*dict_cursor* is ``True``, we will use :py:class:`oursql.DictCursor` as
our cursor, otherwise the default :py:class:`oursql.Cursor`. If
*cursor_class* is given, it will override this option. If *show_table*
is ``True``, the name of the table will be prepended to the name of the
column. This will mainly affect an :py:class:`~oursql.DictCursor`.

Example usage::

>>> query = "SELECT user_id, user_registration FROM user WHERE user_name = ?"
>>> params = ("The Earwig",)
>>> result1 = site.sql_query(query, params)
>>> result2 = site.sql_query(query, params, dict_cursor=True)
>>> for row in result1: print row
(7418060L, '20080703215134')
>>> for row in result2: print row
{'user_id': 7418060L, 'user_registration': '20080703215134'}

This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of
oursql's exceptions (:py:exc:`oursql.ProgrammingError`,
:py:exc:`oursql.InterfaceError`, ...) if there were problems with the
query.

See :py:meth:`_sql_connect` for information on how a connection is
acquired. Also relevant is `oursql's documentation
<http://packages.python.org/oursql>`_ for details on that package.
"""
if not cursor_class:
if dict_cursor:
@@ -608,11 +625,16 @@ class Site(object):

def get_replag(self):
"""Return the estimated database replication lag in seconds.
Requires SQL access. This function only makes sense on a replicated
database (e.g. the Wikimedia Toolserver) and on a wiki that receives a
large number of edits (ideally, at least one per second), or the result
may be larger than expected.
may be larger than expected, since it works by subtracting the
timestamp of the latest recent changes event from the current time.

This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of
oursql's exceptions (:py:exc:`oursql.ProgrammingError`,
:py:exc:`oursql.InterfaceError`, ...) if there were problems.
"""
query = """SELECT UNIX_TIMESTAMP() - UNIX_TIMESTAMP(rc_timestamp) FROM
recentchanges ORDER BY rc_timestamp DESC LIMIT 1"""
@@ -622,14 +644,16 @@ class Site(object):
def namespace_id_to_name(self, ns_id, all=False):
"""Given a namespace ID, return associated namespace names.

If all is False (default), we'll return the first name in the list,
which is usually the localized version. Otherwise, we'll return the
entire list, which includes the canonical name.
If *all* is ``False`` (default), we'll return the first name in the
list, which is usually the localized version. Otherwise, we'll return
the entire list, which includes the canonical name.

For example, returns u"Wikipedia" if ns_id=4 and all=False on enwiki;
returns [u"Wikipedia", u"Project", u"WP"] if ns_id=4 and all=True.
For example, this returns ``u"Wikipedia"`` if *ns_id* = ``4`` and
*all* = ``False`` on ``enwiki``; returns ``[u"Wikipedia", u"Project",
u"WP"]`` if *ns_id* = ``4`` and *all* is ``True``.

Raises NamespaceNotFoundError if the ID is not found.
Raises :py:exc:`~earwigbot.exceptions.NamespaceNotFoundError` if the ID
is not found.
"""
try:
if all:
@@ -643,10 +667,11 @@ class Site(object):
def namespace_name_to_id(self, name):
"""Given a namespace name, return the associated ID.

Like namespace_id_to_name(), but reversed. Case is ignored, because
namespaces are assumed to be case-insensitive.
Like :py:meth:`namespace_id_to_name`, but reversed. Case is ignored,
because namespaces are assumed to be case-insensitive.

Raises NamespaceNotFoundError if the name is not found.
Raises :py:exc:`~earwigbot.exceptions.NamespaceNotFoundError` if the
name is not found.
"""
lname = name.lower()
for ns_id, names in self._namespaces.items():
@@ -658,14 +683,18 @@ class Site(object):
raise exceptions.NamespaceNotFoundError(e)

def get_page(self, title, follow_redirects=False):
"""Returns a Page object for the given title (pagename).
"""Return a :py:class:`Page` object for the given title.

Will return a Category object instead if the given title is in the
category namespace. As Category is a subclass of Page, this should not
cause problems.
*follow_redirects* is passed directly to
:py:class:`~earwigbot.wiki.page.Page`'s constructor. Also, this will
return a :py:class:`~earwigbot.wiki.category.Category` object instead
if the given title is in the category namespace. As
:py:class:`~earwigbot.wiki.category.Category` is a subclass of
:py:class:`~earwigbot.wiki.page.Page`, this should not cause problems.

Note that this doesn't do any direct checks for existence or
redirect-following - Page's methods provide that.
redirect-following: :py:class:`~earwigbot.wiki.page.Page`'s methods
provide that.
"""
prefixes = self.namespace_id_to_name(constants.NS_CATEGORY, all=True)
prefix = title.split(":", 1)[0]
@@ -675,20 +704,22 @@ class Site(object):
return Page(self, title, follow_redirects)

def get_category(self, catname, follow_redirects=False):
"""Returns a Category object for the given category name.
"""Return a :py:class:`Category` object for the given category name.

`catname` should be given *without* a namespace prefix. This method is
really just shorthand for get_page("Category:" + catname).
*catname* should be given *without* a namespace prefix. This method is
really just shorthand for :py:meth:`get_page("Category:" + catname)
<get_page>`.
"""
prefix = self.namespace_id_to_name(constants.NS_CATEGORY)
pagename = ':'.join((prefix, catname))
return Category(self, pagename, follow_redirects)

def get_user(self, username=None):
"""Returns a User object for the given username.
"""Return a :py:class:`User` object for the given username.

If `username` is left as None, then a User object representing the
currently logged-in (or anonymous!) user is returned.
If *username* is left as ``None``, then a
:py:class:`~earwigbot.wiki.user.User` object representing the currently
logged-in (or anonymous!) user is returned.
"""
if not username:
username = self._get_username()


+ 55
- 47
earwigbot/wiki/sitesdb.py Bestand weergeven

@@ -35,20 +35,23 @@ __all__ = ["SitesDB"]

class SitesDB(object):
"""
EarwigBot's Wiki Toolset: Sites Database Manager
**EarwigBot's Wiki Toolset: Sites Database Manager**

This class controls the sites.db file, which stores information about all
wiki sites known to the bot. Three public methods act as bridges between
the bot's config files and Site objects:
get_site -- returns a Site object corresponding to a given site name
add_site -- stores a site in the database, given connection info
remove_site -- removes a site from the database, given its name
This class controls the :file:`sites.db` file, which stores information
about all wiki sites known to the bot. Three public methods act as bridges
between the bot's config files and :py:class:`~earwigbot.wiki.site.Site`
objects:

- :py:meth:`get_site`: returns a Site object corresponding to a site
- :py:meth:`add_site`: stores a site in the database
- :py:meth:`remove_site`: removes a site from the database

There's usually no need to use this class directly. All public methods
here are available as bot.wiki.get_site(), bot.wiki.add_site(), and
bot.wiki.remove_site(), which use a sites.db file located in the same
directory as our config.yml file. Lower-level access can be achieved
by importing the manager class (`from earwigbot.wiki import SitesDB`).
here are available as :py:meth:`bot.wiki.get_site`,
:py:meth:`bot.wiki.add_site`, and :py:meth:`bot.wiki.remove_site`, which
use a :file:`sites.db` file located in the same directory as our
:file:`config.yml` file. Lower-level access can be achieved by importing
the manager class (``from earwigbot.wiki import SitesDB``).
"""

def __init__(self, bot):
@@ -157,7 +160,7 @@ class SitesDB(object):
namespaces)

def _make_site_object(self, name):
"""Return a Site object associated with the site 'name' in our sitesdb.
"""Return a Site object associated with the site *name* in our sitesdb.

This calls _load_site_from_sitesdb(), so SiteNotFoundError will be
raised if the site is not in our sitesdb.
@@ -210,8 +213,8 @@ class SitesDB(object):
namespaces are extracted from the site and inserted into the sites
database. If the sitesdb doesn't exist, we'll create it first.
"""
name = site.name()
sites_data = (name, site.project(), site.lang(), site._base_url,
name = site.name
sites_data = (name, site.project, site.lang, site._base_url,
site._article_path, site._script_path)
sql_data = [(name, key, val) for key, val in site._sql_data.iteritems()]
ns_data = []
@@ -255,24 +258,25 @@ class SitesDB(object):
"""Return a Site instance based on information from the sitesdb.

With no arguments, return the default site as specified by our config
file. This is config.wiki["defaultSite"].
file. This is ``config.wiki["defaultSite"]``.

With 'name' specified, return the site with that name. This is
equivalent to the site's 'wikiid' in the API, like 'enwiki'.
With *name* specified, return the site with that name. This is
equivalent to the site's ``wikiid`` in the API, like ``"enwiki"``.

With 'project' and 'lang' specified, return the site whose project and
With *project* and *lang* specified, return the site whose project and
language match these values. If there are multiple sites with the same
values (unlikely), this is not a reliable way of loading a site. Call
the function with an explicit 'name' in that case.
the function with an explicit *name* in that case.

We will attempt to login to the site automatically using
config.wiki["username"] and config.wiki["password"] if both are
``config.wiki["username"]`` and ``config.wiki["password"]`` if both are
defined.

Specifying a project without a lang or a lang without a project will
raise TypeError. If all three args are specified, 'name' will be first
tried, then 'project' and 'lang' if 'name' doesn't work. If a site
cannot be found in the sitesdb, SiteNotFoundError will be raised. An
raise :py:exc:`TypeError`. If all three args are specified, *name* will
be first tried, then *project* and *lang* if *name* doesn't work. If a
site cannot be found in the sitesdb,
:py:exc:`~earwigbot.exceptions.SiteNotFoundError` will be raised. An
empty sitesdb will be created if none is found.
"""
# Someone specified a project without a lang, or vice versa:
@@ -311,23 +315,27 @@ class SitesDB(object):
script_path="/w", sql=None):
"""Add a site to the sitesdb so it can be retrieved with get_site().

If only a project and a lang are given, we'll guess the base_url as
"//{lang}.{project}.org" (which is protocol-relative, becoming 'https'
if 'useHTTPS' is True in config otherwise 'http'). If this is wrong,
provide the correct base_url as an argument (in which case project and
lang are ignored). Most wikis use "/w" as the script path (meaning the
API is located at "{base_url}{script_path}/api.php" ->
"//{lang}.{project}.org/w/api.php"), so this is the default. If your
wiki is different, provide the script_path as an argument. The only
other argument to Site() that we can't get from config files or by
querying the wiki itself is SQL connection info, so provide a dict of
kwargs as `sql` and Site will pass it to oursql.connect(**sql),
allowing you to make queries with site.sql_query().

Returns True if the site was added successfully or False if the site is
already in our sitesdb (this can be done purposefully to update old
site info). Raises SiteNotFoundError if not enough information has
been provided to identify the site (e.g. a project but not a lang).
If only a project and a lang are given, we'll guess the *base_url* as
``"//{lang}.{project}.org"`` (which is protocol-relative, becoming
``"https"`` if *useHTTPS* is ``True`` in config otherwise ``"http"``).
If this is wrong, provide the correct *base_url* as an argument (in
which case project and lang are ignored). Most wikis use ``"/w"`` as
the script path (meaning the API is located at
``"{base_url}{script_path}/api.php"`` ->
``"//{lang}.{project}.org/w/api.php"``), so this is the default. If
your wiki is different, provide the script_path as an argument. The
only other argument to :py:class:`~earwigbot.wiki.site.Site` that we
can't get from config files or by querying the wiki itself is SQL
connection info, so provide a dict of kwargs as *sql* and Site will
pass it to :py:func:`oursql.connect(**sql) <oursql.connect>`, allowing
you to make queries with :py:meth:`site.sql_query
<earwigbot.wiki.site.Site.sql_query>`.

Returns ``True`` if the site was added successfully or ``False`` if the
site is already in our sitesdb (this can be done purposefully to update
old site info). Raises :py:exc:`~earwigbot.exceptions.SiteNotFoundError`
if not enough information has been provided to identify the site (e.g.
a *project* but not a *lang*).
"""
if not base_url:
if not project or not lang:
@@ -353,18 +361,18 @@ class SitesDB(object):
search_config=search_config)

self._add_site_to_sitesdb(site)
self._sites[site.name()] = site
self._sites[site.name] = site
return site

def remove_site(self, name=None, project=None, lang=None):
"""Remove a site from the sitesdb.

Returns True if the site was removed successfully or False if the site
was not in our sitesdb originally. If all three args (name, project,
and lang) are given, we'll first try 'name' and then try the latter two
if 'name' wasn't found in the database. Raises TypeError if a project
was given but not a language, or vice versa. Will create an empty
sitesdb if none was found.
Returns ``True`` if the site was removed successfully or ``False`` if
the site was not in our sitesdb originally. If all three args (*name*,
*project*, and *lang*) are given, we'll first try *name* and then try
the latter two if *name* wasn't found in the database. Raises
:py:exc:`TypeError` if a project was given but not a language, or vice
versa. Will create an empty sitesdb if none was found.
"""
# Someone specified a project without a lang, or vice versa:
if (project and not lang) or (not project and lang):


+ 104
- 87
earwigbot/wiki/user.py Bestand weergeven

@@ -30,28 +30,33 @@ __all__ = ["User"]

class User(object):
"""
EarwigBot's Wiki Toolset: User Class

Represents a User on a given Site. Has methods for getting a bunch of
information about the user, such as editcount and user rights, methods for
returning the user's userpage and talkpage, etc.

Attributes:
name -- the user's username
exists -- True if the user exists, or False if they do not
userid -- an integer ID representing the user
blockinfo -- information about any current blocks on the user
groups -- a list of the user's groups
rights -- a list of the user's rights
editcount -- the number of edits made by the user
registration -- the time the user registered as a time.struct_time
emailable -- True if you can email the user, False if you cannot
gender -- the user's gender ("male", "female", or "unknown")

Public methods:
reload -- forcibly reload the user's attributes
get_userpage -- returns a Page object representing the user's userpage
get_talkpage -- returns a Page object representing the user's talkpage
**EarwigBot's Wiki Toolset: User Class**

Represents a user on a given :py:class:`~earwigbot.wiki.site.Site`. Has
methods for getting a bunch of information about the user, such as
editcount and user rights, methods for returning the user's userpage and
talkpage, etc.

*Attributes:*

- :py:attr:`name`: the user's username
- :py:attr:`exists`: ``True`` if the user exists, else ``False``
- :py:attr:`userid`: an integer ID representing the user
- :py:attr:`blockinfo`: information about any current blocks on the user
- :py:attr:`groups`: a list of the user's groups
- :py:attr:`rights`: a list of the user's rights
- :py:attr:`editcount`: the number of edits made by the user
- :py:attr:`registration`: the time the user registered
- :py:attr:`emailable`: ``True`` if you can email the user, or ``False``
- :py:attr:`gender`: the user's gender ("male"/"female"/"unknown")

*Public methods:*

- :py:meth:`reload`: forcibly reloads the user's attributes
- :py:meth:`get_userpage`: returns a Page object representing the user's
userpage
- :py:meth:`get_talkpage`: returns a Page object representing the user's
talkpage
"""

def __init__(self, site, name):
@@ -71,26 +76,25 @@ class User(object):
self._name = name

def __repr__(self):
"""Returns the canonical string representation of the User."""
"""Return the canonical string representation of the User."""
return "User(name={0!r}, site={1!r})".format(self._name, self._site)

def __str__(self):
"""Returns a nice string representation of the User."""
return '<User "{0}" of {1}>'.format(self.name(), str(self._site))
"""Return a nice string representation of the User."""
return '<User "{0}" of {1}>'.format(self._name, str(self._site))

def _get_attribute(self, attr, force):
def _get_attribute(self, attr):
"""Internally used to get an attribute by name.

We'll call _load_attributes() to get this (and all other attributes)
from the API if it is not already defined. If `force` is True, we'll
re-load them even if they've already been loaded.
from the API if it is not already defined.

Raises UserNotFoundError if a nonexistent user prevents us from
returning a certain attribute.
"""
if not hasattr(self, attr) or force:
if not hasattr(self, attr):
self._load_attributes()
if self._exists is False:
if not self._exists:
e = "User '{0}' does not exist.".format(self._name)
raise UserNotFoundError(e)
return getattr(self, attr)
@@ -150,105 +154,118 @@ class User(object):

self._gender = res["gender"]

def name(self, force=False):
"""Returns the user's name.
@property
def name(self):
"""The user's username.

If `force` is True, we will load the name from the API and return that.
This could potentially return a "normalized" version of the name - for
example, without a "User:" prefix or without underscores. Unlike other
attribute getters, this will never make an API query without `force`.

Note that if another attribute getter, like exists(), has already been
called, then the username has already been normalized.
This will never make an API query on its own, but if one has already
been made by the time this is retrieved, the username may have been
"normalized" from the original input to the constructor, converted into
a Unicode object, with underscores removed, etc.
"""
if force:
self._load_attributes()
return self._name

def exists(self, force=False):
"""Returns True if the user exists, or False if they do not.
@property
def exists(self):
"""``True`` if the user exists, or ``False`` if they do not.

Makes an API query if `force` is True or if we haven't made one
already.
Makes an API query only if we haven't made one already.
"""
if not hasattr(self, "_exists") or force:
if not hasattr(self, "_exists"):
self._load_attributes()
return self._exists

def userid(self, force=False):
"""Returns an integer ID used by MediaWiki to represent the user.
@property
def userid(self):
"""An integer ID used by MediaWiki to represent the user.

Raises UserNotFoundError if the user does not exist. Makes an API query
if `force` is True or if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_userid", force)
return self._get_attribute("_userid")

def blockinfo(self, force=False):
"""Returns information about a current block on the user.
@property
def blockinfo(self):
"""Information about any current blocks on the user.

If the user is not blocked, returns False. If they are, returns a dict
with three keys: "by" is the blocker's username, "reason" is the reason
why they were blocked, and "expiry" is when the block expires.
If the user is not blocked, returns ``False``. If they are, returns a
dict with three keys: ``"by"`` is the blocker's username, ``"reason"``
is the reason why they were blocked, and ``"expiry"`` is when the block
expires.

Raises UserNotFoundError if the user does not exist. Makes an API query
if `force` is True or if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_blockinfo", force)

def groups(self, force=False):
"""Returns a list of groups this user is in, including "*".
@property
def groups(self):
"""A list of groups this user is in, including ``"*"``.

Raises UserNotFoundError if the user does not exist. Makes an API query
if `force` is True or if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_groups", force)

def rights(self, force=False):
"""Returns a list of this user's rights.
@property
def rights(self):
"""A list of this user's rights.

Raises UserNotFoundError if the user does not exist. Makes an API query
if `force` is True or if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_rights", force)

def editcount(self, force=False):
@property
def editcount(self):
"""Returns the number of edits made by the user.

Raises UserNotFoundError if the user does not exist. Makes an API query
if `force` is True or if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_editcount", force)

def registration(self, force=False):
"""Returns the time the user registered as a time.struct_time object.
@property
def registration(self):
"""The time the user registered as a :py:class:`time.struct_time`.

Raises UserNotFoundError if the user does not exist. Makes an API query
if `force` is True or if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_registration", force)

def emailable(self, force=False):
"""Returns True if the user can be emailed, or False if they cannot.
@property
def emailable(self):
"""``True`` if the user can be emailed, or ``False`` if they cannot.

Raises UserNotFoundError if the user does not exist. Makes an API query
if `force` is True or if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_emailable", force)

def gender(self, force=False):
"""Returns the user's gender.
@property
def gender(self):
"""The user's gender.

Can return either "male", "female", or "unknown", if they did not
specify it.
Can return either ``"male"``, ``"female"``, or ``"unknown"``, if they
did not specify it.

Raises UserNotFoundError if the user does not exist. Makes an API query
if `force` is True or if we haven't made one already.
Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user
does not exist. Makes an API query only if we haven't made one already.
"""
return self._get_attribute("_gender", force)

def reload(self):
"""Forcibly reload the user's attributes.

Emphasis on *reload*: this is only necessary if there is reason to
believe they have changed.
"""
self._load_attributes()

def get_userpage(self):
"""Returns a Page object representing the user's userpage.
"""Return a Page object representing the user's userpage.
No checks are made to see if it exists or not. Proper site namespace
conventions are followed.
"""
@@ -257,8 +274,8 @@ class User(object):
return Page(self._site, pagename)

def get_talkpage(self):
"""Returns a Page object representing the user's talkpage.
"""Return a Page object representing the user's talkpage.
No checks are made to see if it exists or not. Proper site namespace
conventions are followed.
"""


Laden…
Annuleren
Opslaan