diff --git a/docs/api/earwigbot.wiki.rst b/docs/api/earwigbot.wiki.rst index 9e7a5ad..806b3eb 100644 --- a/docs/api/earwigbot.wiki.rst +++ b/docs/api/earwigbot.wiki.rst @@ -7,7 +7,6 @@ wiki Package .. automodule:: earwigbot.wiki :members: :undoc-members: - :show-inheritance: :mod:`category` Module ---------------------- @@ -15,7 +14,6 @@ wiki Package .. automodule:: earwigbot.wiki.category :members: :undoc-members: - :show-inheritance: :mod:`constants` Module ----------------------- @@ -23,7 +21,6 @@ wiki Package .. automodule:: earwigbot.wiki.constants :members: :undoc-members: - :show-inheritance: :mod:`copyright` Module ----------------------- @@ -31,7 +28,6 @@ wiki Package .. automodule:: earwigbot.wiki.copyright :members: :undoc-members: - :show-inheritance: :mod:`page` Module ------------------ @@ -47,7 +43,6 @@ wiki Package .. automodule:: earwigbot.wiki.site :members: :undoc-members: - :show-inheritance: :mod:`sitesdb` Module --------------------- @@ -55,7 +50,6 @@ wiki Package .. automodule:: earwigbot.wiki.sitesdb :members: :undoc-members: - :show-inheritance: :mod:`user` Module ------------------ @@ -63,5 +57,3 @@ wiki Package .. automodule:: earwigbot.wiki.user :members: :undoc-members: - :show-inheritance: - diff --git a/docs/toolset.rst b/docs/toolset.rst index cae1beb..b8a4124 100644 --- a/docs/toolset.rst +++ b/docs/toolset.rst @@ -117,6 +117,8 @@ Create :py:class:`earwigbot.wiki.Page ` objects with :py:meth:`user.get_talkpage() `. 
They provide the following attributes: +- :py:attr:`~earwigbot.wiki.page.Page.site`: the page's corresponding + :py:class:`~earwigbot.wiki.site.Site` object - :py:attr:`~earwigbot.wiki.page.Page.title`: the page's title, or pagename - :py:attr:`~earwigbot.wiki.page.Page.exists`: whether the page exists - :py:attr:`~earwigbot.wiki.page.Page.pageid`: an integer ID representing the @@ -133,7 +135,7 @@ provide the following attributes: and the following methods: -- :py:meth:`~earwigbot.wiki.page.Page.reload`: forcibly reload the page's +- :py:meth:`~earwigbot.wiki.page.Page.reload`: forcibly reloads the page's attributes (emphasis on *reload* - this is only necessary if there is reason to believe they have changed) - :py:meth:`toggle_talk(...) `: returns a @@ -198,7 +200,7 @@ provide the following attributes: and the following methods: -- :py:meth:`~earwigbot.wiki.user.User.reload`: forcibly reload the user's +- :py:meth:`~earwigbot.wiki.user.User.reload`: forcibly reloads the user's attributes (emphasis on *reload* - this is only necessary if there is reason to believe they have changed) - :py:meth:`~earwigbot.wiki.user.User.get_userpage`: returns a diff --git a/earwigbot/commands/afc_report.py b/earwigbot/commands/afc_report.py index dfb612b..6c3348d 100644 --- a/earwigbot/commands/afc_report.py +++ b/earwigbot/commands/afc_report.py @@ -70,16 +70,16 @@ class Command(BaseCommand): def get_page(self, title): page = self.site.get_page(title, follow_redirects=False) - if page.exists()[0]: + if page.exists[0]: return page def report(self, page): - url = page.url().replace("en.wikipedia.org/wiki", "enwp.org") - short = self.statistics.get_short_title(page.title()) + url = page.url.replace("en.wikipedia.org/wiki", "enwp.org") + short = self.statistics.get_short_title(page.title) status = self.get_status(page) user = self.site.get_user(page.creator()) - user_name = user.name() - user_url = user.get_talkpage().url() + user_name = user.name + user_url = 
user.get_talkpage().url msg1 = "AfC submission report for \x0302{0}\x0301 ({1}):" msg2 = "Status: \x0303{0}\x0301" @@ -92,9 +92,9 @@ class Command(BaseCommand): self.say(self.data.chan, msg3.format(user_name, user_url)) def get_status(self, page): - if page.is_redirect(): + if page.is_redirect: target = page.get_redirect_target() - if self.site.get_page(target).namespace() == wiki.NS_MAIN: + if self.site.get_page(target).namespace == wiki.NS_MAIN: return "accepted" return "redirect" diff --git a/earwigbot/commands/editcount.py b/earwigbot/commands/editcount.py index 237d18d..13dad27 100644 --- a/earwigbot/commands/editcount.py +++ b/earwigbot/commands/editcount.py @@ -45,13 +45,13 @@ class Command(BaseCommand): user = site.get_user(name) try: - count = user.editcount() + count = user.editcount except wiki.UserNotFoundError: msg = "the user \x0302{0}\x0301 does not exist." self.reply(data, msg.format(name)) return - safe = quote_plus(user.name()) + safe = quote_plus(user.name) url = "http://toolserver.org/~tparis/pcount/index.php?name={0}&lang=en&wiki=wikipedia" msg = "\x0302{0}\x0301 has {1} edits ({2})." self.reply(data, msg.format(name, count, url.format(safe))) diff --git a/earwigbot/commands/registration.py b/earwigbot/commands/registration.py index a36bcab..913ca33 100644 --- a/earwigbot/commands/registration.py +++ b/earwigbot/commands/registration.py @@ -45,7 +45,7 @@ class Command(BaseCommand): user = site.get_user(name) try: - reg = user.registration() + reg = user.registration except wiki.UserNotFoundError: msg = "the user \x0302{0}\x0301 does not exist." 
self.reply(data, msg.format(name)) @@ -54,14 +54,13 @@ class Command(BaseCommand): date = time.strftime("%b %d, %Y at %H:%M:%S UTC", reg) age = self.get_diff(time.mktime(reg), time.mktime(time.gmtime())) - g = user.gender() - if g == "male": + if user.gender == "male": gender = "He's" - elif g == "female": + elif user.gender == "female": gender = "She's" else: gender = "They're" - + msg = "\x0302{0}\x0301 registered on {1}. {2} {3} old." self.reply(data, msg.format(name, date, gender, age)) diff --git a/earwigbot/commands/rights.py b/earwigbot/commands/rights.py index 10c5137..1ccb1ca 100644 --- a/earwigbot/commands/rights.py +++ b/earwigbot/commands/rights.py @@ -43,7 +43,7 @@ class Command(BaseCommand): user = site.get_user(name) try: - rights = user.groups() + rights = user.groups except wiki.UserNotFoundError: msg = "the user \x0302{0}\x0301 does not exist." self.reply(data, msg.format(name)) diff --git a/earwigbot/tasks/__init__.py b/earwigbot/tasks/__init__.py index d830f1e..ba604b0 100644 --- a/earwigbot/tasks/__init__.py +++ b/earwigbot/tasks/__init__.py @@ -116,7 +116,7 @@ class BaseTask(object): except KeyError: return False title = cfg.get("page", "User:$1/Shutoff/Task $2") - username = site.get_user().name() + username = site.get_user().name title = title.replace("$1", username).replace("$2", str(self.number)) page = site.get_page(title) diff --git a/earwigbot/tasks/afc_copyvios.py b/earwigbot/tasks/afc_copyvios.py index 2e651aa..5035ab7 100644 --- a/earwigbot/tasks/afc_copyvios.py +++ b/earwigbot/tasks/afc_copyvios.py @@ -70,13 +70,13 @@ class Task(BaseTask): def process(self, page): """Detect copyvios in 'page' and add a note if any are found.""" - title = page.title() + title = page.title if title in self.ignore_list: msg = "Skipping page in ignore list: [[{0}]]" self.logger.info(msg.format(title)) return - pageid = page.pageid() + pageid = page.pageid if self.has_been_processed(pageid): msg = "Skipping check on already processed page [[{0}]]" 
self.logger.info(msg.format(title)) @@ -143,7 +143,7 @@ class Task(BaseTask): This will only be called if "cache_results" == True in the task's config, which is False by default. """ - pageid = page.pageid() + pageid = page.pageid hash = sha256(page.get()).hexdigest() query1 = "SELECT 1 FROM cache WHERE cache_id = ?" query2 = "DELETE FROM cache WHERE cache_id = ?" diff --git a/earwigbot/tasks/afc_history.py b/earwigbot/tasks/afc_history.py index dffdb70..2e95b67 100644 --- a/earwigbot/tasks/afc_history.py +++ b/earwigbot/tasks/afc_history.py @@ -93,7 +93,7 @@ class Task(BaseTask): generator = self.backwards_cat_iterator() for d in xrange(num_days): category = generator.next() - date = category.title().split("/")[-1] + date = category.title.split("/")[-1] self.update_date(date, category) sleep(10) self.logger.info("Update complete") @@ -104,7 +104,7 @@ class Task(BaseTask): generator = self.backwards_cat_iterator() for d in xrange(num_days): category = generator.next() - date = category.title().split("/")[-1] + date = category.title.split("/")[-1] data[date] = self.get_date_counts(date) data = OrderedDict(reversed(data.items())) # Oldest to most recent @@ -123,7 +123,7 @@ class Task(BaseTask): current -= timedelta(1) # Subtract one day from date def update_date(self, date, category): - msg = "Updating {0} ([[{1}]])".format(date, category.title()) + msg = "Updating {0} ([[{1}]])".format(date, category.title) self.logger.debug(msg) q_select = "SELECT page_date, page_status FROM page WHERE page_id = ?" 
@@ -153,7 +153,7 @@ class Task(BaseTask): def get_status(self, title, pageid): page = self.site.get_page(title) - ns = page.namespace() + ns = page.namespace if ns == wiki.NS_FILE_TALK: # Ignore accepted FFU requests return self.STATUS_NONE @@ -161,7 +161,7 @@ class Task(BaseTask): if ns == wiki.NS_TALK: new_page = page.toggle_talk() sleep(2) - if new_page.is_redirect(): + if new_page.is_redirect: return self.STATUS_NONE # Ignore accepted AFC/R requests return self.STATUS_ACCEPT diff --git a/earwigbot/tasks/afc_statistics.py b/earwigbot/tasks/afc_statistics.py index b177734..e29913c 100644 --- a/earwigbot/tasks/afc_statistics.py +++ b/earwigbot/tasks/afc_statistics.py @@ -129,7 +129,7 @@ class Task(BaseTask): "~~~ at ~~~~~", newtext) page.edit(newtext, summary, minor=True, bot=True) - self.logger.info(u"Chart saved to [[{0}]]".format(page.title())) + self.logger.info(u"Chart saved to [[{0}]]".format(page.title)) def compile_charts(self): """Compile and return all statistics information from our local db.""" @@ -332,7 +332,7 @@ class Task(BaseTask): self.logger.error(msg) return - namespace = self.site.get_page(title).namespace() + namespace = self.site.get_page(title).namespace status, chart = self.get_status_and_chart(content, namespace) if chart == self.CHART_NONE: msg = u"Could not find a status for [[{0}]]".format(title) @@ -364,7 +364,7 @@ class Task(BaseTask): self.logger.error(msg) return - namespace = self.site.get_page(title).namespace() + namespace = self.site.get_page(title).namespace status, chart = self.get_status_and_chart(content, namespace) if chart == self.CHART_NONE: self.untrack_page(cursor, pageid) @@ -718,7 +718,7 @@ class Task(BaseTask): if chart in [self.CHART_PEND, self.CHART_DRAFT] and s_user: submitter = self.site.get_user(s_user) try: - if submitter.blockinfo(): + if submitter.blockinfo: notes += "|nb=1" # Submitter is blocked except wiki.UserNotFoundError: # Likely an IP pass diff --git a/earwigbot/wiki/__init__.py 
b/earwigbot/wiki/__init__.py index 9765239..16bf7e2 100644 --- a/earwigbot/wiki/__init__.py +++ b/earwigbot/wiki/__init__.py @@ -21,18 +21,26 @@ # SOFTWARE. """ -EarwigBot's Wiki Toolset +**EarwigBot's Wiki Toolset** This is a collection of classes and functions to read from and write to -Wikipedia and other wiki sites. No connection whatsoever to python-wikitools -written by Mr.Z-man, other than a similar purpose. We share no code. +Wikipedia and other wiki sites. No connection whatsoever to `python-wikitools +`_ written by `Mr.Z-man +`_, other than a similar purpose. +We share no code. -Import the toolset directly with `from earwigbot import wiki`. If using the -built-in integration with the rest of the bot, Bot() objects contain a `wiki` -attribute, which is a SitesDB object tied to the sites.db file located in the -same directory as config.yml. That object has the principal methods get_site, -add_site, and remove_site that should handle all of your Site (and thus, Page, -Category, and User) needs. +Import the toolset directly with ``from earwigbot import wiki``. If using the +built-in integration with the rest of the bot, :py:class:`~earwigbot.bot.Bot` +objects contain a :py:attr:`~earwigbot.bot.Bot.wiki` attribute, which is a +:py:class:`~earwigbot.wiki.sitesdb.SitesDB` object tied to the :file:`sites.db` +file located in the same directory as :file:`config.yml`. That object has the +principal methods :py:meth:`~earwigbot.wiki.sitesdb.SitesDB.get_site`, +:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.add_site`, and +:py:meth:`~earwigbot.wiki.sitesdb.SitesDB.remove_site` that should handle all +of your :py:class:`~earwigbot.wiki.site.Site` (and thus, +:py:class:`~earwigbot.wiki.page.Page`, +:py:class:`~earwigbot.wiki.category.Category`, and +:py:class:`~earwigbot.wiki.user.User`) needs. 
""" from earwigbot.wiki.category import * diff --git a/earwigbot/wiki/category.py b/earwigbot/wiki/category.py index e0209b2..27dd999 100644 --- a/earwigbot/wiki/category.py +++ b/earwigbot/wiki/category.py @@ -26,33 +26,37 @@ __all__ = ["Category"] class Category(Page): """ - EarwigBot's Wiki Toolset: Category Class + **EarwigBot's Wiki Toolset: Category Class** - Represents a Category on a given Site, a subclass of Page. Provides - additional methods, but Page's own methods should work fine on Category - objects. Site.get_page() will return a Category instead of a Page if the - given title is in the category namespace; get_category() is shorthand, - because it accepts category names without the namespace prefix. + Represents a category on a given :py:class:`~earwigbot.wiki.site.Site`, a + subclass of :py:class:`~earwigbot.wiki.page.Page`. Provides additional + methods, but :py:class:`~earwigbot.wiki.page.Page`'s own methods should + work fine on :py:class:`Category` objects. :py:meth:`site.get_page() + <earwigbot.wiki.site.Site.get_page>` will return a :py:class:`Category` + instead of a :py:class:`~earwigbot.wiki.page.Page` if the given title is in + the category namespace; :py:meth:`~earwigbot.wiki.site.Site.get_category` + is shorthand, accepting category names without the namespace prefix. 
- Public methods: - get_members -- returns a list of page titles in the category + *Public methods:* + + - :py:meth:`get_members`: returns a list of page titles in the category """ def __repr__(self): - """Returns the canonical string representation of the Category.""" + """Return the canonical string representation of the Category.""" res = "Category(title={0!r}, follow_redirects={1!r}, site={2!r})" return res.format(self._title, self._follow_redirects, self._site) def __str__(self): - """Returns a nice string representation of the Category.""" - return ''.format(self.title(), str(self._site)) + """Return a nice string representation of the Category.""" + return ''.format(self.title, str(self._site)) def _get_members_via_sql(self, limit): """Return a list of tuples of (title, pageid) in the category.""" query = """SELECT page_title, page_namespace, page_id FROM page JOIN categorylinks ON page_id = cl_from WHERE cl_to = ?""" - title = self.title().replace(" ", "_").split(":", 1)[1] + title = self.title.replace(" ", "_").split(":", 1)[1] if limit: query += " LIMIT ?" @@ -83,16 +87,17 @@ class Category(Page): return [member["title"] for member in members] def get_members(self, use_sql=False, limit=None): - """Returns a list of page titles in the category. + """Return a list of page titles in the category. - If `use_sql` is True, we will use a SQL query instead of the API. Pages - will be returned as tuples of (title, pageid) instead of just titles. + If *use_sql* is ``True``, we will use a SQL query instead of the API. + Pages will be returned as tuples of ``(title, pageid)`` instead of just + titles. - If `limit` is provided, we will provide this many titles, or less if - the category is smaller. `limit` defaults to 50 for API queries; normal + If *limit* is provided, we will provide this many titles, or less if + the category is smaller. It defaults to 50 for API queries; normal users can go up to 500, and bots can go up to 5,000 on a single API - query. 
If we're using SQL, the limit is None by default (returning all - pages in the category), but an arbitrary limit can still be chosen. + query. If we're using SQL, the limit is ``None`` by default (returning + all pages in the category), but an arbitrary limit can still be chosen. """ if use_sql: return self._get_members_via_sql(limit) diff --git a/earwigbot/wiki/constants.py b/earwigbot/wiki/constants.py index 7ccb136..13a1efd 100644 --- a/earwigbot/wiki/constants.py +++ b/earwigbot/wiki/constants.py @@ -21,15 +21,16 @@ # SOFTWARE. """ -EarwigBot's Wiki Toolset: Constants +**EarwigBot's Wiki Toolset: Constants** This module defines some useful constants: -* USER_AGENT - our default User Agent when making API queries -* NS_* - default namespace IDs for easy lookup -Import directly with `from earwigbot.wiki import constants` or -`from earwigbot.wiki.constants import *`. These are also available from -earwigbot.wiki (e.g. `earwigbot.wiki.USER_AGENT`). +- :py:const:`USER_AGENT`: our default User Agent when making API queries +- :py:const:`NS_*`: default namespace IDs for easy lookup + +Import directly with ``from earwigbot.wiki import constants`` or +``from earwigbot.wiki.constants import *``. These are also available from +:py:mod:`earwigbot.wiki` directly (e.g. ``earwigbot.wiki.USER_AGENT``). """ # Default User Agent when making API queries: diff --git a/earwigbot/wiki/page.py b/earwigbot/wiki/page.py index 1c7ab0f..f6ea257 100644 --- a/earwigbot/wiki/page.py +++ b/earwigbot/wiki/page.py @@ -32,32 +32,42 @@ __all__ = ["Page"] class Page(CopyrightMixin): """ - EarwigBot's Wiki Toolset: Page Class - - Represents a Page on a given Site. Has methods for getting information - about the page, getting page content, and so on. Category is a subclass of - Page with additional methods. 
- - Attributes: - title -- the page's title, or pagename - exists -- whether the page exists - pageid -- an integer ID representing the page - url -- the page's URL - namespace -- the page's namespace as an integer - protection -- the page's current protection status - is_talkpage -- True if the page is a talkpage, else False - is_redirect -- True if the page is a redirect, else False - - Public methods: - reload -- forcibly reload the page's attributes - toggle_talk -- returns a content page's talk page, or vice versa - get -- returns page content - get_redirect_target -- if the page is a redirect, returns its destination - get_creator -- returns a User object representing the first person - to edit the page - edit -- replaces the page's content or creates a new page - add_section -- adds a new section at the bottom of the page - copyvio_check -- checks the page for copyright violations + **EarwigBot's Wiki Toolset: Page Class** + + Represents a page on a given :py:class:`~earwigbot.wiki.site.Site`. Has + methods for getting information about the page, getting page content, and + so on. :py:class:`~earwigbot.wiki.category.Category` is a subclass of + :py:class:`Page` with additional methods. 
+ + *Attributes:* + + - :py:attr:`site`: the page's corresponding Site object + - :py:attr:`title`: the page's title, or pagename + - :py:attr:`exists`: whether the page exists + - :py:attr:`pageid`: an integer ID representing the page + - :py:attr:`url`: the page's URL + - :py:attr:`namespace`: the page's namespace as an integer + - :py:attr:`protection`: the page's current protection status + - :py:attr:`is_talkpage`: ``True`` if this is a talkpage, else ``False`` + - :py:attr:`is_redirect`: ``True`` if this is a redirect, else ``False`` + + *Public methods:* + + - :py:meth:`reload`: forcibly reloads the page's attributes + - :py:meth:`toggle_talk`: returns a content page's talk page, or vice versa + - :py:meth:`get`: returns the page's content + - :py:meth:`get_redirect_target`: returns the page's destination if it is a + redirect + - :py:meth:`get_creator`: returns a User object representing the first + person to edit the page + - :py:meth:`edit`: replaces the page's content or creates a new page + - :py:meth:`add_section`: adds a new section at the bottom of the page + + - :py:meth:`~earwigbot.wiki.copyvios.CopyrightMixin.copyvio_check`: + checks the page for copyright violations + - :py:meth:`~earwigbot.wiki.copyvios.CopyrightMixin.copyvio_compare`: + checks the page like :py:meth:`copyvio_check`, but against a specific + URL """ re_redirect = "^\s*\#\s*redirect\s*\[\[(.*?)\]\]" @@ -69,10 +79,10 @@ class Page(CopyrightMixin): and whether or not to follow redirects (optional, defaults to False). As with User, site.get_page() is preferred. Site's method has support - for a default `follow_redirects` value in our config, while __init__ + for a default *follow_redirects* value in our config, while __init__() always defaults to False. - __init__ will not do any API queries, but it will use basic namespace + __init__() will not do any API queries, but it will use basic namespace logic to determine our namespace ID and if we are a talkpage. 
""" super(Page, self).__init__(site) @@ -113,15 +123,15 @@ class Page(CopyrightMixin): self._is_talkpage = self._namespace % 2 == 1 def __repr__(self): - """Returns the canonical string representation of the Page.""" + """Return the canonical string representation of the Page.""" res = "Page(title={0!r}, follow_redirects={1!r}, site={2!r})" return res.format(self._title, self._follow_redirects, self._site) def __str__(self): - """Returns a nice string representation of the Page.""" - return ''.format(self.title(), str(self._site)) + """Return a nice string representation of the Page.""" + return ''.format(self.title, str(self._site)) - def _force_validity(self): + def _assert_validity(self): """Used to ensure that our page's title is valid. If this method is called when our page is not valid (and after @@ -134,20 +144,20 @@ class Page(CopyrightMixin): e = "Page '{0}' is invalid.".format(self._title) raise exceptions.InvalidPageError(e) - def _force_existence(self): + def _assert_existence(self): """Used to ensure that our page exists. If this method is called when our page doesn't exist (and after _load_attributes() has been called), PageNotFoundError will be raised. - It will also call _force_validity() beforehand. + It will also call _assert_validity() beforehand. """ - self._force_validity() + self._assert_validity() if self._exists == 2: e = "Page '{0}' does not exist.".format(self._title) raise exceptions.PageNotFoundError(e) - def _load_wrapper(self): - """Calls _load_attributes() and follows redirects if we're supposed to. + def _load(self): + """Call _load_attributes() and follow redirects if we're supposed to. This method will only follow redirects if follow_redirects=True was passed to __init__() (perhaps indirectly passed by site.get_page()). @@ -170,13 +180,13 @@ class Page(CopyrightMixin): self._load_attributes() def _load_attributes(self, result=None): - """Loads various data from the API in a single query. 
+ """Load various data from the API in a single query. Loads self._title, ._exists, ._is_redirect, ._pageid, ._fullurl, ._protection, ._namespace, ._is_talkpage, ._creator, ._lastrevid, ._token, and ._starttimestamp using the API. It will do a query of - its own unless `result` is provided, in which case we'll pretend - `result` is what the query returned. + its own unless *result* is provided, in which case we'll pretend + *result* is what the query returned. Assuming the API is sound, this should not raise any exceptions. """ @@ -235,14 +245,14 @@ class Page(CopyrightMixin): pass def _load_content(self, result=None): - """Loads current page content from the API. + """Load current page content from the API. - If `result` is provided, we'll pretend that is the result of an API + If *result* is provided, we'll pretend that is the result of an API query and try to get content from that. Otherwise, we'll do an API query on our own. - Don't call this directly, ever - use .get(force=True) if you want to - force content reloading. + Don't call this directly, ever; use reload() followed by get() if you + want to force content reloading. """ if not result: params = {"action": "query", "prop": "revisions", "rvlimit": 1, @@ -258,14 +268,14 @@ class Page(CopyrightMixin): # self._load_attributes(). In that case, some of our attributes are # outdated, so force another self._load_attributes(): self._load_attributes() - self._force_existence() + self._assert_existence() def _edit(self, params=None, text=None, summary=None, minor=None, bot=None, force=None, section=None, captcha_id=None, captcha_word=None, tries=0): """Edit the page! - If `params` is given, we'll use it as our API query parameters. + If *params* is given, we'll use it as our API query parameters. Otherwise, we'll build params using the given kwargs via _build_edit_params(). 
@@ -281,7 +291,7 @@ class Page(CopyrightMixin): raise exceptions.PermissionsError(e) # Weed out invalid pages before we get too far: - self._force_validity() + self._assert_validity() # Build our API query string: if not params: @@ -420,28 +430,33 @@ class Page(CopyrightMixin): e = "AssertEdit: assertion '{0}' failed.".format(assertion) raise exceptions.PermissionsError(e) - def title(self, force=False): - """Returns the Page's title, or pagename. + @property + def site(self): + """The Page's corresponding Site object.""" + return self._site - This won't do any API queries on its own unless force is True, in which - case the title will be forcibly reloaded from the API (normalizing it, - and following redirects if follow_redirects=True was passed to - __init__()). Any other methods that do API queries will reload title on - their own, however, like exists() and get(). + @property + def title(self): + """The Page's title, or "pagename". + + This won't do any API queries on its own. Any other attributes or + methods that do API queries will reload the title, however, like + :py:attr:`exists` and :py:meth:`get`, potentially "normalizing" it or + following redirects if :py:attr:`self._follow_redirects` is ``True``. """ - if force: - self._load_wrapper() return self._title - def exists(self, force=False): - """Returns information about whether the Page exists or not. + @property + def exists(self): + """Information about whether the Page exists or not. - The returned "information" is a tuple with two items. The first is a - bool, either True if the page exists or False if it does not. The - second is a string giving more information, either "invalid", (title - is invalid, e.g. it contains "["), "missing", or "exists". + The "information" is a tuple with two items. The first is a bool, + either ``True`` if the page exists or ``False`` if it does not. The + second is a string giving more information, either ``"invalid"``, + (title is invalid, e.g. 
it contains ``"["``), ``"missing"``, or + ``"exists"``. - Makes an API query if force is True or if we haven't already made one. + Makes an API query only if we haven't already made one. """ cases = { 0: (None, "unknown"), @@ -449,32 +464,33 @@ class Page(CopyrightMixin): 2: (False, "missing"), 3: (True, "exists"), } - if self._exists == 0 or force: - self._load_wrapper() + if self._exists == 0: + self._load() return cases[self._exists] - def pageid(self, force=False): - """Returns an integer ID representing the Page. + @property + def pageid(self): + """An integer ID representing the Page. - Makes an API query if force is True or if we haven't already made one. + Makes an API query only if we haven't already made one. - Raises InvalidPageError or PageNotFoundError if the page name is + Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or + :py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is invalid or the page does not exist, respectively. """ - if self._exists == 0 or force: - self._load_wrapper() - self._force_existence() # missing pages do not have IDs + if self._exists == 0: + self._load() + self._assert_existence() # Missing pages do not have IDs return self._pageid - def url(self, force=False): - """Returns the page's URL. + @property + def url(self): + """The page's URL. - Like title(), this won't do any API queries on its own unless force is - True. If the API was never queried for this page, we will attempt to - determine the URL ourselves based on the title. + Like :py:attr:`title`, this won't do any API queries on its own. If the + API was never queried for this page, we will attempt to determine the + URL ourselves based on the title. 
""" - if force: - self._load_wrapper() if self._fullurl: return self._fullurl else: @@ -482,91 +498,80 @@ class Page(CopyrightMixin): path = self._site._article_path.replace("$1", slug) return ''.join((self._site._base_url, path)) - def namespace(self, force=False): - """Returns the page's namespace ID (an integer). + @property + def namespace(self): + """The page's namespace ID (an integer). - Like title(), this won't do any API queries on its own unless force is - True. If the API was never queried for this page, we will attempt to - determine the namespace ourselves based on the title. + Like :py:attr:`title`, this won't do any API queries on its own. If the + API was never queried for this page, we will attempt to determine the + namespace ourselves based on the title. """ - if force: - self._load_wrapper() return self._namespace - def protection(self, force=False): - """Returns the page's current protection status. + @property + def protection(self): + """The page's current protection status. - Makes an API query if force is True or if we haven't already made one. + Makes an API query only if we haven't already made one. - Raises InvalidPageError if the page name is invalid. Will not raise an - error if the page is missing because those can still be protected. + Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` if the page + name is invalid. Won't raise an error if the page is missing because + those can still be create-protected. """ - if self._exists == 0 or force: - self._load_wrapper() - self._force_validity() # invalid pages cannot be protected + if self._exists == 0: + self._load() + self._assert_validity() # Invalid pages cannot be protected return self._protection - def creator(self, force=False): - """Returns the page's creator (i.e., the first user to edit the page). - - Makes an API query if force is True or if we haven't already made one. 
- Normally, we can get the creator along with everything else (except - content) in self._load_attributes(). However, due to a limitation in - the API (can't get the editor of one revision and the content of - another at both ends of the history), if our other attributes were only - loaded from get(), we'll have to do another API query. This is done - by calling ourselves again with force=True. + @property + def is_talkpage(self): + """``True`` if the page is a talkpage, otherwise ``False``. - Raises InvalidPageError or PageNotFoundError if the page name is - invalid or the page does not exist, respectively. + Like :py:attr:`title`, this won't do any API queries on its own. If the + API was never queried for this page, we will attempt to determine + whether it is a talkpage ourselves based on its namespace. """ - if self._exists == 0 or force: - self._load_wrapper() - self._force_existence() - if not self._creator and not force: - self.creator(force=True) - return self._creator - - def is_talkpage(self, force=False): - """Returns True if the page is a talkpage, else False. - - Like title(), this won't do any API queries on its own unless force is - True. If the API was never queried for this page, we will attempt to - determine the talkpage status ourselves based on its namespace ID. - """ - if force: - self._load_wrapper() return self._is_talkpage - def is_redirect(self, force=False): - """Returns True if the page is a redirect, else False. + @property + def is_redirect(self): + """``True`` if the page is a redirect, otherwise ``False``. - Makes an API query if force is True or if we haven't already made one. + Makes an API query only if we haven't already made one. - We will return False even if the page does not exist or is invalid. + We will return ``False`` even if the page does not exist or is invalid. 
""" - if self._exists == 0 or force: - self._load_wrapper() + if self._exists == 0: + self._load() return self._is_redirect - def toggle_talk(self, force=False, follow_redirects=None): - """Returns a content page's talk page, or vice versa. + def reload(self): + """Forcibly reload the page's attributes. + + Emphasis on *reload*: this is only necessary if there is reason to + believe they have changed. + """ + self._load() + if self._content is not None: + # Only reload content if it has already been loaded: + self._load_content() + + def toggle_talk(self, follow_redirects=None): + """Return a content page's talk page, or vice versa. The title of the new page is determined by namespace logic, not API - queries. We won't make any API queries on our own unless force is True, - and the only reason then would be to forcibly update the title or - follow redirects if we haven't already made an API query. + queries. We won't make any API queries on our own. - If `follow_redirects` is anything other than None (the default), it - will be passed to the new Page's __init__(). Otherwise, we'll use the - value passed to our own __init__(). + If *follow_redirects* is anything other than ``None`` (the default), it + will be passed to the new :py:class:`~earwigbot.wiki.page.Page` + object's :py:meth:`__init__`. Otherwise, we'll use the value passed to + our own :py:meth:`__init__`. - Will raise InvalidPageError if we try to get the talk page of a special - page (in the Special: or Media: namespaces), but we won't raise an - exception if our page is otherwise missing or invalid. + Will raise :py:exc:`~earwigbot.exceptions.InvalidPageError` if we try + to get the talk page of a special page (in the ``Special:`` or + ``Media:`` namespaces), but we won't raise an exception if our page is + otherwise missing or invalid. 
""" - if force: - self._load_wrapper() if self._namespace < 0: ns = self._site.namespace_id_to_name(self._namespace) e = "Pages in the {0} namespace can't have talk pages.".format(ns) @@ -587,7 +592,7 @@ class Page(CopyrightMixin): # If the new page is in namespace 0, don't do ":Title" (it's correct, # but unnecessary), just do "Title": if new_prefix: - new_title = ':'.join((new_prefix, body)) + new_title = u":".join((new_prefix, body)) else: new_title = body @@ -595,17 +600,13 @@ class Page(CopyrightMixin): follow_redirects = self._follow_redirects return Page(self._site, new_title, follow_redirects) - def get(self, force=False): - """Returns page content, which is cached if you try to call get again. - - Use `force` to forcibly reload page content even if we've already - loaded some. This is good if you want to edit a page multiple times, - and you want to get updated content before you make your second edit. + def get(self): + """Return page content, which is cached if you try to call get again. Raises InvalidPageError or PageNotFoundError if the page name is invalid or the page does not exist, respectively. 
""" - if force or self._exists == 0: + if self._exists == 0: # Kill two birds with one stone by doing an API query for both our # attributes and our page content: params = {"action": "query", "rvlimit": 1, "titles": self._title, @@ -613,54 +614,75 @@ class Page(CopyrightMixin): "intoken": "edit", "rvprop": "content|timestamp"} result = self._site._api_query(params) self._load_attributes(result=result) - self._force_existence() + self._assert_existence() self._load_content(result=result) # Follow redirects if we're told to: if self._keep_following and self._is_redirect: self._title = self.get_redirect_target() - self._keep_following = False # don't follow double redirects - self._content = None # reset the content we just loaded - self.get(force=True) + self._keep_following = False # Don't follow double redirects + self._exists = 0 # Force another API query + self.get() return self._content # Make sure we're dealing with a real page here. This may be outdated # if the page was deleted since we last called self._load_attributes(), # but self._load_content() can handle that: - self._force_existence() + self._assert_existence() if self._content is None: self._load_content() return self._content - def get_redirect_target(self, force=False): - """If the page is a redirect, returns its destination. - - Use `force` to forcibly reload content even if we've already loaded - some before. Note that this method calls get() for page content. + def get_redirect_target(self): + """If the page is a redirect, return its destination. - Raises InvalidPageError or PageNotFoundError if the page name is - invalid or the page does not exist, respectively. Raises RedirectError - if the page is not a redirect. + Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or + :py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is + invalid or the page does not exist, respectively. Raises + :py:exc:`~earwigbot.exceptions.RedirectError` if the page is not a + redirect. 
""" - content = self.get(force) + content = self.get() try: return re.findall(self.re_redirect, content, flags=re.I)[0] except IndexError: e = "The page does not appear to have a redirect target." raise exceptions.RedirectError(e) + def get_creator(self): + """Return the User object for the first person to edit the page. + + Makes an API query only if we haven't already made one. Normally, we + can get the creator along with everything else (except content) in + :py:meth:`_load_attributes`. However, due to a limitation in the API + (can't get the editor of one revision and the content of another at + both ends of the history), if our other attributes were only loaded + through :py:meth:`get`, we'll have to do another API query. + + Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or + :py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is + invalid or the page does not exist, respectively. + """ + if self._exists == 0: + self._load() + self._assert_existence() + if not self._creator: + self._load() + self._assert_existence() + return self._site.get_user(self._creator) + def edit(self, text, summary, minor=False, bot=True, force=False): - """Replaces the page's content or creates a new page. + """Replace the page's content or creates a new page. - `text` is the new page content, with `summary` as the edit summary. - If `minor` is True, the edit will be marked as minor. If `bot` is true, - the edit will be marked as a bot edit, but only if we actually have a - bot flag. + *text* is the new page content, with *summary* as the edit summary. + If *minor* is ``True``, the edit will be marked as minor. If *bot* is + ``True``, the edit will be marked as a bot edit, but only if we + actually have a bot flag. - Use `force` to push the new content even if there's an edit conflict or + Use *force* to push the new content even if there's an edit conflict or the page was deleted/recreated between getting our edit token and editing our page. 
Be careful with this! """ @@ -668,12 +690,12 @@ class Page(CopyrightMixin): force=force) def add_section(self, text, title, minor=False, bot=True, force=False): - """Adds a new section to the bottom of the page. + """Add a new section to the bottom of the page. - The arguments for this are the same as those for edit(), but instead of - providing a summary, you provide a section title. + The arguments for this are the same as those for :py:meth:`edit`, but + instead of providing a summary, you provide a section title. - Likewise, raised exceptions are the same as edit()'s. + Likewise, raised exceptions are the same as :py:meth:`edit`'s. This should create the page if it does not already exist, with just the new section as content. diff --git a/earwigbot/wiki/site.py b/earwigbot/wiki/site.py index f90a2cc..4b95e9c 100644 --- a/earwigbot/wiki/site.py +++ b/earwigbot/wiki/site.py @@ -48,29 +48,38 @@ __all__ = ["Site"] class Site(object): """ - EarwigBot's Wiki Toolset: Site Class - - Represents a Site, with support for API queries and returning Pages, Users, - and Categories. The constructor takes a bunch of arguments and you probably - won't need to call it directly, rather tools.get_site() for returning Site - instances, tools.add_site() for adding new ones to config, and - tools.del_site() for removing old ones from config, should suffice. 
- - Attributes: - name -- the site's name (or "wikiid"), like "enwiki" - project -- the site's project name, like "wikipedia" - lang -- the site's language code, like "en" - domain -- the site's web domain, like "en.wikipedia.org" - - Public methods: - api_query -- does an API query with the given kwargs as params - sql_query -- does an SQL query and yields its results - get_replag -- returns the estimated database replication lag - namespace_id_to_name -- given a namespace ID, returns associated name(s) - namespace_name_to_id -- given a namespace name, returns the associated ID - get_page -- returns a Page object for the given title - get_category -- returns a Category object for the given title - get_user -- returns a User object for the given username + **EarwigBot's Wiki Toolset: Site Class** + + Represents a site, with support for API queries and returning + :py:class:`~earwigbot.wiki.page.Page`, + :py:class:`~earwigbot.wiki.user.User`, + and :py:class:`~earwigbot.wiki.category.Category` objects. The constructor + takes a bunch of arguments and you probably won't need to call it directly, + rather :py:meth:`wiki.get_site() ` + for returning :py:class:`Site` + instances, :py:meth:`wiki.add_site() + ` for adding new ones to our + database, and :py:meth:`wiki.remove_site() + ` for removing old ones from + our database, should suffice. 
+ + *Attributes:* + + - :py:attr:`name`: the site's name (or "wikiid"), like ``"enwiki"`` + - :py:attr:`project`: the site's project name, like ``"wikipedia"`` + - :py:attr:`lang`: the site's language code, like ``"en"`` + - :py:attr:`domain`: the site's web domain, like ``"en.wikipedia.org"`` + + *Public methods:* + + - :py:meth:`api_query`: does an API query with kwargs as params + - :py:meth:`sql_query`: does an SQL query and yields its results + - :py:meth:`get_replag`: estimates the database replication lag + - :py:meth:`namespace_id_to_name`: returns names associated with an NS id + - :py:meth:`namespace_name_to_id`: returns the ID associated with a NS name + - :py:meth:`get_page`: returns a Page for the given title + - :py:meth:`get_category`: returns a Category for the given title + - :py:meth:`get_user`: returns a User object for the given name """ def __init__(self, name=None, project=None, lang=None, base_url=None, @@ -83,11 +92,11 @@ class Site(object): This probably isn't necessary to call yourself unless you're building a Site that's not in your config and you don't want to add it - normally - all you need is tools.get_site(name), which creates the Site for you + all you need is wiki.get_site(name), which creates the Site for you based on your config file and the sites database. We accept a bunch of - kwargs, but the only ones you really "need" are `base_url` and - `script_path` - this is enough to figure out an API url. `login`, a - tuple of (username, password), is highly recommended. `cookiejar` will + kwargs, but the only ones you really "need" are *base_url* and + *script_path*; this is enough to figure out an API url. *login*, a + tuple of (username, password), is highly recommended. *cookiejar* will be used to store cookies, and we'll use a normal CookieJar if none is given.
@@ -151,7 +160,7 @@ class Site(object): self._login(login) def __repr__(self): - """Returns the canonical string representation of the Site.""" + """Return the canonical string representation of the Site.""" res = ", ".join(( "Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}", "base_url={_base_url!r}", "article_path={_article_path!r}", @@ -170,13 +179,12 @@ class Site(object): return res.format(login, cookies, agent, **self.__dict__) def __str__(self): - """Returns a nice string representation of the Site.""" + """Return a nice string representation of the Site.""" res = "" - return res.format(self.name(), self.project(), self.lang(), - self.domain()) + return res.format(self.name, self.project, self.lang, self.domain) def _urlencode_utf8(self, params): - """Implement urllib.urlencode(params) with support for unicode input.""" + """Implement urllib.urlencode() with support for unicode input.""" enc = lambda s: s.encode("utf8") if isinstance(s, unicode) else str(s) args = [] for key, val in params.iteritems(): @@ -186,30 +194,10 @@ class Site(object): return "&".join(args) def _api_query(self, params, tries=0, wait=5): - """Do an API query with `params` as a dict of parameters. + """Do an API query with *params* as a dict of parameters. - This will first attempt to construct an API url from self._base_url and - self._script_path. We need both of these, or else we'll raise - SiteAPIError. If self._base_url is protocol-relative (introduced in - MediaWiki 1.18), we'll choose HTTPS if self._user_https is True, - otherwise HTTP. - - We'll encode the given params, adding format=json along the way, as - well as &assert= and &maxlag= based on self._assert_edit and _maxlag. - Additionally, we'll sleep a bit if the last query was made less than - self._wait_between_queries seconds ago. The request is made through - self._opener, which has cookie support (self._cookiejar), a User-Agent - (wiki.constants.USER_AGENT), and Accept-Encoding set to "gzip". 
- - Assuming everything went well, we'll gunzip the data (if compressed), - load it as a JSON object, and return it. - - If our request failed for some reason, we'll raise SiteAPIError with - details. If that reason was due to maxlag, we'll sleep for a bit and - then repeat the query until we exceed self._max_retries. - - There's helpful MediaWiki API documentation at - . + See the documentation for :py:meth:`api_query` for full implementation + details. """ since_last_query = time() - self._last_query_time # Throttling support if since_last_query < self._wait_between_queries: @@ -301,8 +289,8 @@ class Site(object): was not given as a keyword argument. We'll do an API query to get the missing data, but only if there actually *is* missing data. - Additionally, you can call this with `force=True` to forcibly reload - all attributes. + Additionally, you can call this with *force* set to True to forcibly + reload all attributes. """ # All attributes to be loaded, except _namespaces, which is a special # case because it requires additional params in the API query: @@ -332,7 +320,7 @@ class Site(object): def _load_namespaces(self, result): """Fill self._namespaces with a dict of namespace IDs and names. - Called by _load_attributes() with API data as `result` when + Called by _load_attributes() with API data as *result* when self._namespaces was not given as an kwarg to __init__(). """ self._namespaces = {} @@ -381,13 +369,12 @@ class Site(object): (for that, we'd do self._login_info[0]), but rather to get our current username without an unnecessary ?action=query&meta=userinfo API query. 
""" - domain = self.domain() name = ''.join((self._name, "Token")) - cookie = self._get_cookie(name, domain) + cookie = self._get_cookie(name, self.domain) if cookie: name = ''.join((self._name, "UserName")) - user_name = self._get_cookie(name, domain) + user_name = self._get_cookie(name, self.domain) if user_name: return user_name.value @@ -399,7 +386,7 @@ class Site(object): continue # Build a regex that will match domains this cookie affects: search = ''.join(("(.*?)", re_escape(cookie.domain))) - if re_match(search, domain): # Test it against our site + if re_match(search, self.domain): # Test it against our site user_name = self._get_cookie("centralauth_User", cookie.domain) if user_name: return user_name.value @@ -464,8 +451,8 @@ class Site(object): Raises LoginError on login errors (duh), like bad passwords and nonexistent usernames. - `login` is a (username, password) tuple. `token` is the token returned - from our first request, and `attempt` is to prevent getting stuck in a + *login* is a (username, password) tuple. *token* is the token returned + from our first request, and *attempt* is to prevent getting stuck in a loop if MediaWiki isn't acting right. 
""" name, password = login @@ -535,26 +522,57 @@ class Site(object): self._sql_conn = oursql.connect(**args) + @property def name(self): - """Returns the Site's name (or "wikiid" in the API), like "enwiki".""" + """The Site's name (or "wikiid" in the API), like ``"enwiki"``.""" return self._name + @property def project(self): - """Returns the Site's project name in lowercase, like "wikipedia".""" + """The Site's project name in lowercase, like ``"wikipedia"``.""" return self._project + @property def lang(self): - """Returns the Site's language code, like "en" or "es".""" + """The Site's language code, like ``"en"`` or ``"es"``.""" return self._lang + @property def domain(self): - """Returns the Site's web domain, like "en.wikipedia.org".""" + """The Site's web domain, like ``"en.wikipedia.org"``.""" return urlparse(self._base_url).netloc def api_query(self, **kwargs): """Do an API query with `kwargs` as the parameters. - See _api_query()'s documentation for details. + This will first attempt to construct an API url from + :py:attr:`self._base_url` and :py:attr:`self._script_path`. We need + both of these, or else we'll raise + :py:exc:`~earwigbot.exceptions.SiteAPIError`. If + :py:attr:`self._base_url` is protocol-relative (introduced in MediaWiki + 1.18), we'll choose HTTPS only if :py:attr:`self._user_https` is + ``True``, otherwise HTTP. + + We'll encode the given params, adding ``format=json`` along the way, as + well as ``&assert=`` and ``&maxlag=`` based on + :py:attr:`self._assert_edit` and :py:attr:`_maxlag` respectively. + Additionally, we'll sleep a bit if the last query was made fewer than + :py:attr:`self._wait_between_queries` seconds ago. The request is made + through :py:attr:`self._opener`, which has cookie support + (:py:attr:`self._cookiejar`), a ``User-Agent`` + (:py:const:`earwigbot.wiki.constants.USER_AGENT`), and + ``Accept-Encoding`` set to ``"gzip"``. 
+ + Assuming everything went well, we'll gunzip the data (if compressed), + load it as a JSON object, and return it. + + If our request failed for some reason, we'll raise + :py:exc:`~earwigbot.exceptions.SiteAPIError` with details. If that + reason was due to maxlag, we'll sleep for a bit and then repeat the + query until we exceed :py:attr:`self._max_retries`. + + There is helpful MediaWiki API documentation at `MediaWiki.org + `_. """ return self._api_query(kwargs) @@ -562,34 +580,33 @@ class Site(object): cursor_class=None, show_table=False): """Do an SQL query and yield its results. - If `plain_query` is True, we will force an unparameterized query. - Specifying both params and plain_query will cause an error. - - If `dict_cursor` is True, we will use oursql.DictCursor as our cursor, - otherwise the default oursql.Cursor. If `cursor_class` is given, it - will override this option. - - If `show_table` is True, the name of the table will be prepended to the - name of the column. This will mainly affect a DictCursor. - - Example: - >>> query = "SELECT user_id, user_registration FROM user WHERE user_name = ?" - >>> params = ("The Earwig",) - >>> result1 = site.sql_query(query, params) - >>> result2 = site.sql_query(query, params, dict_cursor=True) - >>> for row in result1: print row - (7418060L, '20080703215134') - >>> for row in result2: print row - {'user_id': 7418060L, 'user_registration': '20080703215134'} - - See _sql_connect() for information on how a connection is acquired. - - has helpful documentation on the - oursql module. - - This may raise SQLError() or one of oursql's exceptions - (oursql.ProgrammingError, oursql.InterfaceError, ...) if there were - problems with the query. + If *plain_query* is ``True``, we will force an unparameterized query. + Specifying both *params* and *plain_query* will cause an error. If + *dict_cursor* is ``True``, we will use :py:class:`oursql.DictCursor` as + our cursor, otherwise the default :py:class:`oursql.Cursor`. 
If + *cursor_class* is given, it will override this option. If *show_table* + is True, the name of the table will be prepended to the name of the + column. This will mainly affect an :py:class:`~oursql.DictCursor`. + + Example usage:: + + >>> query = "SELECT user_id, user_registration FROM user WHERE user_name = ?" + >>> params = ("The Earwig",) + >>> result1 = site.sql_query(query, params) + >>> result2 = site.sql_query(query, params, dict_cursor=True) + >>> for row in result1: print row + (7418060L, '20080703215134') + >>> for row in result2: print row + {'user_id': 7418060L, 'user_registration': '20080703215134'} + + This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of + oursql's exceptions (:py:exc:`oursql.ProgrammingError`, + :py:exc:`oursql.InterfaceError`, ...) if there were problems with the + query. + + See :py:meth:`_sql_connect` for information on how a connection is + acquired. Also relevant is `oursql's documentation + `_ for details on that package. """ if not cursor_class: if dict_cursor: @@ -608,11 +625,16 @@ class Site(object): def get_replag(self): """Return the estimated database replication lag in seconds. - + Requires SQL access. This function only makes sense on a replicated database (e.g. the Wikimedia Toolserver) and on a wiki that receives a large number of edits (ideally, at least one per second), or the result - may be larger than expected. + may be larger than expected, since it works by subtracting the current + time from the timestamp of the latest recent changes event. + + This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of + oursql's exceptions (:py:exc:`oursql.ProgrammingError`, + :py:exc:`oursql.InterfaceError`, ...) if there were problems. 
""" query = """SELECT UNIX_TIMESTAMP() - UNIX_TIMESTAMP(rc_timestamp) FROM recentchanges ORDER BY rc_timestamp DESC LIMIT 1""" @@ -622,14 +644,16 @@ class Site(object): def namespace_id_to_name(self, ns_id, all=False): """Given a namespace ID, returns associated namespace names. - If all is False (default), we'll return the first name in the list, - which is usually the localized version. Otherwise, we'll return the - entire list, which includes the canonical name. + If *all* is ``False`` (default), we'll return the first name in the + list, which is usually the localized version. Otherwise, we'll return + the entire list, which includes the canonical name. - For example, returns u"Wikipedia" if ns_id=4 and all=False on enwiki; - returns [u"Wikipedia", u"Project", u"WP"] if ns_id=4 and all=True. + For example, this returns ``u"Wikipedia"`` if *ns_id* = ``4`` and + *all* = ``False`` on ``enwiki``; returns ``[u"Wikipedia", u"Project", + u"WP"]`` if *ns_id* = ``4`` and *all* is ``True``. - Raises NamespaceNotFoundError if the ID is not found. + Raises :py:exc:`~earwigbot.exceptions.NamespaceNotFoundError` if the ID + is not found. """ try: if all: @@ -643,10 +667,11 @@ class Site(object): def namespace_name_to_id(self, name): """Given a namespace name, returns the associated ID. - Like namespace_id_to_name(), but reversed. Case is ignored, because - namespaces are assumed to be case-insensitive. + Like :py:meth:`namespace_id_to_name`, but reversed. Case is ignored, + because namespaces are assumed to be case-insensitive. - Raises NamespaceNotFoundError if the name is not found. + Raises :py:exc:`~earwigbot.exceptions.NamespaceNotFoundError` if the + name is not found. """ lname = name.lower() for ns_id, names in self._namespaces.items(): @@ -658,14 +683,18 @@ class Site(object): raise exceptions.NamespaceNotFoundError(e) def get_page(self, title, follow_redirects=False): - """Returns a Page object for the given title (pagename). 
+ """Return a :py:class:`Page` object for the given title. - Will return a Category object instead if the given title is in the - category namespace. As Category is a subclass of Page, this should not - cause problems. + *follow_redirects* is passed directly to + :py:class:`~earwigbot.wiki.page.Page`'s constructor. Also, this will + return a :py:class:`~earwigbot.wiki.category.Category` object instead + if the given title is in the category namespace. As + :py:class:`~earwigbot.wiki.category.Category` is a subclass of + :py:class:`~earwigbot.wiki.page.Page`, this should not cause problems. Note that this doesn't do any direct checks for existence or - redirect-following - Page's methods provide that. + redirect-following: :py:class:`~earwigbot.wiki.page.Page`'s methods + provide that. """ prefixes = self.namespace_id_to_name(constants.NS_CATEGORY, all=True) prefix = title.split(":", 1)[0] @@ -675,20 +704,22 @@ class Site(object): return Page(self, title, follow_redirects) def get_category(self, catname, follow_redirects=False): - """Returns a Category object for the given category name. + """Return a :py:class:`Category` object for the given category name. - `catname` should be given *without* a namespace prefix. This method is - really just shorthand for get_page("Category:" + catname). + *catname* should be given *without* a namespace prefix. This method is + really just shorthand for :py:meth:`get_page("Category:" + catname) + `. """ prefix = self.namespace_id_to_name(constants.NS_CATEGORY) pagename = ':'.join((prefix, catname)) return Category(self, pagename, follow_redirects) def get_user(self, username=None): - """Returns a User object for the given username. + """Return a :py:class:`User` object for the given username. - If `username` is left as None, then a User object representing the - currently logged-in (or anonymous!) user is returned. 
+ If *username* is left as ``None``, then a + :py:class:`~earwigbot.wiki.user.User` object representing the currently + logged-in (or anonymous!) user is returned. """ if not username: username = self._get_username() diff --git a/earwigbot/wiki/sitesdb.py b/earwigbot/wiki/sitesdb.py index c60fa13..034d0a4 100644 --- a/earwigbot/wiki/sitesdb.py +++ b/earwigbot/wiki/sitesdb.py @@ -35,20 +35,23 @@ __all__ = ["SitesDB"] class SitesDB(object): """ - EarwigBot's Wiki Toolset: Sites Database Manager + **EarwigBot's Wiki Toolset: Sites Database Manager** - This class controls the sites.db file, which stores information about all - wiki sites known to the bot. Three public methods act as bridges between - the bot's config files and Site objects: - get_site -- returns a Site object corresponding to a given site name - add_site -- stores a site in the database, given connection info - remove_site -- removes a site from the database, given its name + This class controls the :file:`sites.db` file, which stores information + about all wiki sites known to the bot. Three public methods act as bridges + between the bot's config files and :py:class:`~earwigbot.wiki.site.Site` + objects: + + - :py:meth:`get_site`: returns a Site object corresponding to a site + - :py:meth:`add_site`: stores a site in the database + - :py:meth:`remove_site`: removes a site from the database There's usually no need to use this class directly. All public methods - here are available as bot.wiki.get_site(), bot.wiki.add_site(), and - bot.wiki.remove_site(), which use a sites.db file located in the same - directory as our config.yml file. Lower-level access can be achieved - by importing the manager class (`from earwigbot.wiki import SitesDB`). + here are available as :py:meth:`bot.wiki.get_site`, + :py:meth:`bot.wiki.add_site`, and :py:meth:`bot.wiki.remove_site`, which + use a :file:`sites.db` file located in the same directory as our + :file:`config.yml` file. 
Lower-level access can be achieved by importing + the manager class (``from earwigbot.wiki import SitesDB``). """ def __init__(self, bot): @@ -157,7 +160,7 @@ class SitesDB(object): namespaces) def _make_site_object(self, name): - """Return a Site object associated with the site 'name' in our sitesdb. + """Return a Site object associated with the site *name* in our sitesdb. This calls _load_site_from_sitesdb(), so SiteNotFoundError will be raised if the site is not in our sitesdb. @@ -210,8 +213,8 @@ class SitesDB(object): namespaces are extracted from the site and inserted into the sites database. If the sitesdb doesn't exist, we'll create it first. """ - name = site.name() - sites_data = (name, site.project(), site.lang(), site._base_url, + name = site.name + sites_data = (name, site.project, site.lang, site._base_url, site._article_path, site._script_path) sql_data = [(name, key, val) for key, val in site._sql_data.iteritems()] ns_data = [] @@ -255,24 +258,25 @@ class SitesDB(object): """Return a Site instance based on information from the sitesdb. With no arguments, return the default site as specified by our config - file. This is config.wiki["defaultSite"]. + file. This is ``config.wiki["defaultSite"]``. - With 'name' specified, return the site with that name. This is - equivalent to the site's 'wikiid' in the API, like 'enwiki'. + With *name* specified, return the site with that name. This is + equivalent to the site's ``wikiid`` in the API, like *enwiki*. - With 'project' and 'lang' specified, return the site whose project and + With *project* and *lang* specified, return the site whose project and language match these values. If there are multiple sites with the same values (unlikely), this is not a reliable way of loading a site. Call - the function with an explicit 'name' in that case. + the function with an explicit *name* in that case. 
We will attempt to login to the site automatically using - config.wiki["username"] and config.wiki["password"] if both are + ``config.wiki["username"]`` and ``config.wiki["password"]`` if both are defined. Specifying a project without a lang or a lang without a project will - raise TypeError. If all three args are specified, 'name' will be first - tried, then 'project' and 'lang' if 'name' doesn't work. If a site - cannot be found in the sitesdb, SiteNotFoundError will be raised. An + raise :py:exc:`TypeError`. If all three args are specified, *name* will + be first tried, then *project* and *lang* if *name* doesn't work. If a + site cannot be found in the sitesdb, + :py:exc:`~earwigbot.exceptions.SiteNotFoundError` will be raised. An empty sitesdb will be created if none is found. """ # Someone specified a project without a lang, or vice versa: @@ -311,23 +315,27 @@ class SitesDB(object): script_path="/w", sql=None): """Add a site to the sitesdb so it can be retrieved with get_site(). - If only a project and a lang are given, we'll guess the base_url as - "//{lang}.{project}.org" (which is protocol-relative, becoming 'https' - if 'useHTTPS' is True in config otherwise 'http'). If this is wrong, - provide the correct base_url as an argument (in which case project and - lang are ignored). Most wikis use "/w" as the script path (meaning the - API is located at "{base_url}{script_path}/api.php" -> - "//{lang}.{project}.org/w/api.php"), so this is the default. If your - wiki is different, provide the script_path as an argument. The only - other argument to Site() that we can't get from config files or by - querying the wiki itself is SQL connection info, so provide a dict of - kwargs as `sql` and Site will pass it to oursql.connect(**sql), - allowing you to make queries with site.sql_query(). - - Returns True if the site was added successfully or False if the site is - already in our sitesdb (this can be done purposefully to update old - site info). 
Raises SiteNotFoundError if not enough information has - been provided to identify the site (e.g. a project but not a lang). + If only a project and a lang are given, we'll guess the *base_url* as + ``"//{lang}.{project}.org"`` (which is protocol-relative, becoming + ``"https"`` if *useHTTPS* is ``True`` in config otherwise ``"http"``). + If this is wrong, provide the correct *base_url* as an argument (in + which case project and lang are ignored). Most wikis use ``"/w"`` as + the script path (meaning the API is located at + ``"{base_url}{script_path}/api.php"`` -> + ``"//{lang}.{project}.org/w/api.php"``), so this is the default. If + your wiki is different, provide the script_path as an argument. The + only other argument to :py:class:`~earwigbot.wiki.site.Site` that we + can't get from config files or by querying the wiki itself is SQL + connection info, so provide a dict of kwargs as *sql* and Site will + pass it to :py:func:`oursql.connect(**sql) `, allowing + you to make queries with :py:meth:`site.sql_query + `. + + Returns ``True`` if the site was added successfully or ``False`` if the + site is already in our sitesdb (this can be done purposefully to update + old site info). Raises :py:exc:`~earwigbot.exceptions.SiteNotFoundError` + if not enough information has been provided to identify the site (e.g. + a *project* but not a *lang*). """ if not base_url: if not project or not lang: @@ -353,18 +361,18 @@ class SitesDB(object): search_config=search_config) self._add_site_to_sitesdb(site) - self._sites[site.name()] = site + self._sites[site.name] = site return site def remove_site(self, name=None, project=None, lang=None): """Remove a site from the sitesdb. - Returns True if the site was removed successfully or False if the site - was not in our sitesdb originally. If all three args (name, project, - and lang) are given, we'll first try 'name' and then try the latter two - if 'name' wasn't found in the database.
Raises TypeError if a project - was given but not a language, or vice versa. Will create an empty - sitesdb if none was found. + Returns ``True`` if the site was removed successfully or ``False`` if + the site was not in our sitesdb originally. If all three args (*name*, + *project*, and *lang*) are given, we'll first try *name* and then try + the latter two if *name* wasn't found in the database. Raises + :py:exc:`TypeError` if a project was given but not a language, or vice + versa. Will create an empty sitesdb if none was found. """ # Someone specified a project without a lang, or vice versa: if (project and not lang) or (not project and lang): diff --git a/earwigbot/wiki/user.py b/earwigbot/wiki/user.py index 67c9567..c40478b 100644 --- a/earwigbot/wiki/user.py +++ b/earwigbot/wiki/user.py @@ -30,28 +30,33 @@ __all__ = ["User"] class User(object): """ - EarwigBot's Wiki Toolset: User Class - - Represents a User on a given Site. Has methods for getting a bunch of - information about the user, such as editcount and user rights, methods for - returning the user's userpage and talkpage, etc. - - Attributes: - name -- the user's username - exists -- True if the user exists, or False if they do not - userid -- an integer ID representing the user - blockinfo -- information about any current blocks on the user - groups -- a list of the user's groups - rights -- a list of the user's rights - editcount -- the number of edits made by the user - registration -- the time the user registered as a time.struct_time - emailable -- True if you can email the user, False if you cannot - gender -- the user's gender ("male", "female", or "unknown") - - Public methods: - reload -- forcibly reload the user's attributes - get_userpage -- returns a Page object representing the user's userpage - get_talkpage -- returns a Page object representing the user's talkpage + **EarwigBot's Wiki Toolset: User Class** + + Represents a user on a given :py:class:`~earwigbot.wiki.site.Site`. 
Has + methods for getting a bunch of information about the user, such as + editcount and user rights, methods for returning the user's userpage and + talkpage, etc. + + *Attributes:* + + - :py:attr:`name`: the user's username + - :py:attr:`exists`: ``True`` if the user exists, else ``False`` + - :py:attr:`userid`: an integer ID representing the user + - :py:attr:`blockinfo`: information about any current blocks on the user + - :py:attr:`groups`: a list of the user's groups + - :py:attr:`rights`: a list of the user's rights + - :py:attr:`editcount`: the number of edits made by the user + - :py:attr:`registration`: the time the user registered + - :py:attr:`emailable`: ``True`` if you can email the user, or ``False`` + - :py:attr:`gender`: the user's gender ("male"/"female"/"unknown") + + *Public methods:* + + - :py:meth:`reload`: forcibly reloads the user's attributes + - :py:meth:`get_userpage`: returns a Page object representing the user's + userpage + - :py:meth:`get_talkpage`: returns a Page object representing the user's + talkpage """ def __init__(self, site, name): @@ -71,26 +76,25 @@ class User(object): self._name = name def __repr__(self): - """Returns the canonical string representation of the User.""" + """Return the canonical string representation of the User.""" return "User(name={0!r}, site={1!r})".format(self._name, self._site) def __str__(self): - """Returns a nice string representation of the User.""" - return ''.format(self.name(), str(self._site)) + """Return a nice string representation of the User.""" + return ''.format(self._name, str(self._site)) - def _get_attribute(self, attr, force): + def _get_attribute(self, attr): """Internally used to get an attribute by name. We'll call _load_attributes() to get this (and all other attributes) - from the API if it is not already defined. If `force` is True, we'll - re-load them even if they've already been loaded. + from the API if it is not already defined. 
- Raises UserNotFoundError if a nonexistant user prevents us from + Raises UserNotFoundError if a nonexistent user prevents us from returning a certain attribute. """ - if not hasattr(self, attr) or force: + if not hasattr(self, attr): self._load_attributes() - if self._exists is False: + if not self._exists: e = "User '{0}' does not exist.".format(self._name) raise UserNotFoundError(e) return getattr(self, attr) @@ -150,105 +154,118 @@ class User(object): self._gender = res["gender"] - def name(self, force=False): - """Returns the user's name. + @property + def name(self): + """The user's username. - If `force` is True, we will load the name from the API and return that. - This could potentially return a "normalized" version of the name - for - example, without a "User:" prefix or without underscores. Unlike other - attribute getters, this will never make an API query without `force`. - - Note that if another attribute getter, like exists(), has already been - called, then the username has already been normalized. + This will never make an API query on its own, but if one has already + been made by the time this is retrieved, the username may have been + "normalized" from the original input to the constructor, converted into + a Unicode object, with underscores removed, etc. """ - if force: - self._load_attributes() return self._name - def exists(self, force=False): - """Returns True if the user exists, or False if they do not. + @property + def exists(self): + """``True`` if the user exists, or ``False`` if they do not. - Makes an API query if `force` is True or if we haven't made one - already. + Makes an API query only if we haven't made one already. """ - if not hasattr(self, "_exists") or force: + if not hasattr(self, "_exists"): self._load_attributes() return self._exists - def userid(self, force=False): - """Returns an integer ID used by MediaWiki to represent the user. + @property + def userid(self): + """An integer ID used by MediaWiki to represent the user. - Raises UserNotFoundError if the user does not exist.
Makes an API query - if `force` is True or if we haven't made one already. + Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user + does not exist. Makes an API query only if we haven't made one already. """ - return self._get_attribute("_userid", force) + return self._get_attribute("_userid") - def blockinfo(self, force=False): - """Returns information about a current block on the user. + @property + def blockinfo(self): + """Information about any current blocks on the user. - If the user is not blocked, returns False. If they are, returns a dict - with three keys: "by" is the blocker's username, "reason" is the reason - why they were blocked, and "expiry" is when the block expires. + If the user is not blocked, returns ``False``. If they are, returns a + dict with three keys: ``"by"`` is the blocker's username, ``"reason"`` + is the reason why they were blocked, and ``"expiry"`` is when the block + expires. - Raises UserNotFoundError if the user does not exist. Makes an API query - if `force` is True or if we haven't made one already. + Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user + does not exist. Makes an API query only if we haven't made one already. """ - return self._get_attribute("_blockinfo", force) + return self._get_attribute("_blockinfo") - def groups(self, force=False): - """Returns a list of groups this user is in, including "*". + @property + def groups(self): + """A list of groups this user is in, including ``"*"``. - Raises UserNotFoundError if the user does not exist. Makes an API query - if `force` is True or if we haven't made one already. + Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user + does not exist. Makes an API query only if we haven't made one already. """ - return self._get_attribute("_groups", force) + return self._get_attribute("_groups") - def rights(self, force=False): - """Returns a list of this user's rights. + @property + def rights(self): + """A list of this user's rights. - Raises UserNotFoundError if the user does not exist.
Makes an API query - if `force` is True or if we haven't made one already. + Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user + does not exist. Makes an API query only if we haven't made one already. """ - return self._get_attribute("_rights", force) + return self._get_attribute("_rights") - def editcount(self, force=False): - """Returns the number of edits made by the user. + @property + def editcount(self): + """The number of edits made by the user. - Raises UserNotFoundError if the user does not exist. Makes an API query - if `force` is True or if we haven't made one already. + Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user + does not exist. Makes an API query only if we haven't made one already. """ - return self._get_attribute("_editcount", force) + return self._get_attribute("_editcount") - def registration(self, force=False): - """Returns the time the user registered as a time.struct_time object. + @property + def registration(self): + """The time the user registered as a :py:class:`time.struct_time`. - Raises UserNotFoundError if the user does not exist. Makes an API query - if `force` is True or if we haven't made one already. + Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user + does not exist. Makes an API query only if we haven't made one already. """ - return self._get_attribute("_registration", force) + return self._get_attribute("_registration") - def emailable(self, force=False): - """Returns True if the user can be emailed, or False if they cannot. + @property + def emailable(self): + """``True`` if the user can be emailed, or ``False`` if they cannot. - Raises UserNotFoundError if the user does not exist. Makes an API query - if `force` is True or if we haven't made one already. + Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user + does not exist. Makes an API query only if we haven't made one already. """ - return self._get_attribute("_emailable", force) + return self._get_attribute("_emailable") - def gender(self, force=False): - """Returns the user's gender. + @property + def gender(self): + """The user's gender.
- Can return either "male", "female", or "unknown", if they did not - specify it. + Can return either ``"male"``, ``"female"``, or ``"unknown"``, if they + did not specify it. - Raises UserNotFoundError if the user does not exist. Makes an API query - if `force` is True or if we haven't made one already. + Raises :py:exc:`~earwigbot.exceptions.UserNotFoundError` if the user + does not exist. Makes an API query only if we haven't made one already. """ - return self._get_attribute("_gender", force) + return self._get_attribute("_gender") + def reload(self): + """Forcibly reload the user's attributes. + + Emphasis on *reload*: this is only necessary if there is reason to + believe they have changed. + """ + self._load_attributes() + def get_userpage(self): - """Returns a Page object representing the user's userpage. - + """Return a Page object representing the user's userpage. + No checks are made to see if it exists or not. Proper site namespace conventions are followed. """ @@ -257,8 +274,8 @@ class User(object): return Page(self._site, pagename) def get_talkpage(self): - """Returns a Page object representing the user's talkpage. - + """Return a Page object representing the user's talkpage. + No checks are made to see if it exists or not. Proper site namespace conventions are followed. """