|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271 |
- # Copyright (C) 2016 Ben Kurtovic <ben.kurtovic@gmail.com>
- #
- # Permission is hereby granted, free of charge, to any person obtaining a copy
- # of this software and associated documentation files (the "Software"), to deal
- # in the Software without restriction, including without limitation the rights
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- # copies of the Software, and to permit persons to whom the Software is
- # furnished to do so, subject to the following conditions:
- #
- # The above copyright notice and this permission notice shall be included in
- # all copies or substantial portions of the Software.
- #
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- # SOFTWARE.
-
- import re
- from collections import namedtuple
- from datetime import datetime
- from difflib import ndiff
- from queue import Queue
- from threading import Thread
-
- from earwigbot.commands import Command
- from earwigbot.exceptions import APIError
- from earwigbot.irc import RC
- from earwigbot.wiki import constants
-
# Result of comparing two revisions: the lines that were added and the lines
# that were removed, each as a list of strings.
_Diff = namedtuple("_Diff", "added removed")
-
-
class RCMonitor(Command):
    """Monitors the recent changes feed for certain edits and reports them to a
    dedicated channel.

    RC events are queued by ``process()`` and evaluated on a background daemon
    thread. Edits matching a known issue (currently only CSD G10 nominations)
    are announced in the channel named by
    ``config.commands["rc_monitor"]["channel"]``. Bot admins can request
    running statistics with the ``rc_monitor``/``rcm`` command.
    """

    name = "rc_monitor"
    commands = ["rc_monitor", "rcm"]
    hooks = ["msg", "rc"]

    # Revision IDs are read from the query string of the RC event's URL.
    _OLDID_RE = re.compile(r"(?:\?|&)oldid=(.*?)(?:&|$)")
    _DIFF_RE = re.compile(r"(?:\?|&)diff=(.*?)(?:&|$)")

    def setup(self):
        """Read the report channel from the config and start the worker thread.

        If no channel is configured, a warning is logged and the command stays
        disabled: ``check()`` rejects everything and no thread is started.
        """
        # Defined before any early return so unload() is always safe to call.
        self._thread = None

        try:
            self._channel = self.config.commands[self.name]["channel"]
        except KeyError:
            self._channel = None
            log = (
                "Cannot use without a report channel set as "
                'config.commands["{0}"]["channel"]'
            )
            self.logger.warning(log.format(self.name))
            return

        self._stats = {
            "start": datetime.utcnow(),
            "edits": 0,
            "hits": 0,
            "max_backlog": 0,
        }
        self._levels = {}
        self._issues = {}
        self._descriptions = {}
        self._redirects = {}  # template name -> set of accepted names
        self._regexes = {}  # template name -> compiled search pattern
        self._queue = Queue()
        self._prepare_reports()

        self._thread = Thread(target=self._callback, name="rc_monitor")
        self._thread.daemon = True
        # Cooperative stop flag, polled by _callback() and cleared by unload().
        self._thread.running = True
        self._thread.start()

    def check(self, data):
        """Return a truthy value if *data* should be handed to ``process()``.

        Nothing is accepted while no report channel is configured. Otherwise
        every RC event is accepted, as is any message invoking one of this
        command's names.
        """
        if not self._channel:
            return False
        return isinstance(data, RC) or (
            data.is_command and data.command in self.commands
        )

    def process(self, data):
        """Queue an RC event, or answer a statistics request from an admin.

        RC events are put on the work queue and the largest backlog seen so far
        is recorded. Any other message is treated as a stats request: non-admins
        are refused, admins get a one-line summary of the monitor's counters.
        """
        if isinstance(data, RC):
            # qsize() is sampled before put(), so count this event as well.
            newlen = self._queue.qsize() + 1
            self._queue.put(data)
            if newlen > self._stats["max_backlog"]:
                self._stats["max_backlog"] = newlen
            return

        if not self.config.irc["permissions"].is_admin(data):
            self.reply(data, "You must be a bot admin to use this command.")
            return

        since = self._stats["start"].strftime("%H:%M:%S, %d %B %Y")
        seconds = (datetime.utcnow() - self._stats["start"]).total_seconds()
        # Avoid ZeroDivisionError when queried with no measurable elapsed time.
        rate = self._stats["edits"] / seconds if seconds > 0 else 0.0
        msg = (
            "\x02{edits:,}\x0f edits checked since {since} "
            "(\x02{rate:.2f}\x0f edits/sec); \x02{hits:,}\x0f hits; "
            "\x02{qsize:,}\x0f-edit backlog (\x02{max_backlog:,}\x0f max)."
        )
        self.reply(
            data,
            msg.format(
                since=since, rate=rate, qsize=self._queue.qsize(), **self._stats
            ),
        )

    def unload(self):
        """Ask the worker thread to stop.

        Does nothing if the thread was never started (no channel configured).
        """
        thread = getattr(self, "_thread", None)
        if thread is None:
            return
        thread.running = False
        # The worker may be blocked in Queue.get(); a sentinel wakes it up so
        # it can notice the cleared flag and exit.
        self._queue.put(None)

    def _prepare_reports(self):
        """Set up internal tables for storing report information."""
        routine = 1
        alert = 2
        urgent = 3

        # Severity level -> label shown in reports.
        self._levels = {routine: "routine", alert: "alert", urgent: "URGENT"}
        # Issue key -> severity level.
        self._issues = {"g10": alert}
        # Issue key -> human-readable description.
        self._descriptions = {"g10": "CSD G10 nomination"}

    def _get_diff(self, oldrev, newrev):
        """Return the difference between two revisions.

        A diff is a 2-tuple: (list of lines added, list of lines removed).

        *oldrev* and *newrev* are revision IDs as strings. If *oldrev* is
        falsy, every line of *newrev* is reported as added. Returns ``None``
        if the API query fails or the response lacks the expected data.
        """
        site = self.bot.wiki.get_site()
        try:
            result = site.api_query(
                action="query",
                prop="revisions",
                rvprop="ids|content",
                rvslots="main",
                revids=(oldrev + "|" + newrev) if oldrev else newrev,
            )
        except APIError:
            return None

        try:
            # dict views cannot be indexed, so materialize the pages first.
            pages = list(result["query"]["pages"].values())
            if len(pages) != 1:
                return None
            revs = pages[0]["revisions"]
        except KeyError:
            return None

        if not oldrev:
            try:
                text = revs[0]["slots"]["main"]["*"]
            except (IndexError, KeyError):
                return None
            return _Diff(text.splitlines(), [])

        try:
            by_id = {rv["revid"]: rv for rv in revs}
            oldtext = by_id[int(oldrev)]["slots"]["main"]["*"]
            newtext = by_id[int(newrev)]["slots"]["main"]["*"]
        except (KeyError, ValueError):
            # Missing revision/content, or a non-numeric ID taken from the URL.
            return None

        lines = list(ndiff(oldtext.splitlines(), newtext.splitlines()))
        added = [line[2:] for line in lines if line[:2] == "+ "]
        removed = [line[2:] for line in lines if line[:2] == "- "]
        return _Diff(added, removed)

    def _fetch_redirects(self, template):
        """Return the set of valid names for a given template.

        The set holds the lower-cased titles (namespace prefix removed) of up
        to 50 template-namespace redirects to the template, plus *template*
        itself. An empty set is returned if the lookup fails.
        """
        site = self.bot.wiki.get_site()
        try:
            result = site.api_query(
                action="query",
                list="backlinks",
                blfilterredir="redirects",
                blnamespace=constants.NS_TEMPLATE,
                bllimit=50,
                bltitle="Template:" + template,
            )
        except APIError:
            return set()

        try:
            backlinks = result["query"]["backlinks"]
        except KeyError:
            return set()

        redirs = {link["title"].split(":", 1)[1].lower() for link in backlinks}
        redirs.add(template)
        return redirs

    def _fast_template_search(self, template):
        """Return a compiled regular expression for matching templates.

        The pattern matches an opening ``{{`` followed by any accepted name of
        *template* (optional ``template:`` prefix, spaces and underscores
        interchangeable, case-insensitive) and then ``|`` or ``}}``. Returns
        ``None`` if the template's names could not be fetched; nothing is
        cached in that case, so the lookup is retried on the next call.
        """
        regex = self._regexes.get(template)
        if regex is not None:
            return regex

        if template not in self._redirects:
            redirects = self._fetch_redirects(template)
            if not redirects:
                return None
            self._redirects[template] = redirects

        search = "|".join(
            r"(template:)?" + re.escape(tmpl).replace(r"\ ", r"[ _]")
            for tmpl in self._redirects[template]
        )
        regex = re.compile(r"\{\{\s*(" + search + r")\s*(\||\}\})", re.U | re.I)
        # This runs for every evaluated edit, so build the pattern only once.
        self._regexes[template] = regex
        return regex

    def _evaluate_csd(self, diff):
        """Evaluate a diff for CSD tagging.

        Returns ``["g10"]`` if any added line contains a db-g10 template (or
        one of its redirects), otherwise an empty list.
        """
        regex = self._fast_template_search("db-g10")
        if not regex:
            return []

        if any(regex.search(line) for line in diff.added):
            return ["g10"]
        return []

    def _evaluate(self, event):
        """Return heuristic information about the given RC event.

        The result is a list of issue keys, most severe first; it is empty if
        the event's URL has no ``oldid`` parameter or its diff is unavailable.
        """
        oldrev = self._OLDID_RE.search(event.url)
        newrev = self._DIFF_RE.search(event.url)
        if not oldrev:
            return []
        if newrev:
            diff = self._get_diff(oldrev.group(1), newrev.group(1))
        else:
            # Without a "diff" parameter, "oldid" names the only revision, so
            # its whole text is treated as added.
            diff = self._get_diff(None, oldrev.group(1))
        if diff is None:
            return []

        issues = []
        issues.extend(self._evaluate_csd(diff))
        issues.sort(key=lambda issue: self._issues[issue], reverse=True)
        return issues

    def _format(self, rc, report):
        """Format a RC event for the report channel.

        *report* must be a non-empty list of issue keys. The message is
        labelled with the highest severity among them, and edit summaries
        longer than 50 characters are truncated with an ellipsis.
        """
        level = self._levels[max(self._issues[issue] for issue in report)]
        descr = ", ".join(self._descriptions[issue] for issue in report)
        notify = " ".join("!rcm-" + issue for issue in report)
        cmnt = rc.comment if len(rc.comment) <= 50 else rc.comment[:47] + "..."

        msg = (
            "[\x02{level}\x0f] ({descr}) [\x02{notify}\x0f]\x0306 * "
            "\x0314[[\x0307{title}\x0314]]\x0306 * \x0303{user}\x0306 * "
            "\x0302{url}\x0306 * \x0310{comment}"
        )
        return msg.format(
            level=level,
            descr=descr,
            notify=notify,
            title=rc.page,
            user=rc.user,
            url=rc.url,
            comment=cmnt,
        )

    def _handle_event(self, event):
        """Process a recent change event.

        Events that are not edits, or that carry the "B" flag, are ignored.
        Every other event counts as a checked edit, and as a hit if it is
        reported to the channel.
        """
        # NOTE(review): "B" presumably marks bot edits - confirm against the
        # RC event parser.
        if not event.is_edit or "B" in event.flags:
            return
        report = self._evaluate(event)
        self._stats["edits"] += 1
        if report:
            self.say(self._channel, self._format(event, report))
            self._stats["hits"] += 1

    def _callback(self):
        """Internal callback for the RC monitor thread.

        Handles queued events one at a time until ``unload()`` clears the
        thread's ``running`` flag.
        """
        while self._thread.running:
            event = self._queue.get()
            # unload() clears the flag and then queues a wake-up sentinel.
            if not self._thread.running:
                break
            try:
                self._handle_event(event)
            except Exception:
                # Loop boundary of the worker: one bad event must not kill the
                # thread and leave the queue growing unprocessed.
                self.logger.exception("Error while handling RC event")
|