Browse Source

Implement speedy detection in RC monitor.

pull/10/merge
Ben Kurtovic 9 years ago
parent
commit
a89950cd3f
1 changed files with 87 additions and 16 deletions
  1. +87
    -16
      commands/rc_monitor.py

+ 87
- 16
commands/rc_monitor.py View File

@@ -20,12 +20,19 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from collections import namedtuple
from datetime import datetime
from difflib import ndiff
from Queue import Queue
import re
from threading import Thread

from earwigbot.commands import Command
from earwigbot.exceptions import APIError
from earwigbot.irc import RC
from earwigbot.wiki import constants

# Result of comparing two revisions. "added" and "removed" are lists of the
# lines (with the two-character ndiff prefix stripped) that were added to or
# removed from the old text, respectively.
_Diff = namedtuple("_Diff", ["added", "removed"])

class RCMonitor(Command):
"""Monitors the recent changes feed for certain edits and reports them to a
@@ -51,6 +58,7 @@ class RCMonitor(Command):
self._levels = {}
self._issues = {}
self._descriptions = {}
self._redirects = {}
self._queue = Queue()

self._thread = Thread(target=self._callback, name="rc_monitor")
@@ -102,31 +110,94 @@ class RCMonitor(Command):
urgent: "URGENT"
}
self._issues = {
"random": routine,
"random2": urgent,
# ...
"g10": alert
}
self._descriptions = {
"random": "common random test",
"random2": "rare random test",
# ...
"g10": "CSD G10 nomination"
}

def _get_diff(self, oldrev, newrev):
"""Return the difference between two revisions.

A diff is a 2-tuple: (list of lines added, list of lines removed).
"""
site = self.bot.wiki.get_site()
try:
result = site.api_query(
action="query", prop="revisions", rvprop="ids|content",
revids=oldrev + "|" + newrev)
except APIError:
return None

try:
pages = result["query"]["pages"].values()
except IndexError:
return None
if len(pages) != 1:
return None
revs = pages[0]["revisions"]
try:
oldtext = [rv["*"] for rv in revs if rv["revid"] == int(oldrev)][0]
newtext = [rv["*"] for rv in revs if rv["revid"] == int(newrev)][0]
except (IndexError, KeyError):
return None

lines = list(ndiff(oldtext.splitlines(), newtext.splitlines()))
added = [line[2:] for line in lines if line[:2] == "+ "]
removed = [line[2:] for line in lines if line[:2] == "- "]
return _Diff(added, removed)

def _fetch_redirects(self, template):
    """Return the set of valid names for a given template.

    The set contains *template* itself plus the lowercased, namespace-less
    titles of up to 50 template-namespace redirects pointing at it, as
    reported by the API's backlinks list.

    Returns an empty set if the API query fails or the response does not
    contain a backlinks list, so callers can treat any falsy result as
    "unavailable".
    """
    site = self.bot.wiki.get_site()
    try:
        result = site.api_query(
            action="query", list="backlinks", blfilterredir="redirects",
            blnamespace=constants.NS_TEMPLATE, bllimit=50,
            bltitle="Template:" + template)
        backlinks = result["query"]["backlinks"]
    except (APIError, KeyError):
        return set()

    # Drop the "Template:" namespace prefix from each redirect title.
    redirs = {link["title"].split(":", 1)[1].lower() for link in backlinks}
    redirs.add(template)
    return redirs

def _fast_template_search(self, template):
"""Return a compiled regular expression for matching templates."""
if template not in self._redirects:
redirects = self._fetch_redirects(template)
if not redirects:
return None
self._redirects[template] = redirects

search = "|".join("(template:)?" + re.escape(tmpl).replace(" ", "[ _]")
for tmpl in self._redirects[template])
return re.compile(r"\{\{\s*" + search + r"\s*(\||\}\})", re.U|re.I)

def _evaluate_csd(self, diff):
"""Evaluate a diff for CSD tagging."""
regex = self._fast_template_search("db-g10")
if not regex:
return []

if any(regex.search(line) for line in diff.added):
return ["g10"]
return []

def _evaluate(self, event):
"""Return heuristic information about the given RC event."""
issues = []
oldrev = re.search(r"(?:\?|&)oldid=(.*?)(?:&|$)", event.url)
newrev = re.search(r"(?:\?|&)diff=(.*?)(?:&|$)", event.url)
if not oldrev or not newrev:
return []

# TODO
from random import random
rand = random()
if rand < 0.05:
issues.append("random")
if rand < 0.01:
issues.append("random2")
# END TODO
diff = self._get_diff(oldrev.group(1), newrev.group(1))
if not diff:
return []

issues = []
issues.extend(self._evaluate_csd(diff))
issues.sort(key=lambda issue: self._issues[issue], reverse=True)
return issues

@@ -146,7 +217,7 @@ class RCMonitor(Command):

def _handle_event(self, event):
"""Process a recent change event."""
if not event.is_edit:
if not event.is_edit or "B" in event.flags:
return
report = self._evaluate(event)
self._stats["edits"] += 1


Loading…
Cancel
Save