
Implemented limited wildcard matching in ExclusionsDB.

tags/v0.1^2
Ben Kurtovic, 12 years ago
commit 292f04fe2d
2 files changed, 13 insertions(+), 6 deletions(-)
  1. earwigbot/tasks/afc_copyvios.py (+1, -1)
  2. earwigbot/wiki/copyvios/exclusions.py (+12, -5)

earwigbot/tasks/afc_copyvios.py (+1, -1)

@@ -42,7 +42,7 @@ class AFCCopyvios(Task):
         self.min_confidence = cfg.get("minConfidence", 0.5)
         self.max_queries = cfg.get("maxQueries", 10)
         self.cache_results = cfg.get("cacheResults", False)
-        default_summary = "Tagging suspected [[WP:COPYVIO|copyright violation]] of {url}"
+        default_summary = "Tagging suspected [[WP:COPYVIO|copyright violation]] of {url}."
         self.summary = self.make_summary(cfg.get("summary", default_summary))

         # Connection data for our SQL database:


earwigbot/wiki/copyvios/exclusions.py (+12, -5)

@@ -24,6 +24,7 @@ import re
 import sqlite3 as sqlite
 from threading import Lock
 from time import time
+from urlparse import urlparse

 from earwigbot import exceptions

@@ -34,7 +35,8 @@ default_sources = {
         "Wikipedia:Mirrors and forks/Abc", "Wikipedia:Mirrors and forks/Def",
         "Wikipedia:Mirrors and forks/Ghi", "Wikipedia:Mirrors and forks/Jkl",
         "Wikipedia:Mirrors and forks/Mno", "Wikipedia:Mirrors and forks/Pqr",
-        "Wikipedia:Mirrors and forks/Stu", "Wikipedia:Mirrors and forks/Vwxyz"
+        "Wikipedia:Mirrors and forks/Stu", "Wikipedia:Mirrors and forks/Vwxyz",
+        "User:EarwigBot/Copyvios/Exclusion list"
     ]
 }


@@ -131,11 +133,11 @@ class ExclusionsDB(object):
         return result[0] if result else 0

     def sync(self, sitename):
-        """Update the database if it hasn't been updated in the past month.
+        """Update the database if it hasn't been updated in the past week.

         This only updates the exclusions database for the *sitename* site.
         """
-        max_staleness = 60 * 60 * 24 * 30
+        max_staleness = 60 * 60 * 24 * 7
         time_since_update = int(time() - self._get_last_update(sitename))
         if time_since_update > max_staleness:
             log = u"Updating stale database: {0} (last updated {1} seconds ago)"
@@ -153,8 +155,13 @@ class ExclusionsDB(object):
         normalized = re.sub("https?://", "", url.lower())
         query = "SELECT exclusion_url FROM exclusions WHERE exclusion_sitename = ?"
         with sqlite.connect(self._dbfile) as conn, self._db_access_lock:
-            for row in conn.execute(query, (sitename,)):
-                if normalized.startswith(row[0]):
+            for (excl,) in conn.execute(query, (sitename,)):
+                if excl.startswith("*."):
+                    netloc = urlparse(url.lower()).netloc
+                    matches = True if excl[2:] in netloc else False
+                else:
+                    matches = True if normalized.startswith(excl) else False
+                if matches:
                     log = u"Exclusion detected in {0} for {1}"
                     self._logger.debug(log.format(sitename, url))
                     return True
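For illustration only, the matching rule introduced by this hunk can be sketched outside the ExclusionsDB class and its SQLite plumbing. The helper name, exclusion entries, and URLs below are hypothetical; the sketch uses Python 3's urllib.parse.urlparse, whereas the original (Python 2) module imports urlparse from the urlparse package.

import re
from urllib.parse import urlparse  # the diff targets Python 2: `from urlparse import urlparse`

def url_is_excluded(url, exclusions):
    """Return True if *url* matches any exclusion entry (hypothetical helper).

    Entries beginning with "*." match when the remainder appears anywhere in
    the URL's network location, so "*.example.com" covers every subdomain;
    all other entries are treated as prefixes of the scheme-stripped URL,
    mirroring the logic added in the hunk above.
    """
    normalized = re.sub("https?://", "", url.lower())
    for excl in exclusions:
        if excl.startswith("*."):
            matches = excl[2:] in urlparse(url.lower()).netloc
        else:
            matches = normalized.startswith(excl)
        if matches:
            return True
    return False

# Hypothetical entries and URLs, for demonstration only:
exclusions = ["*.example.com", "mirror.example.org/wiki/"]
print(url_is_excluded("http://en.example.com/page", exclusions))           # True  (wildcard)
print(url_is_excluded("https://mirror.example.org/wiki/Foo", exclusions))  # True  (prefix)
print(url_is_excluded("http://unrelated.example.net/", exclusions))        # False

Note that the substring test on the network location is deliberately loose: an entry "*.example.com" would also match a host such as badexample.com, which is the same behavior as the committed code.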

