Parcourir la source

Add support for User:EranBot/Copyright/Blacklist (closes #52)

tags/v0.2
Ben Kurtovic il y a 9 ans
Parent
révision
a2c10650a8
1 fichiers modifiés avec 14 ajouts et 2 suppressions
  1. +14
    -2
      earwigbot/wiki/copyvios/exclusions.py

+ 14
- 2
earwigbot/wiki/copyvios/exclusions.py Voir le fichier

@@ -32,7 +32,8 @@ __all__ = ["ExclusionsDB"]

DEFAULT_SOURCES = {
"all": [ # Applies to all, but located on enwiki
"User:EarwigBot/Copyvios/Exclusions"
"User:EarwigBot/Copyvios/Exclusions",
"User:EranBot/Copyright/Blacklist"
],
"enwiki": [
"Wikipedia:Mirrors and forks/Abc", "Wikipedia:Mirrors and forks/Def",
@@ -90,6 +91,13 @@ class ExclusionsDB(object):
except exceptions.PageNotFoundError:
return urls

if site == "enwiki" and source == "User:EranBot/Copyright/Blacklist":
for line in data.splitlines()[1:]:
line = re.sub(r"(#|==).*$", "", line).strip()
if line:
urls.add("re:" + line)
return

regexes = [
r"url\s*=\s*(?:\<nowiki\>)?(?:https?:)?(?://)?(.*?)(?:\</nowiki\>.*?)?\s*$",
r"\*\s*Site:\s*(?:\[|\<nowiki\>)?(?:https?:)?(?://)?(.*?)(?:\].*?|\</nowiki\>.*?)?\s*$"
@@ -168,7 +176,11 @@ class ExclusionsDB(object):
with sqlite.connect(self._dbfile) as conn, self._db_access_lock:
for (excl,) in conn.execute(query, (sitename, "all")):
if excl.startswith("*."):
matches = excl[2:] in urlparse(url.lower()).netloc
parsed = urlparse(url.lower())
matches = excl[2:] in parsed.netloc
if matches and "/" in excl:
excl_path = excl[excl.index("/") + 1]
matches = excl_path.startswith(parsed.path)
elif excl.startswith("re:"):
matches = re.match(excl[3:], normalized)
else:


Chargement…
Annuler
Enregistrer