diff --git a/earwigbot/wiki/copyvios/exclusions.py b/earwigbot/wiki/copyvios/exclusions.py
index 82beda9..bfa0f62 100644
--- a/earwigbot/wiki/copyvios/exclusions.py
+++ b/earwigbot/wiki/copyvios/exclusions.py
@@ -32,7 +32,8 @@ __all__ = ["ExclusionsDB"]
 
 DEFAULT_SOURCES = {
     "all": [  # Applies to all, but located on enwiki
-        "User:EarwigBot/Copyvios/Exclusions"
+        "User:EarwigBot/Copyvios/Exclusions",
+        "User:EranBot/Copyright/Blacklist"
     ],
     "enwiki": [
         "Wikipedia:Mirrors and forks/Abc", "Wikipedia:Mirrors and forks/Def",
@@ -90,6 +91,15 @@ class ExclusionsDB(object):
         except exceptions.PageNotFoundError:
             return urls
 
+        # This page is parsed line by line rather than with the regexes below:
+        # skip the first line, drop "#"/"==" trailers, and store what remains
+        # of each line as a "re:"-prefixed pattern.
+        if site == "enwiki" and source == "User:EranBot/Copyright/Blacklist":
+            for line in data.splitlines()[1:]:
+                line = re.sub(r"(#|==).*$", "", line).strip()
+                if line:
+                    urls.add("re:" + line)
+            return urls
+
         regexes = [
             r"url\s*=\s*(?:\)?(?:https?:)?(?://)?(.*?)(?:\.*?)?\s*$",
             r"\*\s*Site:\s*(?:\[|\)?(?:https?:)?(?://)?(.*?)(?:\].*?|\.*?)?\s*$"
@@ -168,7 +178,14 @@ class ExclusionsDB(object):
         with sqlite.connect(self._dbfile) as conn, self._db_access_lock:
             for (excl,) in conn.execute(query, (sitename, "all")):
                 if excl.startswith("*."):
-                    matches = excl[2:] in urlparse(url.lower()).netloc
+                    # "*.domain/path" entries: the domain part must occur in
+                    # the URL's netloc and, when a path is given, it must be a
+                    # prefix of the URL's path (which urlparse reports with a
+                    # leading "/").
+                    parsed = urlparse(url.lower())
+                    domain, _, excl_path = excl[2:].partition("/")
+                    matches = domain in parsed.netloc
+                    if matches and excl_path:
+                        matches = parsed.path.startswith("/" + excl_path)
                 elif excl.startswith("re:"):
                     matches = re.match(excl[3:], normalized)
                 else: