diff --git a/earwigbot/wiki/copyvios/exclusions.py b/earwigbot/wiki/copyvios/exclusions.py
index 82beda9..bfa0f62 100644
--- a/earwigbot/wiki/copyvios/exclusions.py
+++ b/earwigbot/wiki/copyvios/exclusions.py
@@ -32,7 +32,8 @@ __all__ = ["ExclusionsDB"]
DEFAULT_SOURCES = {
"all": [ # Applies to all, but located on enwiki
- "User:EarwigBot/Copyvios/Exclusions"
+ "User:EarwigBot/Copyvios/Exclusions",
+ "User:EranBot/Copyright/Blacklist"  # lines of this page are loaded as "re:" regex exclusions
],
"enwiki": [
"Wikipedia:Mirrors and forks/Abc", "Wikipedia:Mirrors and forks/Def",
@@ -90,6 +91,13 @@ class ExclusionsDB(object):
except exceptions.PageNotFoundError:
return urls
+ if site == "enwiki" and source == "User:EranBot/Copyright/Blacklist":
+ for line in data.splitlines()[1:]:  # the first line of the page is skipped
+ line = re.sub(r"(#|==).*$", "", line).strip()  # drop everything from "#" or "==" to end of line
+ if line:
+ urls.add("re:" + line)  # "re:" marks the entry as a regex exclusion
+ return urls  # return the collected set, like the not-found path above (a bare return would yield None)
+
regexes = [
r"url\s*=\s*(?:\)?(?:https?:)?(?://)?(.*?)(?:\.*?)?\s*$",
r"\*\s*Site:\s*(?:\[|\)?(?:https?:)?(?://)?(.*?)(?:\].*?|\.*?)?\s*$"
@@ -168,7 +176,11 @@ class ExclusionsDB(object):
with sqlite.connect(self._dbfile) as conn, self._db_access_lock:
for (excl,) in conn.execute(query, (sitename, "all")):
if excl.startswith("*."):
- matches = excl[2:] in urlparse(url.lower()).netloc
+ parsed = urlparse(url.lower())
+ domain, sep, excl_path = excl[2:].partition("/")  # split "*.host/path" into host and optional path
+ matches = domain in parsed.netloc  # compare only the host part against the netloc
+ if matches and sep:  # the exclusion also restricts the URL path
+ matches = (parsed.path or "/").startswith("/" + excl_path)  # URL path must begin with the excluded path
elif excl.startswith("re:"):
matches = re.match(excl[3:], normalized)
else: