Browse Source

Once again, reengineer regexes.

tags/v0.2
Ben Kurtovic 12 years ago
parent
commit
333201226b
1 changed file with 10 additions and 10 deletions
  1. earwigbot/wiki/copyvios/exclusions.py (+10, -10)

+ 10
- 10
earwigbot/wiki/copyvios/exclusions.py View File

@@ -91,8 +91,8 @@ class ExclusionsDB(object):
return urls return urls


regexes = [ regexes = [
r"url\s*=\s*(?:<nowiki>)?(?:https?:)?(?://)?(.*)(?:</nowiki>)?",
r"\*\s*Site:\s*(?:\[|\<nowiki\>)?(?:https?:)?(?://)?(.*)(?:\]|\</nowiki\>)?"
r"url\s*=\s*(?:\<nowiki\>)?(?:https?:)?(?://)?(.*?)(?:\</nowiki\>.*?)?\s*$",
r"\*\s*Site:\s*(?:\[|\<nowiki\>)?(?:https?:)?(?://)?(.*?)(?:\].*?|\</nowiki\>.*?)?\s*$"
] ]
for regex in regexes: for regex in regexes:
find = re.findall(regex, data, re.I) find = re.findall(regex, data, re.I)
@@ -101,13 +101,13 @@ class ExclusionsDB(object):


def _update(self, sitename): def _update(self, sitename):
"""Update the database from listed sources in the index.""" """Update the database from listed sources in the index."""
query1 = "SELECT source_page FROM sources WHERE source_sitename = ?;"
query1 = "SELECT source_page FROM sources WHERE source_sitename = ?"
query2 = "SELECT exclusion_url FROM exclusions WHERE exclusion_sitename = ?" query2 = "SELECT exclusion_url FROM exclusions WHERE exclusion_sitename = ?"
query3 = "DELETE FROM exclusions WHERE exclusion_sitename = ? AND exclusion_url = ?" query3 = "DELETE FROM exclusions WHERE exclusion_sitename = ? AND exclusion_url = ?"
query4 = "INSERT INTO exclusions VALUES (?, ?);"
query5 = "SELECT 1 FROM updates WHERE update_sitename = ?;"
query6 = "UPDATE updates SET update_time = ? WHERE update_sitename = ?;"
query7 = "INSERT INTO updates VALUES (?, ?);"
query4 = "INSERT INTO exclusions VALUES (?, ?)"
query5 = "SELECT 1 FROM updates WHERE update_sitename = ?"
query6 = "UPDATE updates SET update_time = ? WHERE update_sitename = ?"
query7 = "INSERT INTO updates VALUES (?, ?)"


if sitename == "all": if sitename == "all":
site = self._sitesdb.get_site("enwiki") site = self._sitesdb.get_site("enwiki")
@@ -130,7 +130,7 @@ class ExclusionsDB(object):


def _get_last_update(self, sitename): def _get_last_update(self, sitename):
"""Return the UNIX timestamp of the last time the db was updated.""" """Return the UNIX timestamp of the last time the db was updated."""
query = "SELECT update_time FROM updates WHERE update_sitename = ?;"
query = "SELECT update_time FROM updates WHERE update_sitename = ?"
with sqlite.connect(self._dbfile) as conn, self._db_access_lock: with sqlite.connect(self._dbfile) as conn, self._db_access_lock:
try: try:
result = conn.execute(query, (sitename,)).fetchone() result = conn.execute(query, (sitename,)).fetchone()
@@ -140,11 +140,11 @@ class ExclusionsDB(object):
return result[0] if result else 0 return result[0] if result else 0


def sync(self, sitename): def sync(self, sitename):
"""Update the database if it hasn't been updated in the past week.
"""Update the database if it hasn't been updated in the past day.


This only updates the exclusions database for the *sitename* site. This only updates the exclusions database for the *sitename* site.
""" """
max_staleness = 60 * 60 * 24 * 7
max_staleness = 60 * 60 * 24
time_since_update = int(time() - self._get_last_update(sitename)) time_since_update = int(time() - self._get_last_update(sitename))
if time_since_update > max_staleness: if time_since_update > max_staleness:
log = u"Updating stale database: {0} (last updated {1} seconds ago)" log = u"Updating stale database: {0} (last updated {1} seconds ago)"


Loading…
Cancel
Save