@@ -1,299 +1,10 @@
<%!
    from datetime import datetime
    from hashlib import sha256
    from itertools import count
    from os.path import expanduser
    from re import sub, UNICODE
    from sys import path
    from time import time
    from urlparse import parse_qs, urlparse

    from earwigbot import exceptions
    from urlparse import parse_qs
    from earwigbot.bot import Bot
    import oursql

    def get_results(bot, lang, project, name, all_projects, title, url, query):
        site = get_site(bot, lang, project, name, all_projects)
        if not site:
            return None, None, None
        page = site.get_page(title)
        try:
            page.get()  # Make sure that the page exists before we check it!
        except (exceptions.PageNotFoundError, exceptions.InvalidPageError):
            return site, page, None
        # if url:
        #     result = get_url_specific_results(page, url)
        # else:
        #     conn = open_sql_connection(bot, "copyvioCache")
        #     if not query.get("nocache"):
        #         result = get_cached_results(page, conn)
        #     if query.get("nocache") or not result:
        #         result = get_fresh_results(page, conn)
        tstart = time()
        mc1 = __import__("earwigbot").wiki.copyvios.MarkovChain(page.get())
        mc2 = __import__("earwigbot").wiki.copyvios.MarkovChain(u"This is some random textual content for a page.")
        mci = __import__("earwigbot").wiki.copyvios.MarkovChainIntersection(mc1, mc2)
        result = __import__("earwigbot").wiki.copyvios.CopyvioCheckResult(
            True, 0.67123, "http://example.com/", 7, mc1, (mc2, mci))
        result.cached = False
        result.tdiff = time() - tstart
        # END TEST BLOCK
        return site, page, result

    def get_site(bot, lang, project, name, all_projects):
        if project not in [proj[0] for proj in all_projects]:
            return None
        if project == "wikimedia" and name:  # Special sites:
            try:
                return bot.wiki.get_site(name=name)
            except exceptions.SiteNotFoundError:
                try:
                    return bot.wiki.add_site(lang=lang, project=project)
                except (exceptions.APIError, exceptions.LoginError):
                    return None
        try:
            return bot.wiki.get_site(lang=lang, project=project)
        except exceptions.SiteNotFoundError:
            try:
                return bot.wiki.add_site(lang=lang, project=project)
            except (exceptions.APIError, exceptions.LoginError):
                return None

    def get_url_specific_results(page, url):
        t_start = time()
        result = page.copyvio_compare(url)
        result.cached = False
        result.tdiff = time() - t_start
        return result

    def open_sql_connection(bot, dbname):
        conn_args = bot.config.wiki["_toolserverSQL"][dbname]
        if "read_default_file" not in conn_args and "user" not in conn_args and "passwd" not in conn_args:
            conn_args["read_default_file"] = expanduser("~/.my.cnf")
        if "autoping" not in conn_args:
            conn_args["autoping"] = True
        if "autoreconnect" not in conn_args:
            conn_args["autoreconnect"] = True
        return oursql.connect(**conn_args)

    def get_cached_results(page, conn):
        query1 = "DELETE FROM cache WHERE cache_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 3 DAY)"
        query2 = "SELECT cache_url, cache_time, cache_queries, cache_process_time FROM cache WHERE cache_id = ? AND cache_hash = ?"
        pageid = page.pageid()
        hash = sha256(page.get()).hexdigest()
        t_start = time()
        with conn.cursor() as cursor:
            cursor.execute(query1)
            cursor.execute(query2, (pageid, hash))
            results = cursor.fetchall()
        if not results:
            return None
        url, cache_time, num_queries, original_tdiff = results[0]
        result = page.copyvio_compare(url)
        result.cached = True
        result.queries = num_queries
        result.tdiff = time() - t_start
        result.original_tdiff = original_tdiff
        result.cache_time = cache_time.strftime("%b %d, %Y %H:%M:%S UTC")
        result.cache_age = format_date(cache_time)
        return result

    def format_date(cache_time):
        diff = datetime.utcnow() - cache_time
        if diff.seconds > 3600:
            return "{0} hours".format(diff.seconds / 3600)
        if diff.seconds > 60:
            return "{0} minutes".format(diff.seconds / 60)
        return "{0} seconds".format(diff.seconds)

    def get_fresh_results(page, conn):
        t_start = time()
        result = page.copyvio_check(max_queries=10)
        result.cached = False
        result.tdiff = time() - t_start
        cache_result(page, result, conn)
        return result

    def cache_result(page, result, conn):
        pageid = page.pageid()
        hash = sha256(page.get()).hexdigest()
        query1 = "SELECT 1 FROM cache WHERE cache_id = ?"
        query2 = "DELETE FROM cache WHERE cache_id = ?"
        query3 = "INSERT INTO cache VALUES (?, ?, ?, CURRENT_TIMESTAMP, ?, ?)"
        with conn.cursor() as cursor:
            cursor.execute(query1, (pageid,))
            if cursor.fetchall():
                cursor.execute(query2, (pageid,))
            cursor.execute(query3, (pageid, hash, result.url, result.queries,
                                    result.tdiff))

    def get_sites(bot):
        max_staleness = 60 * 60 * 24 * 7
        conn = open_sql_connection(bot, "globals")
        query1 = "SELECT update_time FROM updates WHERE update_service = ?"
        query2 = "SELECT lang_code, lang_name FROM language"
        query3 = "SELECT project_code, project_name FROM project"
        with conn.cursor() as cursor:
            cursor.execute(query1, ("sites",))
            try:
                time_since_update = int(time() - cursor.fetchall()[0][0])
            except IndexError:
                time_since_update = time()
            if time_since_update > max_staleness:
                update_sites(bot.wiki.get_site(), cursor)
            cursor.execute(query2)
            langs = []
            for code, name in cursor.fetchall():
                if "\U" in name:
                    name = name.decode("unicode_escape")
                langs.append((code, name))
            cursor.execute(query3)
            projects = cursor.fetchall()
        return langs, projects

    def update_sites(site, cursor):
        matrix = site.api_query(action="sitematrix")["sitematrix"]
        del matrix["count"]
        languages, projects = set(), set()
        for site in matrix.itervalues():
            if isinstance(site, list):  # Special sites
                bad_sites = ["closed", "private", "fishbowl"]
                for special in site:
                    if all([key not in special for key in bad_sites]):
                        full = urlparse(special["url"]).netloc
                        if full.count(".") == 1:  # No subdomain, so use "www"
                            lang, project = "www", full.split(".")[0]
                        else:
                            lang, project = full.rsplit(".", 2)[:2]
                        code = u"{0}::{1}".format(lang, special["dbname"])
                        name = special["code"].capitalize()
                        languages.add((code, u"{0} ({1})".format(lang, name)))
                        projects.add((project, project.capitalize()))
                continue
            this = set()
            for web in site["site"]:
                if "closed" in web:
                    continue
                project = "wikipedia" if web["code"] == u"wiki" else web["code"]
                this.add((project, project.capitalize()))
            if this:
                code = site["code"]
                if "\U" in site["name"].encode("unicode_escape"):
                    name = site["name"].encode("unicode_escape")
                else:
                    name = site["name"]
                languages.add((code, u"{0} ({1})".format(code, name)))
                projects |= this
        save_site_updates(cursor, languages, projects)

    def save_site_updates(cursor, languages, projects):
        query1 = "SELECT lang_code, lang_name FROM language"
        query2 = "DELETE FROM language WHERE lang_code = ? AND lang_name = ?"
        query3 = "INSERT INTO language VALUES (?, ?)"
        query4 = "SELECT project_code, project_name FROM project"
        query5 = "DELETE FROM project WHERE project_code = ? AND project_name = ?"
        query6 = "INSERT INTO project VALUES (?, ?)"
        query7 = "SELECT 1 FROM updates WHERE update_service = ?"
        query8 = "UPDATE updates SET update_time = ? WHERE update_service = ?"
        query9 = "INSERT INTO updates VALUES (?, ?)"
        synchronize_sites_with_db(cursor, languages, query1, query2, query3)
        synchronize_sites_with_db(cursor, projects, query4, query5, query6)
        cursor.execute(query7, ("sites",))
        if cursor.fetchall():
            cursor.execute(query8, (time(), "sites"))
        else:
            cursor.execute(query9, ("sites", time()))

    def synchronize_sites_with_db(cursor, updates, q_list, q_rmv, q_update):
        removals = []
        cursor.execute(q_list)
        for site in cursor:
            updates.remove(site) if site in updates else removals.append(site)
        cursor.executemany(q_rmv, removals)
        cursor.executemany(q_update, updates)

    def highlight_delta(chain, delta):
        processed = []
        prev_prev = prev = chain.START
        i = 0
        all_words = chain.text.split()
        paragraphs = chain.text.split("\n")
        for paragraph in paragraphs:
            processed_words = []
            words = paragraph.split(" ")
            for word, i in zip(words, count(i)):
                try:
                    next = strip_word(all_words[i+1])
                except IndexError:
                    next = chain.END
                sword = strip_word(word)
                block = (prev_prev, prev)  # Block for before
                alock = (prev, sword)  # Block for after
                before = [block in delta.chain and sword in delta.chain[block]]
                after = [alock in delta.chain and next in delta.chain[alock]]
                is_first = i == 0
                is_last = i + 1 == len(all_words)
                res = highlight_word(word, before, after, is_first, is_last)
                processed_words.append(res)
                prev_prev = prev
                prev = sword
            processed.append(u" ".join(processed_words))
            i += 1
        return u"<br /><br />".join(processed)

    def highlight_word(word, before, after, is_first, is_last):
        if before and after:
            # Word is in the middle of a highlighted block, so don't change
            # anything unless this is the first word (force block to start) or
            # the last word (force block to end):
            res = word
            if is_first:
                res = u'<span class="cv-hl">' + res
            if is_last:
                res += u'</span>'
        elif before:
            # Word is the last in a highlighted block, so fade it out and then
            # end the block; force open a block before the word if this is the
            # first word:
            res = fade_word(word, u"out") + u"</span>"
            if is_first:
                res = u'<span class="cv-hl">' + res
        elif after:
            # Word is the first in a highlighted block, so start the block and
            # then fade it in; force close the block after the word if this is
            # the last word:
            res = u'<span class="cv-hl">' + fade_word(word, u"in")
            if is_last:
                res += u"</span>"
        else:
            # Word is completely outside of a highlighted block, so do nothing:
            res = word
        return res

    def fade_word(word, dir):
        if len(word) <= 4:
            return u'<span class="cv-hl-{0}">{1}</span>'.format(dir, word)
        if dir == u"out":
            return u'{0}<span class="cv-hl-out">{1}</span>'.format(word[:-4], word[-4:])
        return u'<span class="cv-hl-in">{0}</span>{1}'.format(word[:4], word[4:])

    def strip_word(word):
        return sub("[^\w\s-]", "", word.lower(), flags=UNICODE)

    def urlstrip(url):
        if url.startswith("http://"):
            url = url[7:]
        if url.startswith("https://"):
            url = url[8:]
        if url.startswith("www."):
            url = url[4:]
        if url.endswith("/"):
            url = url[:-1]
        return url
%>\
<%namespace file="/support/copyvios/__init__.py" import="get_results, highlight_delta"/>\
<%namespace file="/support/sites.py" import="get_site, get_sites"/>\
<%namespace file="/support/misc.py" import="urlstrip"/>\
<%
    lang = orig_lang = project = name = title = url = None
    query = parse_qs(environ["QUERY_STRING"])
--- /dev/null
+++ b/support/copyvios/__init__.py
@@ -0,0 +1,4 @@
# -*- coding: utf-8 -*-

from .checker import get_results
from .highlighter import highlight_delta
--- /dev/null
+++ b/support/copyvios/checker.py
@@ -0,0 +1,96 @@
# -*- coding: utf-8 -*-

from datetime import datetime
from hashlib import sha256
from time import time

from earwigbot import exceptions

from sites import get_site  # NOTE: get_site lives in support/sites.py; this import path is an assumption
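
# Fetch the requested page from the resolved site and, if it exists, run a
# copyvio check on it, returning a (site, page, result) tuple: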
def get_results(bot, lang, project, name, all_projects, title, url, query):
    site = get_site(bot, lang, project, name, all_projects)
    if not site:
        return None, None, None
    page = site.get_page(title)
    try:
        page.get()  # Make sure that the page exists before we check it!
    except (exceptions.PageNotFoundError, exceptions.InvalidPageError):
        return site, page, None
    # if url:
    #     result = get_url_specific_results(page, url)
    # else:
    #     conn = open_sql_connection(bot, "copyvioCache")
    #     if not query.get("nocache"):
    #         result = get_cached_results(page, conn)
    #     if query.get("nocache") or not result:
    #         result = get_fresh_results(page, conn)
    tstart = time()
    mc1 = __import__("earwigbot").wiki.copyvios.MarkovChain(page.get())
    mc2 = __import__("earwigbot").wiki.copyvios.MarkovChain(u"This is some random textual content for a page.")
    mci = __import__("earwigbot").wiki.copyvios.MarkovChainIntersection(mc1, mc2)
    result = __import__("earwigbot").wiki.copyvios.CopyvioCheckResult(
        True, 0.67123, "http://example.com/", 7, mc1, (mc2, mci))
    result.cached = False
    result.tdiff = time() - tstart
    # END TEST BLOCK
    return site, page, result
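
# Compare the page directly against a single user-supplied URL, recording how
# long the comparison took: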
def get_url_specific_results(page, url):
    t_start = time()
    result = page.copyvio_compare(url)
    result.cached = False
    result.tdiff = time() - t_start
    return result
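
# Re-check the page against the URL stored in the copyvio cache, if an entry
# exists for this page's current text (entries expire after three days):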
def get_cached_results(page, conn):
    query1 = "DELETE FROM cache WHERE cache_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 3 DAY)"
    query2 = "SELECT cache_url, cache_time, cache_queries, cache_process_time FROM cache WHERE cache_id = ? AND cache_hash = ?"
    pageid = page.pageid()
    hash = sha256(page.get()).hexdigest()
    t_start = time()
    with conn.cursor() as cursor:
        cursor.execute(query1)
        cursor.execute(query2, (pageid, hash))
        results = cursor.fetchall()
    if not results:
        return None
    url, cache_time, num_queries, original_tdiff = results[0]
    result = page.copyvio_compare(url)
    result.cached = True
    result.queries = num_queries
    result.tdiff = time() - t_start
    result.original_tdiff = original_tdiff
    result.cache_time = cache_time.strftime("%b %d, %Y %H:%M:%S UTC")
    result.cache_age = format_date(cache_time)
    return result
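
# Express the age of a cache entry as a rough "N hours/minutes/seconds" string: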
def format_date(cache_time):
    diff = datetime.utcnow() - cache_time
    if diff.seconds > 3600:
        return "{0} hours".format(diff.seconds / 3600)
    if diff.seconds > 60:
        return "{0} minutes".format(diff.seconds / 60)
    return "{0} seconds".format(diff.seconds)
def get_fresh_results(page, conn):
    t_start = time()
    result = page.copyvio_check(max_queries=10)
    result.cached = False
    result.tdiff = time() - t_start
    cache_result(page, result, conn)
    return result
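
# Replace any existing cache row for this page with the new check result,
# keyed by page ID and a hash of the page text: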
def cache_result(page, result, conn):
    pageid = page.pageid()
    hash = sha256(page.get()).hexdigest()
    query1 = "SELECT 1 FROM cache WHERE cache_id = ?"
    query2 = "DELETE FROM cache WHERE cache_id = ?"
    query3 = "INSERT INTO cache VALUES (?, ?, ?, CURRENT_TIMESTAMP, ?, ?)"
    with conn.cursor() as cursor:
        cursor.execute(query1, (pageid,))
        if cursor.fetchall():
            cursor.execute(query2, (pageid,))
        cursor.execute(query3, (pageid, hash, result.url, result.queries,
                                result.tdiff))
--- /dev/null
+++ b/support/copyvios/highlighter.py
@@ -0,0 +1,71 @@
# -*- coding: utf-8 -*-

from re import sub, UNICODE
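
# Walk the checked text word by word, wrapping the runs that also appear in
# the delta (intersection) chain in <span class="cv-hl"> highlight blocks and
# joining paragraphs with <br /> tags: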
def highlight_delta(chain, delta):
    processed = []
    prev_prev = prev = chain.START
    i = 0
    all_words = chain.text.split()
    paragraphs = chain.text.split("\n")
    for paragraph in paragraphs:
        processed_words = []
        words = paragraph.split(" ")
        for i, word in enumerate(words, i):
            try:
                next = strip_word(all_words[i+1])
            except IndexError:
                next = chain.END
            sword = strip_word(word)
            block = (prev_prev, prev)  # Block for before
            alock = (prev, sword)  # Block for after
            before = block in delta.chain and sword in delta.chain[block]
            after = alock in delta.chain and next in delta.chain[alock]
            is_first = i == 0
            is_last = i + 1 == len(all_words)
            res = highlight_word(word, before, after, is_first, is_last)
            processed_words.append(res)
            prev_prev = prev
            prev = sword
        processed.append(u" ".join(processed_words))
        i += 1
    return u"<br /><br />".join(processed)
def highlight_word(word, before, after, is_first, is_last):
    if before and after:
        # Word is in the middle of a highlighted block, so don't change
        # anything unless this is the first word (force block to start) or
        # the last word (force block to end):
        res = word
        if is_first:
            res = u'<span class="cv-hl">' + res
        if is_last:
            res += u'</span>'
    elif before:
        # Word is the last in a highlighted block, so fade it out and then
        # end the block; force open a block before the word if this is the
        # first word:
        res = fade_word(word, u"out") + u"</span>"
        if is_first:
            res = u'<span class="cv-hl">' + res
    elif after:
        # Word is the first in a highlighted block, so start the block and
        # then fade it in; force close the block after the word if this is
        # the last word:
        res = u'<span class="cv-hl">' + fade_word(word, u"in")
        if is_last:
            res += u"</span>"
    else:
        # Word is completely outside of a highlighted block, so do nothing:
        res = word
    return res
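
# Apply a fade-in or fade-out span to the first or last four characters of a
# word at the edge of a highlight block: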
def fade_word(word, dir):
    if len(word) <= 4:
        return u'<span class="cv-hl-{0}">{1}</span>'.format(dir, word)
    if dir == u"out":
        return u'{0}<span class="cv-hl-out">{1}</span>'.format(word[:-4], word[-4:])
    return u'<span class="cv-hl-in">{0}</span>{1}'.format(word[:4], word[4:])
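
# Normalize a word for chain lookups by lowercasing it and stripping
# punctuation: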
def strip_word(word):
    return sub(r"[^\w\s-]", "", word.lower(), flags=UNICODE)
--- /dev/null
+++ b/support/misc.py
@@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-

from os.path import expanduser

import oursql
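
# Open an oursql connection using the connection arguments stored in the
# bot's config, falling back to ~/.my.cnf for credentials: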
def open_sql_connection(bot, dbname):
    conn_args = bot.config.wiki["_toolserverSQL"][dbname]
    if ("read_default_file" not in conn_args and "user" not in conn_args
            and "passwd" not in conn_args):
        conn_args["read_default_file"] = expanduser("~/.my.cnf")
    if "autoping" not in conn_args:
        conn_args["autoping"] = True
    if "autoreconnect" not in conn_args:
        conn_args["autoreconnect"] = True
    return oursql.connect(**conn_args)
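
# Strip the scheme, leading "www.", and trailing slash from a URL so it can
# be displayed compactly: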
def urlstrip(context, url):
    if url.startswith("http://"):
        url = url[7:]
    if url.startswith("https://"):
        url = url[8:]
    if url.startswith("www."):
        url = url[4:]
    if url.endswith("/"):
        url = url[:-1]
    return url
--- /dev/null
+++ b/support/sites.py
@@ -0,0 +1,110 @@
# -*- coding: utf-8 -*-

from time import time
from urlparse import urlparse

from earwigbot import exceptions

from misc import open_sql_connection  # NOTE: defined in support/misc.py; this import path is an assumption
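
# Return the earwigbot Site object for the given language and project, adding
# the site to the bot's config if it is not already known: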
def get_site(bot, lang, project, name, all_projects):
    if project not in [proj[0] for proj in all_projects]:
        return None
    if project == "wikimedia" and name:  # Special sites:
        try:
            return bot.wiki.get_site(name=name)
        except exceptions.SiteNotFoundError:
            try:
                return bot.wiki.add_site(lang=lang, project=project)
            except (exceptions.APIError, exceptions.LoginError):
                return None
    try:
        return bot.wiki.get_site(lang=lang, project=project)
    except exceptions.SiteNotFoundError:
        try:
            return bot.wiki.add_site(lang=lang, project=project)
        except (exceptions.APIError, exceptions.LoginError):
            return None
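
# Return (langs, projects) lists for the site-selection form, refreshing the
# cached copies from the sitematrix if they are over a week old: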
def get_sites(bot):
    max_staleness = 60 * 60 * 24 * 7
    conn = open_sql_connection(bot, "globals")
    query1 = "SELECT update_time FROM updates WHERE update_service = ?"
    query2 = "SELECT lang_code, lang_name FROM language"
    query3 = "SELECT project_code, project_name FROM project"
    with conn.cursor() as cursor:
        cursor.execute(query1, ("sites",))
        try:
            time_since_update = int(time() - cursor.fetchall()[0][0])
        except IndexError:
            time_since_update = time()
        if time_since_update > max_staleness:
            update_sites(bot.wiki.get_site(), cursor)
        cursor.execute(query2)
        langs = []
        for code, name in cursor.fetchall():
            if "\U" in name:
                name = name.decode("unicode_escape")
            langs.append((code, name))
        cursor.execute(query3)
        projects = cursor.fetchall()
    return langs, projects
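
# Rebuild the language and project sets from the API's sitematrix query,
# skipping closed, private, and fishbowl wikis: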
def update_sites(site, cursor):
    matrix = site.api_query(action="sitematrix")["sitematrix"]
    del matrix["count"]
    languages, projects = set(), set()
    for site in matrix.itervalues():
        if isinstance(site, list):  # Special sites
            bad_sites = ["closed", "private", "fishbowl"]
            for special in site:
                if all([key not in special for key in bad_sites]):
                    full = urlparse(special["url"]).netloc
                    if full.count(".") == 1:  # No subdomain, so use "www"
                        lang, project = "www", full.split(".")[0]
                    else:
                        lang, project = full.rsplit(".", 2)[:2]
                    code = u"{0}::{1}".format(lang, special["dbname"])
                    name = special["code"].capitalize()
                    languages.add((code, u"{0} ({1})".format(lang, name)))
                    projects.add((project, project.capitalize()))
            continue
        this = set()
        for web in site["site"]:
            if "closed" in web:
                continue
            project = "wikipedia" if web["code"] == u"wiki" else web["code"]
            this.add((project, project.capitalize()))
        if this:
            code = site["code"]
            if "\U" in site["name"].encode("unicode_escape"):
                name = site["name"].encode("unicode_escape")
            else:
                name = site["name"]
            languages.add((code, u"{0} ({1})".format(code, name)))
            projects |= this
    save_site_updates(cursor, languages, projects)
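
# Write the freshly-built language and project sets to the database and bump
# the "sites" update timestamp: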
def save_site_updates(cursor, languages, projects):
    query1 = "SELECT lang_code, lang_name FROM language"
    query2 = "DELETE FROM language WHERE lang_code = ? AND lang_name = ?"
    query3 = "INSERT INTO language VALUES (?, ?)"
    query4 = "SELECT project_code, project_name FROM project"
    query5 = "DELETE FROM project WHERE project_code = ? AND project_name = ?"
    query6 = "INSERT INTO project VALUES (?, ?)"
    query7 = "SELECT 1 FROM updates WHERE update_service = ?"
    query8 = "UPDATE updates SET update_time = ? WHERE update_service = ?"
    query9 = "INSERT INTO updates VALUES (?, ?)"
    synchronize_sites_with_db(cursor, languages, query1, query2, query3)
    synchronize_sites_with_db(cursor, projects, query4, query5, query6)
    cursor.execute(query7, ("sites",))
    if cursor.fetchall():
        cursor.execute(query8, (time(), "sites"))
    else:
        cursor.execute(query9, ("sites", time()))
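
# Diff a freshly-built set against the corresponding table: rows no longer
# present are deleted, and new ones are inserted: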
def synchronize_sites_with_db(cursor, updates, q_list, q_rmv, q_update):
    removals = []
    cursor.execute(q_list)
    for site in cursor:
        if site in updates:
            updates.remove(site)
        else:
            removals.append(site)
    cursor.executemany(q_rmv, removals)
    cursor.executemany(q_update, updates)