@@ -0,0 +1 @@ | |||||
# -*- coding: utf-8 -*- |
@@ -16,13 +16,13 @@ def get_results(context, bot, site, title, url, query): | |||||
return page, None | return page, None | ||||
# if url: | # if url: | ||||
# result = get_url_specific_results(page, url) | |||||
# result = _get_url_specific_results(page, url) | |||||
# else: | # else: | ||||
# conn = open_sql_connection(bot, "copyvioCache") | # conn = open_sql_connection(bot, "copyvioCache") | ||||
# if not query.get("nocache"): | # if not query.get("nocache"): | ||||
# result = get_cached_results(page, conn) | |||||
# result = _get_cached_results(page, conn) | |||||
# if query.get("nocache") or not result: | # if query.get("nocache") or not result: | ||||
# result = get_fresh_results(page, conn) | |||||
# result = _get_fresh_results(page, conn) | |||||
tstart = time() | tstart = time() | ||||
mc1 = __import__("earwigbot").wiki.copyvios.MarkovChain(page.get()) | mc1 = __import__("earwigbot").wiki.copyvios.MarkovChain(page.get()) | ||||
mc2 = __import__("earwigbot").wiki.copyvios.MarkovChain(u"This is some random textual content for a page.") | mc2 = __import__("earwigbot").wiki.copyvios.MarkovChain(u"This is some random textual content for a page.") | ||||
@@ -34,14 +34,14 @@ def get_results(context, bot, site, title, url, query): | |||||
# END TEST BLOCK | # END TEST BLOCK | ||||
return page, result | return page, result | ||||
def _get_url_specific_results(page, url):
    """Compare *page* against one URL and record how long it took.

    Calls ``page.copyvio_compare(url)`` and annotates the object it
    returns before handing it back:

    - ``cached`` is set to ``False`` (the comparison was just run).
    - ``tdiff`` is the elapsed wall-clock time of the comparison, in
      seconds, measured with ``time()``.

    Returns the annotated result object.
    """
    t_start = time()
    result = page.copyvio_compare(url)
    result.cached = False
    result.tdiff = time() - t_start
    return result
def get_cached_results(page, conn): | |||||
def _get_cached_results(page, conn): | |||||
query1 = "DELETE FROM cache WHERE cache_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 3 DAY)" | query1 = "DELETE FROM cache WHERE cache_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 3 DAY)" | ||||
query2 = "SELECT cache_url, cache_time, cache_queries, cache_process_time FROM cache WHERE cache_id = ? AND cache_hash = ?" | query2 = "SELECT cache_url, cache_time, cache_queries, cache_process_time FROM cache WHERE cache_id = ? AND cache_hash = ?" | ||||
pageid = page.pageid() | pageid = page.pageid() | ||||
@@ -62,10 +62,10 @@ def get_cached_results(page, conn): | |||||
result.tdiff = time() - t_start | result.tdiff = time() - t_start | ||||
result.original_tdiff = original_tdiff | result.original_tdiff = original_tdiff | ||||
result.cache_time = cache_time.strftime("%b %d, %Y %H:%M:%S UTC") | result.cache_time = cache_time.strftime("%b %d, %Y %H:%M:%S UTC") | ||||
result.cache_age = format_date(cache_time) | |||||
result.cache_age = _format_date(cache_time) | |||||
return result | return result | ||||
def format_date(cache_time): | |||||
def _format_date(cache_time): | |||||
diff = datetime.utcnow() - cache_time | diff = datetime.utcnow() - cache_time | ||||
if diff.seconds > 3600: | if diff.seconds > 3600: | ||||
return "{0} hours".format(diff.seconds / 3600) | return "{0} hours".format(diff.seconds / 3600) | ||||
@@ -73,15 +73,15 @@ def format_date(cache_time): | |||||
return "{0} minutes".format(diff.seconds / 60) | return "{0} minutes".format(diff.seconds / 60) | ||||
return "{0} seconds".format(diff.seconds) | return "{0} seconds".format(diff.seconds) | ||||
def _get_fresh_results(page, conn):
    """Run a new copyvio check on *page*, time it, and cache the result.

    Calls ``page.copyvio_check(max_queries=10)`` and annotates the object
    it returns:

    - ``cached`` is set to ``False`` (the check was just run).
    - ``tdiff`` is the elapsed wall-clock time of the check, in seconds.

    The annotated result is then passed to ``_cache_result`` together with
    *page* and *conn* before being returned.
    """
    t_start = time()
    result = page.copyvio_check(max_queries=10)
    result.cached = False
    # Stop the clock before caching so tdiff covers only the check itself
    result.tdiff = time() - t_start
    _cache_result(page, result, conn)
    return result
def cache_result(page, result, conn): | |||||
def _cache_result(page, result, conn): | |||||
pageid = page.pageid() | pageid = page.pageid() | ||||
hash = sha256(page.get()).hexdigest() | hash = sha256(page.get()).hexdigest() | ||||
query1 = "SELECT 1 FROM cache WHERE cache_id = ?" | query1 = "SELECT 1 FROM cache WHERE cache_id = ?" | ||||
@@ -13,17 +13,17 @@ def highlight_delta(context, chain, delta): | |||||
words = paragraph.split(" ") | words = paragraph.split(" ") | ||||
for i, word in enumerate(words, i): | for i, word in enumerate(words, i): | ||||
try: | try: | ||||
next = strip_word(all_words[i+1]) | |||||
next = _strip_word(all_words[i+1]) | |||||
except IndexError: | except IndexError: | ||||
next = chain.END | next = chain.END | ||||
sword = strip_word(word) | |||||
sword = _strip_word(word) | |||||
block = (prev_prev, prev) # Block for before | block = (prev_prev, prev) # Block for before | ||||
alock = (prev, sword) # Block for after | alock = (prev, sword) # Block for after | ||||
before = [block in delta.chain and sword in delta.chain[block]] | before = [block in delta.chain and sword in delta.chain[block]] | ||||
after = [alock in delta.chain and next in delta.chain[alock]] | after = [alock in delta.chain and next in delta.chain[alock]] | ||||
is_first = i == 0 | is_first = i == 0 | ||||
is_last = i + 1 == len(all_words) | is_last = i + 1 == len(all_words) | ||||
res = highlight_word(word, before, after, is_first, is_last) | |||||
res = _highlight_word(word, before, after, is_first, is_last) | |||||
processed_words.append(res) | processed_words.append(res) | ||||
prev_prev = prev | prev_prev = prev | ||||
prev = sword | prev = sword | ||||
@@ -31,7 +31,7 @@ def highlight_delta(context, chain, delta): | |||||
i += 1 | i += 1 | ||||
return u"<br /><br />".join(processed) | return u"<br /><br />".join(processed) | ||||
def highlight_word(word, before, after, is_first, is_last): | |||||
def _highlight_word(word, before, after, is_first, is_last): | |||||
if before and after: | if before and after: | ||||
# Word is in the middle of a highlighted block, so don't change | # Word is in the middle of a highlighted block, so don't change | ||||
# anything unless this is the first word (force block to start) or | # anything unless this is the first word (force block to start) or | ||||
@@ -45,14 +45,14 @@ def highlight_word(word, before, after, is_first, is_last): | |||||
# Word is the last in a highlighted block, so fade it out and then | # Word is the last in a highlighted block, so fade it out and then | ||||
# end the block; force open a block before the word if this is the | # end the block; force open a block before the word if this is the | ||||
# first word: | # first word: | ||||
res = fade_word(word, u"out") + u"</span>" | |||||
res = _fade_word(word, u"out") + u"</span>" | |||||
if is_first: | if is_first: | ||||
res = u'<span class="cv-hl">' + res | res = u'<span class="cv-hl">' + res | ||||
elif after: | elif after: | ||||
# Word is the first in a highlighted block, so start the block and | # Word is the first in a highlighted block, so start the block and | ||||
# then fade it in; force close the block after the word if this is | # then fade it in; force close the block after the word if this is | ||||
# the last word: | # the last word: | ||||
res = u'<span class="cv-hl">' + fade_word(word, u"in") | |||||
res = u'<span class="cv-hl">' + _fade_word(word, u"in") | |||||
if is_last: | if is_last: | ||||
res += u"</span>" | res += u"</span>" | ||||
else: | else: | ||||
@@ -60,12 +60,12 @@ def highlight_word(word, before, after, is_first, is_last): | |||||
res = word | res = word | ||||
return res | return res | ||||
def fade_word(word, dir): | |||||
def _fade_word(word, dir): | |||||
if len(word) <= 4: | if len(word) <= 4: | ||||
return u'<span class="cv-hl-{0}">{1}</span>'.format(dir, word) | return u'<span class="cv-hl-{0}">{1}</span>'.format(dir, word) | ||||
if dir == u"out": | if dir == u"out": | ||||
return u'{0}<span class="cv-hl-out">{1}</span>'.format(word[:-4], word[-4:]) | return u'{0}<span class="cv-hl-out">{1}</span>'.format(word[:-4], word[-4:]) | ||||
return u'<span class="cv-hl-in">{0}</span>{1}'.format(word[:4], word[4:]) | return u'<span class="cv-hl-in">{0}</span>{1}'.format(word[:4], word[4:]) | ||||
def strip_word(word): | |||||
def _strip_word(word): | |||||
return sub("[^\w\s-]", "", word.lower(), flags=UNICODE) | return sub("[^\w\s-]", "", word.lower(), flags=UNICODE) |
@@ -39,7 +39,7 @@ def get_sites(context, bot): | |||||
except IndexError: | except IndexError: | ||||
time_since_update = time() | time_since_update = time() | ||||
if time_since_update > max_staleness: | if time_since_update > max_staleness: | ||||
update_sites(bot.wiki.get_site(), cursor) | |||||
_update_sites(bot.wiki.get_site(), cursor) | |||||
cursor.execute(query2) | cursor.execute(query2) | ||||
langs = [] | langs = [] | ||||
for code, name in cursor.fetchall(): | for code, name in cursor.fetchall(): | ||||
@@ -50,7 +50,7 @@ def get_sites(context, bot): | |||||
projects = cursor.fetchall() | projects = cursor.fetchall() | ||||
return langs, projects | return langs, projects | ||||
def update_sites(site, cursor): | |||||
def _update_sites(site, cursor): | |||||
matrix = site.api_query(action="sitematrix")["sitematrix"] | matrix = site.api_query(action="sitematrix")["sitematrix"] | ||||
del matrix["count"] | del matrix["count"] | ||||
languages, projects = set(), set() | languages, projects = set(), set() | ||||
@@ -83,9 +83,9 @@ def update_sites(site, cursor): | |||||
name = site["name"] | name = site["name"] | ||||
languages.add((code, u"{0} ({1})".format(code, name))) | languages.add((code, u"{0} ({1})".format(code, name))) | ||||
projects |= this | projects |= this | ||||
save_site_updates(cursor, languages, projects) | |||||
_save_site_updates(cursor, languages, projects) | |||||
def save_site_updates(cursor, languages, projects): | |||||
def _save_site_updates(cursor, languages, projects): | |||||
query1 = "SELECT lang_code, lang_name FROM language" | query1 = "SELECT lang_code, lang_name FROM language" | ||||
query2 = "DELETE FROM language WHERE lang_code = ? AND lang_name = ?" | query2 = "DELETE FROM language WHERE lang_code = ? AND lang_name = ?" | ||||
query3 = "INSERT INTO language VALUES (?, ?)" | query3 = "INSERT INTO language VALUES (?, ?)" | ||||
@@ -95,15 +95,15 @@ def save_site_updates(cursor, languages, projects): | |||||
query7 = "SELECT 1 FROM updates WHERE update_service = ?" | query7 = "SELECT 1 FROM updates WHERE update_service = ?" | ||||
query8 = "UPDATE updates SET update_time = ? WHERE update_service = ?" | query8 = "UPDATE updates SET update_time = ? WHERE update_service = ?" | ||||
query9 = "INSERT INTO updates VALUES (?, ?)" | query9 = "INSERT INTO updates VALUES (?, ?)" | ||||
synchronize_sites_with_db(cursor, languages, query1, query2, query3) | |||||
synchronize_sites_with_db(cursor, projects, query4, query5, query6) | |||||
_synchronize_sites_with_db(cursor, languages, query1, query2, query3) | |||||
_synchronize_sites_with_db(cursor, projects, query4, query5, query6) | |||||
cursor.execute(query7, ("sites",)) | cursor.execute(query7, ("sites",)) | ||||
if cursor.fetchall(): | if cursor.fetchall(): | ||||
cursor.execute(query8, (time(), "sites")) | cursor.execute(query8, (time(), "sites")) | ||||
else: | else: | ||||
cursor.execute(query9, ("sites", time())) | cursor.execute(query9, ("sites", time())) | ||||
def synchronize_sites_with_db(cursor, updates, q_list, q_rmv, q_update): | |||||
def _synchronize_sites_with_db(cursor, updates, q_list, q_rmv, q_update): | |||||
removals = [] | removals = [] | ||||
cursor.execute(q_list) | cursor.execute(q_list) | ||||
for site in cursor: | for site in cursor: | ||||