@@ -0,0 +1 @@ | |||
# -*- coding: utf-8 -*- |
@@ -16,13 +16,13 @@ def get_results(context, bot, site, title, url, query): | |||
return page, None | |||
# if url: | |||
# result = get_url_specific_results(page, url) | |||
# result = _get_url_specific_results(page, url) | |||
# else: | |||
# conn = open_sql_connection(bot, "copyvioCache") | |||
# if not query.get("nocache"): | |||
# result = get_cached_results(page, conn) | |||
# result = _get_cached_results(page, conn) | |||
# if query.get("nocache") or not result: | |||
# result = get_fresh_results(page, conn) | |||
# result = _get_fresh_results(page, conn) | |||
tstart = time() | |||
mc1 = __import__("earwigbot").wiki.copyvios.MarkovChain(page.get()) | |||
mc2 = __import__("earwigbot").wiki.copyvios.MarkovChain(u"This is some random textual content for a page.") | |||
@@ -34,14 +34,14 @@ def get_results(context, bot, site, title, url, query): | |||
# END TEST BLOCK | |||
return page, result | |||
def _get_url_specific_results(page, url):
    """Compare *page* against a single URL and return the timed result.

    Args:
        page: object exposing ``copyvio_compare(url)``.
        url: the URL to compare the page against.

    Returns:
        The object returned by ``page.copyvio_compare(url)``, annotated with
        ``cached = False`` and ``tdiff``, the wall-clock seconds the
        comparison took.
    """
    t_start = time()
    result = page.copyvio_compare(url)
    # This path never consults the cache, so the result is always fresh.
    result.cached = False
    result.tdiff = time() - t_start
    return result
def get_cached_results(page, conn): | |||
def _get_cached_results(page, conn): | |||
query1 = "DELETE FROM cache WHERE cache_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 3 DAY)" | |||
query2 = "SELECT cache_url, cache_time, cache_queries, cache_process_time FROM cache WHERE cache_id = ? AND cache_hash = ?" | |||
pageid = page.pageid() | |||
@@ -62,10 +62,10 @@ def get_cached_results(page, conn): | |||
result.tdiff = time() - t_start | |||
result.original_tdiff = original_tdiff | |||
result.cache_time = cache_time.strftime("%b %d, %Y %H:%M:%S UTC") | |||
result.cache_age = format_date(cache_time) | |||
result.cache_age = _format_date(cache_time) | |||
return result | |||
def format_date(cache_time): | |||
def _format_date(cache_time): | |||
diff = datetime.utcnow() - cache_time | |||
if diff.seconds > 3600: | |||
return "{0} hours".format(diff.seconds / 3600) | |||
@@ -73,15 +73,15 @@ def format_date(cache_time): | |||
return "{0} minutes".format(diff.seconds / 60) | |||
return "{0} seconds".format(diff.seconds) | |||
def _get_fresh_results(page, conn):
    """Run a new copyvio check on *page*, cache it, and return the result.

    Args:
        page: object exposing ``copyvio_check(max_queries=...)``.
        conn: connection handed through to ``_cache_result`` unchanged.

    Returns:
        The object returned by ``page.copyvio_check``, annotated with
        ``cached = False`` and ``tdiff``, the wall-clock seconds the check
        took.
    """
    t_start = time()
    result = page.copyvio_check(max_queries=10)
    result.cached = False
    # Timing is recorded before the cache write, so tdiff covers only the
    # check itself and not the time spent storing it.
    result.tdiff = time() - t_start
    _cache_result(page, result, conn)
    return result
def cache_result(page, result, conn): | |||
def _cache_result(page, result, conn): | |||
pageid = page.pageid() | |||
hash = sha256(page.get()).hexdigest() | |||
query1 = "SELECT 1 FROM cache WHERE cache_id = ?" | |||
@@ -13,17 +13,17 @@ def highlight_delta(context, chain, delta): | |||
words = paragraph.split(" ") | |||
for i, word in enumerate(words, i): | |||
try: | |||
next = strip_word(all_words[i+1]) | |||
next = _strip_word(all_words[i+1]) | |||
except IndexError: | |||
next = chain.END | |||
sword = strip_word(word) | |||
sword = _strip_word(word) | |||
block = (prev_prev, prev) # Block for before | |||
alock = (prev, sword) # Block for after | |||
before = [block in delta.chain and sword in delta.chain[block]] | |||
after = [alock in delta.chain and next in delta.chain[alock]] | |||
is_first = i == 0 | |||
is_last = i + 1 == len(all_words) | |||
res = highlight_word(word, before, after, is_first, is_last) | |||
res = _highlight_word(word, before, after, is_first, is_last) | |||
processed_words.append(res) | |||
prev_prev = prev | |||
prev = sword | |||
@@ -31,7 +31,7 @@ def highlight_delta(context, chain, delta): | |||
i += 1 | |||
return u"<br /><br />".join(processed) | |||
def highlight_word(word, before, after, is_first, is_last): | |||
def _highlight_word(word, before, after, is_first, is_last): | |||
if before and after: | |||
# Word is in the middle of a highlighted block, so don't change | |||
# anything unless this is the first word (force block to start) or | |||
@@ -45,14 +45,14 @@ def highlight_word(word, before, after, is_first, is_last): | |||
# Word is the last in a highlighted block, so fade it out and then | |||
# end the block; force open a block before the word if this is the | |||
# first word: | |||
res = fade_word(word, u"out") + u"</span>" | |||
res = _fade_word(word, u"out") + u"</span>" | |||
if is_first: | |||
res = u'<span class="cv-hl">' + res | |||
elif after: | |||
# Word is the first in a highlighted block, so start the block and | |||
# then fade it in; force close the block after the word if this is | |||
# the last word: | |||
res = u'<span class="cv-hl">' + fade_word(word, u"in") | |||
res = u'<span class="cv-hl">' + _fade_word(word, u"in") | |||
if is_last: | |||
res += u"</span>" | |||
else: | |||
@@ -60,12 +60,12 @@ def highlight_word(word, before, after, is_first, is_last): | |||
res = word | |||
return res | |||
def _fade_word(word, dir):
    """Wrap part of *word* in a fade-in or fade-out highlight span.

    Args:
        word: the text to decorate; it is inserted into the markup as-is.
        dir: ``u"out"`` to fade the end of the word; any other value is
            treated as a fade-in for words longer than four characters.

    Returns:
        A unicode HTML fragment. Words of four characters or fewer are
        wrapped whole in ``<span class="cv-hl-{dir}">``. Longer words have
        only their last four characters (``u"out"``) or first four
        characters (otherwise) wrapped.
    """
    if len(word) <= 4:
        # Too short to split: fade the entire word in the given direction.
        return u'<span class="cv-hl-{0}">{1}</span>'.format(dir, word)
    if dir == u"out":
        return u'{0}<span class="cv-hl-out">{1}</span>'.format(word[:-4], word[-4:])
    return u'<span class="cv-hl-in">{0}</span>{1}'.format(word[:4], word[4:])
def _strip_word(word):
    """Return *word* lowercased with punctuation removed.

    Every character that is not a word character (``\\w``), whitespace, or a
    hyphen is dropped. Matching uses the ``UNICODE`` flag, so non-ASCII
    letters count as word characters and are kept.
    """
    # Raw string: "\w" and "\s" are regex classes, not string escapes.
    return sub(r"[^\w\s-]", "", word.lower(), flags=UNICODE)
@@ -39,7 +39,7 @@ def get_sites(context, bot): | |||
except IndexError: | |||
time_since_update = time() | |||
if time_since_update > max_staleness: | |||
update_sites(bot.wiki.get_site(), cursor) | |||
_update_sites(bot.wiki.get_site(), cursor) | |||
cursor.execute(query2) | |||
langs = [] | |||
for code, name in cursor.fetchall(): | |||
@@ -50,7 +50,7 @@ def get_sites(context, bot): | |||
projects = cursor.fetchall() | |||
return langs, projects | |||
def update_sites(site, cursor): | |||
def _update_sites(site, cursor): | |||
matrix = site.api_query(action="sitematrix")["sitematrix"] | |||
del matrix["count"] | |||
languages, projects = set(), set() | |||
@@ -83,9 +83,9 @@ def update_sites(site, cursor): | |||
name = site["name"] | |||
languages.add((code, u"{0} ({1})".format(code, name))) | |||
projects |= this | |||
save_site_updates(cursor, languages, projects) | |||
_save_site_updates(cursor, languages, projects) | |||
def save_site_updates(cursor, languages, projects): | |||
def _save_site_updates(cursor, languages, projects): | |||
query1 = "SELECT lang_code, lang_name FROM language" | |||
query2 = "DELETE FROM language WHERE lang_code = ? AND lang_name = ?" | |||
query3 = "INSERT INTO language VALUES (?, ?)" | |||
@@ -95,15 +95,15 @@ def save_site_updates(cursor, languages, projects): | |||
query7 = "SELECT 1 FROM updates WHERE update_service = ?" | |||
query8 = "UPDATE updates SET update_time = ? WHERE update_service = ?" | |||
query9 = "INSERT INTO updates VALUES (?, ?)" | |||
synchronize_sites_with_db(cursor, languages, query1, query2, query3) | |||
synchronize_sites_with_db(cursor, projects, query4, query5, query6) | |||
_synchronize_sites_with_db(cursor, languages, query1, query2, query3) | |||
_synchronize_sites_with_db(cursor, projects, query4, query5, query6) | |||
cursor.execute(query7, ("sites",)) | |||
if cursor.fetchall(): | |||
cursor.execute(query8, (time(), "sites")) | |||
else: | |||
cursor.execute(query9, ("sites", time())) | |||
def synchronize_sites_with_db(cursor, updates, q_list, q_rmv, q_update): | |||
def _synchronize_sites_with_db(cursor, updates, q_list, q_rmv, q_update): | |||
removals = [] | |||
cursor.execute(q_list) | |||
for site in cursor: | |||