diff --git a/pages/support/__init__.py b/pages/support/__init__.py new file mode 100644 index 0000000..89907bf --- /dev/null +++ b/pages/support/__init__.py @@ -0,0 +1 @@ +# -*- coding: utf-8 -*- diff --git a/pages/support/copyvios/checker.py b/pages/support/copyvios/checker.py index 048a0fa..5731ac7 100644 --- a/pages/support/copyvios/checker.py +++ b/pages/support/copyvios/checker.py @@ -16,13 +16,13 @@ def get_results(context, bot, site, title, url, query): return page, None # if url: - # result = get_url_specific_results(page, url) + # result = _get_url_specific_results(page, url) # else: # conn = open_sql_connection(bot, "copyvioCache") # if not query.get("nocache"): - # result = get_cached_results(page, conn) + # result = _get_cached_results(page, conn) # if query.get("nocache") or not result: - # result = get_fresh_results(page, conn) + # result = _get_fresh_results(page, conn) tstart = time() mc1 = __import__("earwigbot").wiki.copyvios.MarkovChain(page.get()) mc2 = __import__("earwigbot").wiki.copyvios.MarkovChain(u"This is some random textual content for a page.") @@ -34,14 +34,14 @@ def get_results(context, bot, site, title, url, query): # END TEST BLOCK return page, result -def get_url_specific_results(page, url): +def _get_url_specific_results(page, url): t_start = time() result = page.copyvio_compare(url) result.cached = False result.tdiff = time() - t_start return result -def get_cached_results(page, conn): +def _get_cached_results(page, conn): query1 = "DELETE FROM cache WHERE cache_time < DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 3 DAY)" query2 = "SELECT cache_url, cache_time, cache_queries, cache_process_time FROM cache WHERE cache_id = ? AND cache_hash = ?" pageid = page.pageid() @@ -62,10 +62,10 @@ def get_cached_results(page, conn): result.tdiff = time() - t_start result.original_tdiff = original_tdiff result.cache_time = cache_time.strftime("%b %d, %Y %H:%M:%S UTC") - result.cache_age = format_date(cache_time) + result.cache_age = _format_date(cache_time) return result -def format_date(cache_time): +def _format_date(cache_time): diff = datetime.utcnow() - cache_time if diff.seconds > 3600: return "{0} hours".format(diff.seconds / 3600) @@ -73,15 +73,15 @@ def format_date(cache_time): return "{0} minutes".format(diff.seconds / 60) return "{0} seconds".format(diff.seconds) -def get_fresh_results(page, conn): +def _get_fresh_results(page, conn): t_start = time() result = page.copyvio_check(max_queries=10) result.cached = False result.tdiff = time() - t_start - cache_result(page, result, conn) + _cache_result(page, result, conn) return result -def cache_result(page, result, conn): +def _cache_result(page, result, conn): pageid = page.pageid() hash = sha256(page.get()).hexdigest() query1 = "SELECT 1 FROM cache WHERE cache_id = ?" diff --git a/pages/support/copyvios/highlighter.py b/pages/support/copyvios/highlighter.py index df9f9ba..0c0b17e 100644 --- a/pages/support/copyvios/highlighter.py +++ b/pages/support/copyvios/highlighter.py @@ -13,17 +13,17 @@ def highlight_delta(context, chain, delta): words = paragraph.split(" ") for i, word in enumerate(words, i): try: - next = strip_word(all_words[i+1]) + next = _strip_word(all_words[i+1]) except IndexError: next = chain.END - sword = strip_word(word) + sword = _strip_word(word) block = (prev_prev, prev) # Block for before alock = (prev, sword) # Block for after before = [block in delta.chain and sword in delta.chain[block]] after = [alock in delta.chain and next in delta.chain[alock]] is_first = i == 0 is_last = i + 1 == len(all_words) - res = highlight_word(word, before, after, is_first, is_last) + res = _highlight_word(word, before, after, is_first, is_last) processed_words.append(res) prev_prev = prev prev = sword @@ -31,7 +31,7 @@ def highlight_delta(context, chain, delta): i += 1 return u"

".join(processed) -def highlight_word(word, before, after, is_first, is_last): +def _highlight_word(word, before, after, is_first, is_last): if before and after: # Word is in the middle of a highlighted block, so don't change # anything unless this is the first word (force block to start) or @@ -45,14 +45,14 @@ def highlight_word(word, before, after, is_first, is_last): # Word is the last in a highlighted block, so fade it out and then # end the block; force open a block before the word if this is the # first word: - res = fade_word(word, u"out") + u"" + res = _fade_word(word, u"out") + u"" if is_first: res = u'' + res elif after: # Word is the first in a highlighted block, so start the block and # then fade it in; force close the block after the word if this is # the last word: - res = u'' + fade_word(word, u"in") + res = u'' + _fade_word(word, u"in") if is_last: res += u"" else: @@ -60,12 +60,12 @@ def highlight_word(word, before, after, is_first, is_last): res = word return res -def fade_word(word, dir): +def _fade_word(word, dir): if len(word) <= 4: return u'{1}'.format(dir, word) if dir == u"out": return u'{0}{1}'.format(word[:-4], word[-4:]) return u'{0}{1}'.format(word[:4], word[4:]) -def strip_word(word): +def _strip_word(word): return sub("[^\w\s-]", "", word.lower(), flags=UNICODE) diff --git a/pages/support/sites.py b/pages/support/sites.py index 9da2a95..75896da 100644 --- a/pages/support/sites.py +++ b/pages/support/sites.py @@ -39,7 +39,7 @@ def get_sites(context, bot): except IndexError: time_since_update = time() if time_since_update > max_staleness: - update_sites(bot.wiki.get_site(), cursor) + _update_sites(bot.wiki.get_site(), cursor) cursor.execute(query2) langs = [] for code, name in cursor.fetchall(): @@ -50,7 +50,7 @@ def get_sites(context, bot): projects = cursor.fetchall() return langs, projects -def update_sites(site, cursor): +def _update_sites(site, cursor): matrix = site.api_query(action="sitematrix")["sitematrix"] del matrix["count"] languages, projects = set(), set() @@ -83,9 +83,9 @@ def update_sites(site, cursor): name = site["name"] languages.add((code, u"{0} ({1})".format(code, name))) projects |= this - save_site_updates(cursor, languages, projects) + _save_site_updates(cursor, languages, projects) -def save_site_updates(cursor, languages, projects): +def _save_site_updates(cursor, languages, projects): query1 = "SELECT lang_code, lang_name FROM language" query2 = "DELETE FROM language WHERE lang_code = ? AND lang_name = ?" query3 = "INSERT INTO language VALUES (?, ?)" @@ -95,15 +95,15 @@ def save_site_updates(cursor, languages, projects): query7 = "SELECT 1 FROM updates WHERE update_service = ?" query8 = "UPDATE updates SET update_time = ? WHERE update_service = ?" query9 = "INSERT INTO updates VALUES (?, ?)" - synchronize_sites_with_db(cursor, languages, query1, query2, query3) - synchronize_sites_with_db(cursor, projects, query4, query5, query6) + _synchronize_sites_with_db(cursor, languages, query1, query2, query3) + _synchronize_sites_with_db(cursor, projects, query4, query5, query6) cursor.execute(query7, ("sites",)) if cursor.fetchall(): cursor.execute(query8, (time(), "sites")) else: cursor.execute(query9, ("sites", time())) -def synchronize_sites_with_db(cursor, updates, q_list, q_rmv, q_update): +def _synchronize_sites_with_db(cursor, updates, q_list, q_rmv, q_update): removals = [] cursor.execute(q_list) for site in cursor: