@@ -53,7 +53,7 @@ def _build_url(screen, filename, url, imgwidth, imgheight):
     if width >= imgwidth:
         return url
     url = url.replace("/commons/", "/commons/thumb/")
-    return "%s/%dpx-%s" % (url, width, urllib.quote(filename))
+    return "%s/%dpx-%s" % (url, width, urllib.quote(filename.encode("utf8")))
 
 _BACKGROUNDS = {
     "potd": _get_fresh_potd,
@@ -78,8 +78,9 @@ def set_background(selected):
     screen_cache = g.cookies["CopyviosScreenCache"].value
     try:
         screen = loads(screen_cache)
-        int(screen["width"])
-        int(screen["height"])
+        screen = {"width": int(screen["width"]), "height": int(screen["height"])}
+        if screen["width"] <= 0 or screen["height"] <= 0:
+            raise ValueError()
     except (ValueError, KeyError):
         screen = {"width": 1024, "height": 768}
     else:
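The rewritten validation also normalizes the cookie into plain ints and rejects non-positive sizes, so a malformed or hostile CopyviosScreenCache cookie now falls back to the 1024x768 default instead of carrying junk values forward. A quick sketch of the effect, assuming loads is json.loads (the helper name is ours, cookie payloads are made up):

    from json import loads

    def _parse_screen(raw):
        # mirrors the validation added above
        try:
            data = loads(raw)
            screen = {"width": int(data["width"]), "height": int(data["height"])}
            if screen["width"] <= 0 or screen["height"] <= 0:
                raise ValueError()
        except (ValueError, KeyError):
            screen = {"width": 1024, "height": 768}
        return screen

    print(_parse_screen('{"width": 1920, "height": 1080}'))   # kept, as ints
    print(_parse_screen('{"width": "-5", "height": "600"}'))  # falls back to the default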
@@ -94,10 +94,16 @@ def _get_results(query, follow=True):
     elif scheme not in ["http", "https"]:
         query.error = "bad URI"
         return
+    degree = 5
+    if query.degree:
+        try:
+            degree = int(query.degree)
+        except ValueError:
+            pass
     result = page.copyvio_compare(query.url, min_confidence=T_SUSPECT,
-                                  max_time=30)
+                                  max_time=10, degree=degree)
     if result.best.chains[0] is EMPTY:
-        query.error = "timeout" if result.time > 30 else "no data"
+        query.error = "timeout" if result.time > 10 else "no data"
         return
     query.result = result
     query.result.cached = False
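degree presumably controls the n-gram size the comparison uses; since it can arrive straight from the query string, it may also be worth clamping it to a sane range rather than only int()-ing it. A possible tightening, not part of this diff (function name and bounds are guesses):

    def _parse_degree(raw, default=5, lo=1, hi=8):
        # fall back to the default on junk input and keep the value within bounds
        try:
            value = int(raw)
        except (TypeError, ValueError):
            return default
        return max(lo, min(hi, value))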
@@ -140,7 +146,7 @@ def _perform_check(query, page, use_engine, use_links):
     if not query.result:
         try:
             query.result = page.copyvio_check(
-                min_confidence=T_SUSPECT, max_queries=8, max_time=45,
+                min_confidence=T_SUSPECT, max_queries=8, max_time=30,
                 no_searches=not use_engine, no_links=not use_links,
                 short_circuit=not query.noskip)
         except exceptions.SearchQueryError as exc:
@@ -190,7 +196,7 @@ def _get_cached_results(page, conn, mode, noskip):
     url, confidence, skipped, excluded = data.pop(0)
     if skipped:  # Should be impossible: data must be bad; run a new check
         return None
-    result = page.copyvio_compare(url, min_confidence=T_SUSPECT, max_time=30)
+    result = page.copyvio_compare(url, min_confidence=T_SUSPECT, max_time=10)
     if abs(result.confidence - confidence) >= 0.0001:
         return None
 
@@ -30,7 +30,7 @@ def _make_api_request(page_title, lang): | |||||
'lang': lang, | 'lang': lang, | ||||
'report': 1} | 'report': 1} | ||||
result = requests.get(TURNITIN_API_ENDPOINT, params=api_parameters) | |||||
result = requests.get(TURNITIN_API_ENDPOINT, params=api_parameters, verify=False) | |||||
# use literal_eval to *safely* parse the resulting dict-containing string | # use literal_eval to *safely* parse the resulting dict-containing string | ||||
try: | try: | ||||
parsed_api_result = literal_eval(result.text) | parsed_api_result = literal_eval(result.text) | ||||
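verify=False turns off TLS certificate checking for the Turnitin endpoint and will also make requests emit an InsecureRequestWarning on every call. If the motivation is a certificate the default CA bundle does not know, two common alternatives (the path below is a placeholder, not part of this diff):

    import requests
    import urllib3

    # silence the warning if verify=False is kept deliberately
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    # or keep verification on and point requests at the right CA bundle
    result = requests.get(TURNITIN_API_ENDPOINT, params=api_parameters,
                          verify="/etc/ssl/certs/custom-ca.pem")  # placeholder path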
@@ -1,18 +1,21 @@ | |||||
#!/bin/env python3 | |||||
import argparse | |||||
import re | import re | ||||
import sqlite3 | import sqlite3 | ||||
REGEX = re.compile( | REGEX = re.compile( | ||||
r'^' | r'^' | ||||
r'{address space usage: (?P<used_bytes>\d+) bytes/(?P<used_mb>\w+)} ' | |||||
r'{rss usage: (?P<rss_bytes>\d+) bytes/(?P<rss_mb>\w+)} ' | |||||
r'{address space usage: (?P<used_bytes>-?\d+) bytes/(?P<used_mb>\w+)} ' | |||||
r'{rss usage: (?P<rss_bytes>-?\d+) bytes/(?P<rss_mb>\w+)} ' | |||||
r'\[pid: (?P<pid>\d+)\|app: -\|req: -/-\] (?P<ip>[0-9.]+) \(-\) ' | r'\[pid: (?P<pid>\d+)\|app: -\|req: -/-\] (?P<ip>[0-9.]+) \(-\) ' | ||||
r'{(?P<vars>\d+) vars in (?P<var_bytes>\d+) bytes} ' | r'{(?P<vars>\d+) vars in (?P<var_bytes>\d+) bytes} ' | ||||
r'\[(?P<date>[0-9A-Za-z: ]+)\] (?P<method>\w+) (?P<url>.*?) => ' | r'\[(?P<date>[0-9A-Za-z: ]+)\] (?P<method>\w+) (?P<url>.*?) => ' | ||||
r'generated (?P<resp_bytes>\d+) bytes in (?P<msecs>\d+) msecs ' | r'generated (?P<resp_bytes>\d+) bytes in (?P<msecs>\d+) msecs ' | ||||
r'\((?P<proto>[A-Z0-9/.]+) (?P<status>\d+)\) ' | |||||
r'\((- http://hasty.ai)?(?P<proto>[A-Z0-9/.]+) (?P<status>\d+)\) ' | |||||
r'(?P<headers>\d+) headers in (?P<header_bytes>\d+) bytes ' | r'(?P<headers>\d+) headers in (?P<header_bytes>\d+) bytes ' | ||||
r'\((?P<switches>\d+) switches on core (?P<core>\d+)\) ' | r'\((?P<switches>\d+) switches on core (?P<core>\d+)\) ' | ||||
r'(?P<agent>.*?)' | r'(?P<agent>.*?)' | ||||
r'( (?P<referer>https?://[^ ]*?))?( -)?( http(://|%3A%2F%2F)hasty\.ai)?' | |||||
r'$' | r'$' | ||||
) | ) | ||||
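A quick way to sanity-check the widened pattern is to run it over a line carrying the trailing "hasty.ai" spam; the log line below is fabricated but follows the default uwsgi request-log format:

    sample = (
        "{address space usage: 338743296 bytes/323MB} "
        "{rss usage: 105119744 bytes/100MB} "
        "[pid: 26378|app: -|req: -/-] 203.0.113.7 (-) {44 vars in 772 bytes} "
        "[Thu Oct 21 14:19:05 2021] GET /example => generated 1234 bytes in 56 msecs "
        "(HTTP/1.1 200) 4 headers in 128 bytes (1 switches on core 0) "
        "Mozilla/5.0 - http://hasty.ai"
    )
    match = REGEX.match(sample)
    print(match.group("url"), match.group("status"), match.group("agent"))
    # /example 200 Mozilla/5.0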
@@ -20,17 +23,27 @@ def save_logs(logs):
     columns = sorted(REGEX.groupindex, key=lambda col: REGEX.groupindex[col])
     conn = sqlite3.Connection('logs.db')
     cur = conn.cursor()
-    cur.execute('CREATE TABLE logs(%s)' % ', '.join(columns))
+    cur.execute('CREATE TABLE IF NOT EXISTS logs(%s)' % ', '.join(columns))
     cur.executemany('INSERT INTO logs VALUES (%s)' % ', '.join(['?'] * len(columns)),
                     [[log[col] for col in columns] for log in logs])
     conn.commit()
     conn.close()
 
 def read_logs(path):
-    with open(path) as fp:
+    with open(path, 'r', errors='replace') as fp:
         lines = fp.readlines()
-    return [REGEX.match(line.strip()).groupdict() for line in lines
-            if line.startswith('{address space usage')]
+    parsed = [(line, REGEX.match(line.strip())) for line in lines
+              if line.startswith('{address space usage')]
+    for line, match in parsed:
+        if not match:
+            print('failed to parse:', line.strip())
+    return [match.groupdict() for _, match in parsed if match]
 
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('logfile', nargs='?', default='uwsgi.log')
+    args = parser.parse_args()
+    save_logs(read_logs(args.logfile))
+
 if __name__ == '__main__':
-    save_logs(read_logs('uwsgi.log'))
+    main()
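With the table in place, the data can be poked at directly from sqlite3; for instance (the query is only an example, and since the CREATE TABLE declares no column types, numeric fields are stored as text and need a cast):

    import sqlite3

    conn = sqlite3.connect('logs.db')
    # ten slowest requests by generation time
    query = ('SELECT url, CAST(msecs AS INTEGER) AS ms '
             'FROM logs ORDER BY ms DESC LIMIT 10')
    for url, ms in conn.execute(query):
        print(ms, url)
    conn.close()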