@@ -53,7 +53,7 @@ def _build_url(screen, filename, url, imgwidth, imgheight):
     if width >= imgwidth:
         return url
     url = url.replace("/commons/", "/commons/thumb/")
-    return "%s/%dpx-%s" % (url, width, urllib.quote(filename))
+    return "%s/%dpx-%s" % (url, width, urllib.quote(filename.encode("utf8")))
 
 _BACKGROUNDS = {
     "potd": _get_fresh_potd,
@@ -78,8 +78,9 @@ def set_background(selected):
         screen_cache = g.cookies["CopyviosScreenCache"].value
         try:
             screen = loads(screen_cache)
-            int(screen["width"])
-            int(screen["height"])
+            screen = {"width": int(screen["width"]), "height": int(screen["height"])}
+            if screen["width"] <= 0 or screen["height"] <= 0:
+                raise ValueError()
         except (ValueError, KeyError):
             screen = {"width": 1024, "height": 768}
     else:
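
The screen-cache hunk tightens validation: the old code only checked that the cookie's width and height could be parsed as integers but kept the raw values, while the new code stores the coerced integers and rejects non-positive dimensions, falling back to 1024x768 for anything malformed. The same pattern in isolation, as a self-contained sketch (the function name and JSON cookie format are assumptions, not the app's API):

    import json

    def parse_screen(cookie_value, default=(1024, 768)):
        """Return a validated (width, height) pair from a JSON cookie string."""
        try:
            data = json.loads(cookie_value)
            width, height = int(data["width"]), int(data["height"])
            if width <= 0 or height <= 0:
                raise ValueError()
            return width, height
        except (ValueError, KeyError, TypeError):
            return default

    print(parse_screen('{"width": "1920", "height": "1080"}'))  # (1920, 1080)
    print(parse_screen('{"width": -5, "height": "abc"}'))       # (1024, 768)
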
@@ -94,10 +94,16 @@ def _get_results(query, follow=True):
         elif scheme not in ["http", "https"]:
             query.error = "bad URI"
             return
+        degree = 5
+        if query.degree:
+            try:
+                degree = int(query.degree)
+            except ValueError:
+                pass
         result = page.copyvio_compare(query.url, min_confidence=T_SUSPECT,
-                                      max_time=30)
+                                      max_time=10, degree=degree)
         if result.best.chains[0] is EMPTY:
-            query.error = "timeout" if result.time > 30 else "no data"
+            query.error = "timeout" if result.time > 10 else "no data"
             return
         query.result = result
         query.result.cached = False
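
Two changes in _get_results: URL comparisons now honour an optional degree query parameter (presumably the n-gram chain degree passed through to the comparison), falling back to 5 when it is absent or not a number, and the comparison budget drops from 30 to 10 seconds, with the timeout-message threshold updated to match. The parse-with-fallback pattern on its own, for clarity (names are illustrative):

    def parse_degree(raw, default=5):
        """Parse a user-supplied degree, keeping the default on missing or bad input."""
        if not raw:
            return default
        try:
            return int(raw)
        except ValueError:
            return default

    print(parse_degree(None))   # 5
    print(parse_degree("3"))    # 3
    print(parse_degree("abc"))  # 5
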
@@ -140,7 +146,7 @@ def _perform_check(query, page, use_engine, use_links):
     if not query.result:
         try:
             query.result = page.copyvio_check(
-                min_confidence=T_SUSPECT, max_queries=8, max_time=45,
+                min_confidence=T_SUSPECT, max_queries=8, max_time=30,
                 no_searches=not use_engine, no_links=not use_links,
                 short_circuit=not query.noskip)
         except exceptions.SearchQueryError as exc:
@@ -190,7 +196,7 @@ def _get_cached_results(page, conn, mode, noskip):
     url, confidence, skipped, excluded = data.pop(0)
     if skipped:  # Should be impossible: data must be bad; run a new check
         return None
-    result = page.copyvio_compare(url, min_confidence=T_SUSPECT, max_time=30)
+    result = page.copyvio_compare(url, min_confidence=T_SUSPECT, max_time=10)
     if abs(result.confidence - confidence) >= 0.0001:
         return None
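
The two hunks above tighten the remaining time budgets in the same spirit: full search-engine checks in _perform_check go from 45 to 30 seconds, and the quick re-comparison used to validate cached results in _get_cached_results goes from 30 to 10 seconds, presumably to keep slow sources from tying up workers; both limits are enforced through the max_time argument already visible in the diff.
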
@@ -30,7 +30,7 @@ def _make_api_request(page_title, lang):
                       'lang': lang,
                       'report': 1}
-    result = requests.get(TURNITIN_API_ENDPOINT, params=api_parameters)
+    result = requests.get(TURNITIN_API_ENDPOINT, params=api_parameters, verify=False)
     # use literal_eval to *safely* parse the resulting dict-containing string
     try:
         parsed_api_result = literal_eval(result.text)
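
verify=False turns off TLS certificate verification for the Turnitin API call, presumably to work around a certificate problem on the endpoint; the response itself is still parsed defensively with literal_eval. Requests will log an InsecureRequestWarning on every such call. A hedged sketch of accepting that tradeoff explicitly (the URL is a placeholder; on older requests versions the warning class lives under requests.packages.urllib3 instead):

    import requests
    import urllib3

    # Silence the warning only if unverified TLS is a deliberate choice;
    # pointing verify= at the correct CA bundle would be the cleaner long-term fix.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    response = requests.get("https://api.example.org/report", params={"report": 1},
                            verify=False)
    print(response.status_code)
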
@@ -1,18 +1,21 @@
 #!/bin/env python3
+import argparse
 import re
 import sqlite3
 
 REGEX = re.compile(
     r'^'
-    r'{address space usage: (?P<used_bytes>\d+) bytes/(?P<used_mb>\w+)} '
-    r'{rss usage: (?P<rss_bytes>\d+) bytes/(?P<rss_mb>\w+)} '
+    r'{address space usage: (?P<used_bytes>-?\d+) bytes/(?P<used_mb>\w+)} '
+    r'{rss usage: (?P<rss_bytes>-?\d+) bytes/(?P<rss_mb>\w+)} '
     r'\[pid: (?P<pid>\d+)\|app: -\|req: -/-\] (?P<ip>[0-9.]+) \(-\) '
     r'{(?P<vars>\d+) vars in (?P<var_bytes>\d+) bytes} '
     r'\[(?P<date>[0-9A-Za-z: ]+)\] (?P<method>\w+) (?P<url>.*?) => '
     r'generated (?P<resp_bytes>\d+) bytes in (?P<msecs>\d+) msecs '
-    r'\((?P<proto>[A-Z0-9/.]+) (?P<status>\d+)\) '
+    r'\((- http://hasty.ai)?(?P<proto>[A-Z0-9/.]+) (?P<status>\d+)\) '
    r'(?P<headers>\d+) headers in (?P<header_bytes>\d+) bytes '
     r'\((?P<switches>\d+) switches on core (?P<core>\d+)\) '
     r'(?P<agent>.*?)'
+    r'( (?P<referer>https?://[^ ]*?))?( -)?( http(://|%3A%2F%2F)hasty\.ai)?'
     r'$'
 )
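
The pattern is loosened in two ways: -? lets the byte counters match when the logged values show up negative, and the optional hasty.ai fragments tolerate junk that some clients append to the request line and referer, in both plain and percent-encoded form, so those lines still parse instead of being dropped. The tightened pieces in isolation (the sample strings are illustrative, not real log data):

    import re

    # An optionally negative integer field ...
    print(re.match(r'(?P<rss_bytes>-?\d+)', '-73728').groupdict())
    # ... and an optional trailing " http://hasty.ai" that must not break the match.
    print(re.match(r'(?P<agent>.*?)( http://hasty\.ai)?$',
                   'Mozilla/5.0 http://hasty.ai').group('agent'))  # 'Mozilla/5.0'
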
@@ -20,17 +23,27 @@ def save_logs(logs):
     columns = sorted(REGEX.groupindex, key=lambda col: REGEX.groupindex[col])
     conn = sqlite3.Connection('logs.db')
     cur = conn.cursor()
-    cur.execute('CREATE TABLE logs(%s)' % ', '.join(columns))
+    cur.execute('CREATE TABLE IF NOT EXISTS logs(%s)' % ', '.join(columns))
     cur.executemany('INSERT INTO logs VALUES (%s)' % ', '.join(['?'] * len(columns)),
                     [[log[col] for col in columns] for log in logs])
     conn.commit()
     conn.close()
 
 def read_logs(path):
-    with open(path) as fp:
+    with open(path, 'r', errors='replace') as fp:
         lines = fp.readlines()
-    return [REGEX.match(line.strip()).groupdict() for line in lines
-            if line.startswith('{address space usage')]
+    parsed = [(line, REGEX.match(line.strip())) for line in lines
+              if line.startswith('{address space usage')]
+    for line, match in parsed:
+        if not match:
+            print('failed to parse:', line.strip())
+    return [match.groupdict() for _, match in parsed if match]
 
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('logfile', nargs='?', default='uwsgi.log')
+    args = parser.parse_args()
+    save_logs(read_logs(args.logfile))
+
 if __name__ == '__main__':
-    save_logs(read_logs('uwsgi.log'))
+    main()
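
With these changes the script takes the log file on the command line instead of hard-coding uwsgi.log, reports any line that looks like a uwsgi request entry but fails to parse, and can be re-run against an existing logs.db thanks to CREATE TABLE IF NOT EXISTS (note that nothing de-duplicates, so re-running over the same file inserts the same rows again). Once a run has completed, the table can be queried directly; a small follow-up sketch (the query is just an example):

    import sqlite3

    # Summarise the most-requested URLs from the table the script populates.
    conn = sqlite3.connect('logs.db')
    for url, hits in conn.execute(
            'SELECT url, COUNT(*) FROM logs GROUP BY url ORDER BY COUNT(*) DESC LIMIT 10'):
        print(hits, url)
    conn.close()
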