From 19c873f1c8937e0682093929ef0d949adf96cea5 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Tue, 20 Aug 2024 01:17:39 +0000
Subject: [PATCH] Random bugfixes

---
 copyvios/background.py  |  7 ++++---
 copyvios/checker.py     | 14 ++++++++++----
 copyvios/turnitin.py    |  2 +-
 scripts/log_analyzer.py | 29 +++++++++++++++++++++--------
 4 files changed, 36 insertions(+), 16 deletions(-)
 mode change 100644 => 100755 scripts/log_analyzer.py

diff --git a/copyvios/background.py b/copyvios/background.py
index fd7d4ee..ef8d8e2 100644
--- a/copyvios/background.py
+++ b/copyvios/background.py
@@ -53,7 +53,7 @@ def _build_url(screen, filename, url, imgwidth, imgheight):
     if width >= imgwidth:
         return url
     url = url.replace("/commons/", "/commons/thumb/")
-    return "%s/%dpx-%s" % (url, width, urllib.quote(filename))
+    return "%s/%dpx-%s" % (url, width, urllib.quote(filename.encode("utf8")))
 
 _BACKGROUNDS = {
     "potd": _get_fresh_potd,
@@ -78,8 +78,9 @@ def set_background(selected):
         screen_cache = g.cookies["CopyviosScreenCache"].value
         try:
             screen = loads(screen_cache)
-            int(screen["width"])
-            int(screen["height"])
+            screen = {"width": int(screen["width"]), "height": int(screen["height"])}
+            if screen["width"] <= 0 or screen["height"] <= 0:
+                raise ValueError()
         except (ValueError, KeyError):
             screen = {"width": 1024, "height": 768}
     else:
diff --git a/copyvios/checker.py b/copyvios/checker.py
index 22b1a02..c892db3 100644
--- a/copyvios/checker.py
+++ b/copyvios/checker.py
@@ -94,10 +94,16 @@ def _get_results(query, follow=True):
         elif scheme not in ["http", "https"]:
             query.error = "bad URI"
             return
+        degree = 5
+        if query.degree:
+            try:
+                degree = int(query.degree)
+            except ValueError:
+                pass
         result = page.copyvio_compare(query.url, min_confidence=T_SUSPECT,
-                                      max_time=30)
+                                      max_time=10, degree=degree)
         if result.best.chains[0] is EMPTY:
-            query.error = "timeout" if result.time > 30 else "no data"
+            query.error = "timeout" if result.time > 10 else "no data"
             return
         query.result = result
         query.result.cached = False
@@ -140,7 +146,7 @@ def _perform_check(query, page, use_engine, use_links):
     if not query.result:
         try:
             query.result = page.copyvio_check(
-                min_confidence=T_SUSPECT, max_queries=8, max_time=45,
+                min_confidence=T_SUSPECT, max_queries=8, max_time=30,
                 no_searches=not use_engine, no_links=not use_links,
                 short_circuit=not query.noskip)
         except exceptions.SearchQueryError as exc:
@@ -190,7 +196,7 @@ def _get_cached_results(page, conn, mode, noskip):
     url, confidence, skipped, excluded = data.pop(0)
     if skipped:  # Should be impossible: data must be bad; run a new check
         return None
-    result = page.copyvio_compare(url, min_confidence=T_SUSPECT, max_time=30)
+    result = page.copyvio_compare(url, min_confidence=T_SUSPECT, max_time=10)
 
     if abs(result.confidence - confidence) >= 0.0001:
         return None
diff --git a/copyvios/turnitin.py b/copyvios/turnitin.py
index a6177e9..15d7ded 100644
--- a/copyvios/turnitin.py
+++ b/copyvios/turnitin.py
@@ -30,7 +30,7 @@ def _make_api_request(page_title, lang):
                       'lang': lang,
                       'report': 1}
 
-    result = requests.get(TURNITIN_API_ENDPOINT, params=api_parameters)
+    result = requests.get(TURNITIN_API_ENDPOINT, params=api_parameters, verify=False)
     # use literal_eval to *safely* parse the resulting dict-containing string
     try:
         parsed_api_result = literal_eval(result.text)
diff --git a/scripts/log_analyzer.py b/scripts/log_analyzer.py
old mode 100644
new mode 100755
index 390868c..2c77071
--- a/scripts/log_analyzer.py
+++ b/scripts/log_analyzer.py
@@ -1,18 +1,21 @@
+#!/bin/env python3
+import argparse
 import re
 import sqlite3
 
 REGEX = re.compile(
     r'^'
-    r'{address space usage: (?P<asu>\d+) bytes/(?P<asu_human>\w+)} '
-    r'{rss usage: (?P<rss>\d+) bytes/(?P<rss_human>\w+)} '
+    r'{address space usage: (?P<asu>-?\d+) bytes/(?P<asu_human>\w+)} '
+    r'{rss usage: (?P<rss>-?\d+) bytes/(?P<rss_human>\w+)} '
     r'\[pid: (?P<pid>\d+)\|app: -\|req: -/-\] (?P<address>[0-9.]+) \(-\) '
     r'{(?P<vars>\d+) vars in (?P<var_bytes>\d+) bytes} '
     r'\[(?P<date>[0-9A-Za-z: ]+)\] (?P<method>\w+) (?P<url>.*?) => '
     r'generated (?P<resp_bytes>\d+) bytes in (?P<msecs>\d+) msecs '
-    r'\((?P<proto>[A-Z0-9/.]+) (?P<status>\d+)\) '
+    r'\((- http://hasty.ai)?(?P<proto>[A-Z0-9/.]+) (?P<status>\d+)\) '
     r'(?P<headers>\d+) headers in (?P<header_bytes>\d+) bytes '
     r'\((?P<switches>\d+) switches on core (?P<core>\d+)\) '
     r'(?P<agent>.*?)'
+    r'( (?P<referer>https?://[^ ]*?))?( -)?( http(://|%3A%2F%2F)hasty\.ai)?'
     r'$'
 )
 
@@ -20,17 +23,27 @@ def save_logs(logs):
     columns = sorted(REGEX.groupindex, key=lambda col: REGEX.groupindex[col])
     conn = sqlite3.Connection('logs.db')
     cur = conn.cursor()
-    cur.execute('CREATE TABLE logs(%s)' % ', '.join(columns))
+    cur.execute('CREATE TABLE IF NOT EXISTS logs(%s)' % ', '.join(columns))
     cur.executemany('INSERT INTO logs VALUES (%s)' % ', '.join(['?'] * len(columns)),
                     [[log[col] for col in columns] for log in logs])
     conn.commit()
     conn.close()
 
 def read_logs(path):
-    with open(path) as fp:
+    with open(path, 'r', errors='replace') as fp:
         lines = fp.readlines()
-    return [REGEX.match(line.strip()).groupdict() for line in lines
-            if line.startswith('{address space usage')]
+    parsed = [(line, REGEX.match(line.strip())) for line in lines
+              if line.startswith('{address space usage')]
+    for line, match in parsed:
+        if not match:
+            print('failed to parse:', line.strip())
+    return [match.groupdict() for _, match in parsed if match]
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('logfile', nargs='?', default='uwsgi.log')
+    args = parser.parse_args()
+    save_logs(read_logs(args.logfile))
 
 if __name__ == '__main__':
-    save_logs(read_logs('uwsgi.log'))
+    main()