
Random bugfixes

Branch: multi-sources
Authored by Ben Kurtovic 4 months ago, committed by Ben Kurtovic
Commit: 19c873f1c8
4 changed files with 36 additions and 16 deletions:
  1. copyvios/background.py (+4, -3)
  2. copyvios/checker.py (+10, -4)
  3. copyvios/turnitin.py (+1, -1)
  4. scripts/log_analyzer.py (+21, -8)

copyvios/background.py (+4, -3)

@@ -53,7 +53,7 @@ def _build_url(screen, filename, url, imgwidth, imgheight):
     if width >= imgwidth:
         return url
     url = url.replace("/commons/", "/commons/thumb/")
-    return "%s/%dpx-%s" % (url, width, urllib.quote(filename))
+    return "%s/%dpx-%s" % (url, width, urllib.quote(filename.encode("utf8")))
 
 _BACKGROUNDS = {
     "potd": _get_fresh_potd,
@@ -78,8 +78,9 @@ def set_background(selected):
         screen_cache = g.cookies["CopyviosScreenCache"].value
         try:
             screen = loads(screen_cache)
-            int(screen["width"])
-            int(screen["height"])
+            screen = {"width": int(screen["width"]), "height": int(screen["height"])}
+            if screen["width"] <= 0 or screen["height"] <= 0:
+                raise ValueError()
         except (ValueError, KeyError):
             screen = {"width": 1024, "height": 768}
     else:
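
The old validation coerced the cookie's values but threw the result away, so
strings (and negative numbers) survived into screen. The new code keeps the
coerced ints and treats non-positive sizes as invalid. A standalone sketch of
the same logic (parse_screen is a hypothetical wrapper, not in the codebase):

    from json import loads

    def parse_screen(cookie_value):
        try:
            data = loads(cookie_value)
            screen = {"width": int(data["width"]), "height": int(data["height"])}
            if screen["width"] <= 0 or screen["height"] <= 0:
                raise ValueError()
        except (ValueError, KeyError):
            screen = {"width": 1024, "height": 768}
        return screen

    parse_screen('{"width": "1280", "height": "800"}')  # -> ints, not strings
    parse_screen('{"width": -5, "height": 800}')        # -> 1024x768 fallback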


copyvios/checker.py (+10, -4)

@@ -94,10 +94,16 @@ def _get_results(query, follow=True):
         elif scheme not in ["http", "https"]:
             query.error = "bad URI"
             return
+        degree = 5
+        if query.degree:
+            try:
+                degree = int(query.degree)
+            except ValueError:
+                pass
         result = page.copyvio_compare(query.url, min_confidence=T_SUSPECT,
-                                      max_time=30)
+                                      max_time=10, degree=degree)
         if result.best.chains[0] is EMPTY:
-            query.error = "timeout" if result.time > 30 else "no data"
+            query.error = "timeout" if result.time > 10 else "no data"
             return
         query.result = result
         query.result.cached = False
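
The new degree value comes straight from the query string, so it is parsed
defensively: missing or non-numeric input silently falls back to 5
(presumably the comparison's default n-gram degree). The same fallback
pattern in isolation, as a hypothetical standalone helper:

    def parse_degree(raw, default=5):
        # Fall back to the default on None, '', or junk like '5x'.
        if not raw:
            return default
        try:
            return int(raw)
        except ValueError:
            return default

    assert parse_degree(None) == 5
    assert parse_degree("3") == 3
    assert parse_degree("junk") == 5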
@@ -140,7 +146,7 @@ def _perform_check(query, page, use_engine, use_links):
     if not query.result:
         try:
             query.result = page.copyvio_check(
-                min_confidence=T_SUSPECT, max_queries=8, max_time=45,
+                min_confidence=T_SUSPECT, max_queries=8, max_time=30,
                 no_searches=not use_engine, no_links=not use_links,
                 short_circuit=not query.noskip)
         except exceptions.SearchQueryError as exc:
@@ -190,7 +196,7 @@ def _get_cached_results(page, conn, mode, noskip):
     url, confidence, skipped, excluded = data.pop(0)
     if skipped:  # Should be impossible: data must be bad; run a new check
         return None
-    result = page.copyvio_compare(url, min_confidence=T_SUSPECT, max_time=30)
+    result = page.copyvio_compare(url, min_confidence=T_SUSPECT, max_time=10)
     if abs(result.confidence - confidence) >= 0.0001:
         return None
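
This hunk revalidates a cached result by re-running the comparison and
discarding the cache entry when the confidence drifts by 0.0001 or more; the
absolute-epsilon check avoids spurious mismatches from floats round-tripping
through the database. The check, sketched as a hypothetical helper with
made-up values:

    def cache_still_valid(cached, fresh, epsilon=0.0001):
        # Mirrors the abs() comparison in the hunk above.
        return abs(fresh - cached) < epsilon

    assert cache_still_valid(0.753214, 0.753218)
    assert not cache_still_valid(0.7532, 0.7534)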



copyvios/turnitin.py (+1, -1)

@@ -30,7 +30,7 @@ def _make_api_request(page_title, lang):
                       'lang': lang,
                       'report': 1}
 
-    result = requests.get(TURNITIN_API_ENDPOINT, params=api_parameters)
+    result = requests.get(TURNITIN_API_ENDPOINT, params=api_parameters, verify=False)
     # use literal_eval to *safely* parse the resulting dict-containing string
     try:
         parsed_api_result = literal_eval(result.text)
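
verify=False disables TLS certificate verification entirely, presumably as a
workaround for a broken certificate chain on the Turnitin endpoint; it also
makes urllib3 emit an InsecureRequestWarning on every call. If the workaround
has to stay, the warning can at least be silenced explicitly. A sketch, with
placeholders standing in for the module's real constants:

    import requests
    import urllib3

    TURNITIN_API_ENDPOINT = 'https://example.org/api'  # placeholder
    api_parameters = {'action': 'search'}              # placeholder

    # verify=False triggers InsecureRequestWarning on each request; silence it.
    # (Older requests releases vendor urllib3 as requests.packages.urllib3.)
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    result = requests.get(TURNITIN_API_ENDPOINT, params=api_parameters,
                          verify=False)

A safer long-term fix is pointing verify at the endpoint's CA bundle, e.g.
verify='/path/to/ca.pem', which keeps certificate checking on.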


scripts/log_analyzer.py (+21, -8)

@@ -1,18 +1,21 @@
 #!/bin/env python3
+import argparse
 import re
 import sqlite3
 
 REGEX = re.compile(
     r'^'
-    r'{address space usage: (?P<used_bytes>\d+) bytes/(?P<used_mb>\w+)} '
-    r'{rss usage: (?P<rss_bytes>\d+) bytes/(?P<rss_mb>\w+)} '
+    r'{address space usage: (?P<used_bytes>-?\d+) bytes/(?P<used_mb>\w+)} '
+    r'{rss usage: (?P<rss_bytes>-?\d+) bytes/(?P<rss_mb>\w+)} '
     r'\[pid: (?P<pid>\d+)\|app: -\|req: -/-\] (?P<ip>[0-9.]+) \(-\) '
     r'{(?P<vars>\d+) vars in (?P<var_bytes>\d+) bytes} '
     r'\[(?P<date>[0-9A-Za-z: ]+)\] (?P<method>\w+) (?P<url>.*?) => '
     r'generated (?P<resp_bytes>\d+) bytes in (?P<msecs>\d+) msecs '
-    r'\((?P<proto>[A-Z0-9/.]+) (?P<status>\d+)\) '
+    r'\((- http://hasty.ai)?(?P<proto>[A-Z0-9/.]+) (?P<status>\d+)\) '
     r'(?P<headers>\d+) headers in (?P<header_bytes>\d+) bytes '
     r'\((?P<switches>\d+) switches on core (?P<core>\d+)\) '
     r'(?P<agent>.*?)'
+    r'( (?P<referer>https?://[^ ]*?))?( -)?( http(://|%3A%2F%2F)hasty\.ai)?'
     r'$'
 )
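
Two loosenings in this hunk: the byte counters now accept a leading minus
(negative values evidently appear in real uWSGI log lines, plausibly from
counter overflow), and optional fragments swallow the "hasty.ai" strings some
client was injecting into request lines and referers. A quick check of the
first change, with a made-up log fragment:

    import re

    frag = re.compile(r'{address space usage: (?P<used_bytes>-?\d+) '
                      r'bytes/(?P<used_mb>\w+)}')
    assert frag.match('{address space usage: -2147483648 bytes/2048MB}')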

@@ -20,17 +23,27 @@ def save_logs(logs):
     columns = sorted(REGEX.groupindex, key=lambda col: REGEX.groupindex[col])
     conn = sqlite3.Connection('logs.db')
     cur = conn.cursor()
-    cur.execute('CREATE TABLE logs(%s)' % ', '.join(columns))
+    cur.execute('CREATE TABLE IF NOT EXISTS logs(%s)' % ', '.join(columns))
     cur.executemany('INSERT INTO logs VALUES (%s)' % ', '.join(['?'] * len(columns)),
                     [[log[col] for col in columns] for log in logs])
     conn.commit()
     conn.close()
 
 def read_logs(path):
-    with open(path) as fp:
+    with open(path, 'r', errors='replace') as fp:
         lines = fp.readlines()
-    return [REGEX.match(line.strip()).groupdict() for line in lines
-            if line.startswith('{address space usage')]
+    parsed = [(line, REGEX.match(line.strip())) for line in lines
+              if line.startswith('{address space usage')]
+    for line, match in parsed:
+        if not match:
+            print('failed to parse:', line.strip())
+    return [match.groupdict() for _, match in parsed if match]
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('logfile', nargs='?', default='uwsgi.log')
+    args = parser.parse_args()
+    save_logs(read_logs(args.logfile))
 
 if __name__ == '__main__':
-    save_logs(read_logs('uwsgi.log'))
+    main()
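
With the argparse entry point, the script can be pointed at any log file and
falls back to uwsgi.log (note that argparse only applies a positional's
default when nargs='?' is given). Typical usage, assuming the logs.db name
from save_logs:

    $ python3 scripts/log_analyzer.py            # reads ./uwsgi.log
    $ python3 scripts/log_analyzer.py other.log
    $ sqlite3 logs.db "SELECT status, COUNT(*) FROM logs GROUP BY status"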
