
Random bugfixes

Branch: multi-sources
Authored by Ben Kurtovic 4 months ago, committed by Ben Kurtovic
Commit: 19c873f1c8
4 changed files with 36 additions and 16 deletions:
  1. copyvios/background.py (+4, -3)
  2. copyvios/checker.py (+10, -4)
  3. copyvios/turnitin.py (+1, -1)
  4. scripts/log_analyzer.py (+21, -8)

copyvios/background.py (+4, -3)

@@ -53,7 +53,7 @@ def _build_url(screen, filename, url, imgwidth, imgheight):
     if width >= imgwidth:
         return url
     url = url.replace("/commons/", "/commons/thumb/")
-    return "%s/%dpx-%s" % (url, width, urllib.quote(filename))
+    return "%s/%dpx-%s" % (url, width, urllib.quote(filename.encode("utf8")))
 
 _BACKGROUNDS = {
     "potd": _get_fresh_potd,
@@ -78,8 +78,9 @@ def set_background(selected):
         screen_cache = g.cookies["CopyviosScreenCache"].value
         try:
             screen = loads(screen_cache)
-            int(screen["width"])
-            int(screen["height"])
+            screen = {"width": int(screen["width"]), "height": int(screen["height"])}
+            if screen["width"] <= 0 or screen["height"] <= 0:
+                raise ValueError()
         except (ValueError, KeyError):
             screen = {"width": 1024, "height": 768}
     else:
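
The old validation coerced the cookie's values but threw the result away, so
strings (and negative numbers) survived into screen. The new code keeps the
coerced ints and treats non-positive sizes as invalid. A standalone sketch of
the same logic (parse_screen is a hypothetical wrapper, not in the codebase):

    from json import loads

    def parse_screen(cookie_value):
        try:
            data = loads(cookie_value)
            screen = {"width": int(data["width"]), "height": int(data["height"])}
            if screen["width"] <= 0 or screen["height"] <= 0:
                raise ValueError()
        except (ValueError, KeyError):
            screen = {"width": 1024, "height": 768}
        return screen

    parse_screen('{"width": "1280", "height": "800"}')  # -> ints, not strings
    parse_screen('{"width": -5, "height": 800}')        # -> 1024x768 fallback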


copyvios/checker.py (+10, -4)

@@ -94,10 +94,16 @@ def _get_results(query, follow=True):
         elif scheme not in ["http", "https"]:
             query.error = "bad URI"
             return
+        degree = 5
+        if query.degree:
+            try:
+                degree = int(query.degree)
+            except ValueError:
+                pass
         result = page.copyvio_compare(query.url, min_confidence=T_SUSPECT,
-                                      max_time=30)
+                                      max_time=10, degree=degree)
         if result.best.chains[0] is EMPTY:
-            query.error = "timeout" if result.time > 30 else "no data"
+            query.error = "timeout" if result.time > 10 else "no data"
             return
         query.result = result
         query.result.cached = False
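
The new degree value comes straight from the query string, so it is parsed
defensively: missing or non-numeric input silently falls back to 5
(presumably the comparison's default n-gram degree). The same fallback
pattern in isolation, as a hypothetical standalone helper:

    def parse_degree(raw, default=5):
        # Fall back to the default on None, '', or junk like '5x'.
        if not raw:
            return default
        try:
            return int(raw)
        except ValueError:
            return default

    assert parse_degree(None) == 5
    assert parse_degree("3") == 3
    assert parse_degree("junk") == 5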
@@ -140,7 +146,7 @@ def _perform_check(query, page, use_engine, use_links):
     if not query.result:
         try:
             query.result = page.copyvio_check(
-                min_confidence=T_SUSPECT, max_queries=8, max_time=45,
+                min_confidence=T_SUSPECT, max_queries=8, max_time=30,
                 no_searches=not use_engine, no_links=not use_links,
                 short_circuit=not query.noskip)
         except exceptions.SearchQueryError as exc:
@@ -190,7 +196,7 @@ def _get_cached_results(page, conn, mode, noskip):
     url, confidence, skipped, excluded = data.pop(0)
     if skipped:  # Should be impossible: data must be bad; run a new check
         return None
-    result = page.copyvio_compare(url, min_confidence=T_SUSPECT, max_time=30)
+    result = page.copyvio_compare(url, min_confidence=T_SUSPECT, max_time=10)
     if abs(result.confidence - confidence) >= 0.0001:
         return None
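
This hunk revalidates a cached result by re-running the comparison and
discarding the cache entry when the confidence drifts by 0.0001 or more; the
absolute-epsilon check avoids spurious mismatches from floats round-tripping
through the database. The check, sketched as a hypothetical helper with
made-up values:

    def cache_still_valid(cached, fresh, epsilon=0.0001):
        # Mirrors the abs() comparison in the hunk above.
        return abs(fresh - cached) < epsilon

    assert cache_still_valid(0.753214, 0.753218)
    assert not cache_still_valid(0.7532, 0.7534)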



copyvios/turnitin.py (+1, -1)

@@ -30,7 +30,7 @@ def _make_api_request(page_title, lang):
                       'lang': lang,
                       'report': 1}
 
-    result = requests.get(TURNITIN_API_ENDPOINT, params=api_parameters)
+    result = requests.get(TURNITIN_API_ENDPOINT, params=api_parameters, verify=False)
     # use literal_eval to *safely* parse the resulting dict-containing string
     try:
         parsed_api_result = literal_eval(result.text)
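
verify=False disables TLS certificate verification entirely, presumably as a
workaround for a broken certificate chain on the Turnitin endpoint; it also
makes urllib3 emit an InsecureRequestWarning on every call. If the workaround
has to stay, the warning can at least be silenced explicitly. A sketch, with
placeholders standing in for the module's real constants:

    import requests
    import urllib3

    TURNITIN_API_ENDPOINT = 'https://example.org/api'  # placeholder
    api_parameters = {'action': 'search'}              # placeholder

    # verify=False triggers InsecureRequestWarning on each request; silence it.
    # (Older requests releases vendor urllib3 as requests.packages.urllib3.)
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    result = requests.get(TURNITIN_API_ENDPOINT, params=api_parameters,
                          verify=False)

A safer long-term fix is pointing verify at the endpoint's CA bundle, e.g.
verify='/path/to/ca.pem', which keeps certificate checking on.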


scripts/log_analyzer.py (+21, -8)

@@ -1,18 +1,21 @@
 #!/bin/env python3
+import argparse
 import re
 import sqlite3
 
 REGEX = re.compile(
     r'^'
-    r'{address space usage: (?P<used_bytes>\d+) bytes/(?P<used_mb>\w+)} '
-    r'{rss usage: (?P<rss_bytes>\d+) bytes/(?P<rss_mb>\w+)} '
+    r'{address space usage: (?P<used_bytes>-?\d+) bytes/(?P<used_mb>\w+)} '
+    r'{rss usage: (?P<rss_bytes>-?\d+) bytes/(?P<rss_mb>\w+)} '
     r'\[pid: (?P<pid>\d+)\|app: -\|req: -/-\] (?P<ip>[0-9.]+) \(-\) '
     r'{(?P<vars>\d+) vars in (?P<var_bytes>\d+) bytes} '
     r'\[(?P<date>[0-9A-Za-z: ]+)\] (?P<method>\w+) (?P<url>.*?) => '
     r'generated (?P<resp_bytes>\d+) bytes in (?P<msecs>\d+) msecs '
-    r'\((?P<proto>[A-Z0-9/.]+) (?P<status>\d+)\) '
+    r'\((- http://hasty.ai)?(?P<proto>[A-Z0-9/.]+) (?P<status>\d+)\) '
     r'(?P<headers>\d+) headers in (?P<header_bytes>\d+) bytes '
     r'\((?P<switches>\d+) switches on core (?P<core>\d+)\) '
     r'(?P<agent>.*?)'
+    r'( (?P<referer>https?://[^ ]*?))?( -)?( http(://|%3A%2F%2F)hasty\.ai)?'
     r'$'
 )
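
Two loosenings in this hunk: the byte counters now accept a leading minus
(negative values evidently appear in real uWSGI log lines, plausibly from
counter overflow), and optional fragments swallow the "hasty.ai" strings some
client was injecting into request lines and referers. A quick check of the
first change, with a made-up log fragment:

    import re

    frag = re.compile(r'{address space usage: (?P<used_bytes>-?\d+) '
                      r'bytes/(?P<used_mb>\w+)}')
    assert frag.match('{address space usage: -2147483648 bytes/2048MB}')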

@@ -20,17 +23,27 @@ def save_logs(logs):
     columns = sorted(REGEX.groupindex, key=lambda col: REGEX.groupindex[col])
     conn = sqlite3.Connection('logs.db')
     cur = conn.cursor()
-    cur.execute('CREATE TABLE logs(%s)' % ', '.join(columns))
+    cur.execute('CREATE TABLE IF NOT EXISTS logs(%s)' % ', '.join(columns))
     cur.executemany('INSERT INTO logs VALUES (%s)' % ', '.join(['?'] * len(columns)),
                     [[log[col] for col in columns] for log in logs])
     conn.commit()
     conn.close()
 
 def read_logs(path):
-    with open(path) as fp:
+    with open(path, 'r', errors='replace') as fp:
         lines = fp.readlines()
-    return [REGEX.match(line.strip()).groupdict() for line in lines
-            if line.startswith('{address space usage')]
+    parsed = [(line, REGEX.match(line.strip())) for line in lines
+              if line.startswith('{address space usage')]
+    for line, match in parsed:
+        if not match:
+            print('failed to parse:', line.strip())
+    return [match.groupdict() for _, match in parsed if match]
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('logfile', nargs='?', default='uwsgi.log')
+    args = parser.parse_args()
+    save_logs(read_logs(args.logfile))
 
 if __name__ == '__main__':
-    save_logs(read_logs('uwsgi.log'))
+    main()
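
With the argparse entry point, the script can be pointed at any log file and
falls back to uwsgi.log (note that argparse only applies a positional's
default when nargs='?' is given). Typical usage, assuming the logs.db name
from save_logs:

    $ python3 scripts/log_analyzer.py            # reads ./uwsgi.log
    $ python3 scripts/log_analyzer.py other.log
    $ sqlite3 logs.db "SELECT status, COUNT(*) FROM logs GROUP BY status"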
