diff --git a/scripts/log_analyzer.py b/scripts/log_analyzer.py new file mode 100644 index 0000000..390868c --- /dev/null +++ b/scripts/log_analyzer.py @@ -0,0 +1,36 @@ +import re +import sqlite3 + +REGEX = re.compile( + r'^' + r'{address space usage: (?P\d+) bytes/(?P\w+)} ' + r'{rss usage: (?P\d+) bytes/(?P\w+)} ' + r'\[pid: (?P\d+)\|app: -\|req: -/-\] (?P[0-9.]+) \(-\) ' + r'{(?P\d+) vars in (?P\d+) bytes} ' + r'\[(?P[0-9A-Za-z: ]+)\] (?P\w+) (?P.*?) => ' + r'generated (?P\d+) bytes in (?P\d+) msecs ' + r'\((?P[A-Z0-9/.]+) (?P\d+)\) ' + r'(?P\d+) headers in (?P\d+) bytes ' + r'\((?P\d+) switches on core (?P\d+)\) ' + r'(?P.*?)' + r'$' +) + +def save_logs(logs): + columns = sorted(REGEX.groupindex, key=lambda col: REGEX.groupindex[col]) + conn = sqlite3.Connection('logs.db') + cur = conn.cursor() + cur.execute('CREATE TABLE logs(%s)' % ', '.join(columns)) + cur.executemany('INSERT INTO logs VALUES (%s)' % ', '.join(['?'] * len(columns)), + [[log[col] for col in columns] for log in logs]) + conn.commit() + conn.close() + +def read_logs(path): + with open(path) as fp: + lines = fp.readlines() + return [REGEX.match(line.strip()).groupdict() for line in lines + if line.startswith('{address space usage')] + +if __name__ == '__main__': + save_logs(read_logs('uwsgi.log')) diff --git a/templates/api.mako b/templates/api.mako index 18913b2..befaf2f 100644 --- a/templates/api.mako +++ b/templates/api.mako @@ -40,7 +40,7 @@ % if help:

Copyvio Detector API

-

This is the first version of the API for Earwig's Copyvio Detector. It works, but some bugs might still need to be ironed out, so please report any if you see them.

+

This is the first version of the API for Earwig's Copyvio Detector. Please report any issues you encounter.

Requests

The API responds to GET requests made to https://copyvios.toolforge.org/api.json. Parameters are described in the tables below:

@@ -252,7 +252,8 @@ ] }

Etiquette

- The tool uses the same workers to handle all requests, so making concurrent API calls is only going to slow you down. Most operations are not rate-limited, but full searches with use_engine=True are globally limited to around a thousand per day. Be respectful! +

The tool uses the same workers to handle all requests, so making concurrent API calls is only going to slow you down. Most operations are not rate-limited, but full searches with use_engine=True are globally limited to around a thousand per day. Be respectful!

+

Aside from testing, you must set a reasonable user agent that identifies your bot and gives some way to contact you. You may be blocked if using an improper user agent (for example, the default user agent set by your HTTP library), or if your bot makes requests too frequently.

Example

https://copyvios.toolforge.org/api.json?version=1&action=search&project=wikipedia&lang=en&title=User:EarwigBot/Copyvios/Tests/2

{