A copyright violation detector running on Wikimedia Cloud Services https://tools.wmflabs.org/copyvios/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

log_analyzer.py 1.8 KiB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. #!/bin/env python3
  2. import argparse
  3. import re
  4. import sqlite3
  5. REGEX = re.compile(
  6. r'^'
  7. r'{address space usage: (?P<used_bytes>-?\d+) bytes/(?P<used_mb>\w+)} '
  8. r'{rss usage: (?P<rss_bytes>-?\d+) bytes/(?P<rss_mb>\w+)} '
  9. r'\[pid: (?P<pid>\d+)\|app: -\|req: -/-\] (?P<ip>[0-9.]+) \(-\) '
  10. r'{(?P<vars>\d+) vars in (?P<var_bytes>\d+) bytes} '
  11. r'\[(?P<date>[0-9A-Za-z: ]+)\] (?P<method>\w+) (?P<url>.*?) => '
  12. r'generated (?P<resp_bytes>\d+) bytes in (?P<msecs>\d+) msecs '
  13. r'\((- http://hasty.ai)?(?P<proto>[A-Z0-9/.]+) (?P<status>\d+)\) '
  14. r'(?P<headers>\d+) headers in (?P<header_bytes>\d+) bytes '
  15. r'\((?P<switches>\d+) switches on core (?P<core>\d+)\) '
  16. r'(?P<agent>.*?)'
  17. r'( (?P<referer>https?://[^ ]*?))?( -)?( http(://|%3A%2F%2F)hasty\.ai)?'
  18. r'$'
  19. )
  20. def save_logs(logs):
  21. columns = sorted(REGEX.groupindex, key=lambda col: REGEX.groupindex[col])
  22. conn = sqlite3.Connection('logs.db')
  23. cur = conn.cursor()
  24. cur.execute('CREATE TABLE IF NOT EXISTS logs(%s)' % ', '.join(columns))
  25. cur.executemany('INSERT INTO logs VALUES (%s)' % ', '.join(['?'] * len(columns)),
  26. [[log[col] for col in columns] for log in logs])
  27. conn.commit()
  28. conn.close()
  29. def read_logs(path):
  30. with open(path, 'r', errors='replace') as fp:
  31. lines = fp.readlines()
  32. parsed = [(line, REGEX.match(line.strip())) for line in lines
  33. if line.startswith('{address space usage')]
  34. for line, match in parsed:
  35. if not match:
  36. print('failed to parse:', line.strip())
  37. return [match.groupdict() for _, match in parsed if match]
  38. def main():
  39. parser = argparse.ArgumentParser()
  40. parser.add_argument('logfile', default='uwsgi.log')
  41. args = parser.parse_args()
  42. save_logs(read_logs(args.logfile))
  43. if __name__ == '__main__':
  44. main()