A copyright violation detector running on Wikimedia Cloud Services https://tools.wmflabs.org/copyvios/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

37 lines
1.3 KiB

  1. import re
  2. import sqlite3
  3. REGEX = re.compile(
  4. r'^'
  5. r'{address space usage: (?P<used_bytes>\d+) bytes/(?P<used_mb>\w+)} '
  6. r'{rss usage: (?P<rss_bytes>\d+) bytes/(?P<rss_mb>\w+)} '
  7. r'\[pid: (?P<pid>\d+)\|app: -\|req: -/-\] (?P<ip>[0-9.]+) \(-\) '
  8. r'{(?P<vars>\d+) vars in (?P<var_bytes>\d+) bytes} '
  9. r'\[(?P<date>[0-9A-Za-z: ]+)\] (?P<method>\w+) (?P<url>.*?) => '
  10. r'generated (?P<resp_bytes>\d+) bytes in (?P<msecs>\d+) msecs '
  11. r'\((?P<proto>[A-Z0-9/.]+) (?P<status>\d+)\) '
  12. r'(?P<headers>\d+) headers in (?P<header_bytes>\d+) bytes '
  13. r'\((?P<switches>\d+) switches on core (?P<core>\d+)\) '
  14. r'(?P<agent>.*?)'
  15. r'$'
  16. )
  17. def save_logs(logs):
  18. columns = sorted(REGEX.groupindex, key=lambda col: REGEX.groupindex[col])
  19. conn = sqlite3.Connection('logs.db')
  20. cur = conn.cursor()
  21. cur.execute('CREATE TABLE logs(%s)' % ', '.join(columns))
  22. cur.executemany('INSERT INTO logs VALUES (%s)' % ', '.join(['?'] * len(columns)),
  23. [[log[col] for col in columns] for log in logs])
  24. conn.commit()
  25. conn.close()
  26. def read_logs(path):
  27. with open(path) as fp:
  28. lines = fp.readlines()
  29. return [REGEX.match(line.strip()).groupdict() for line in lines
  30. if line.startswith('{address space usage')]
  31. if __name__ == '__main__':
  32. save_logs(read_logs('uwsgi.log'))