A copyright violation detector running on Wikimedia Cloud Services https://tools.wmflabs.org/copyvios/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

61 lines
1.9 KiB

  1. #!/bin/env python3
  2. import argparse
  3. import re
  4. import sqlite3
  5. from typing import Any
  6. REGEX = re.compile(
  7. r"^"
  8. r"{address space usage: (?P<used_bytes>-?\d+) bytes/(?P<used_mb>\w+)} "
  9. r"{rss usage: (?P<rss_bytes>-?\d+) bytes/(?P<rss_mb>\w+)} "
  10. r"\[pid: (?P<pid>\d+)\|app: -\|req: -/-\] (?P<ip>[0-9.]+) \(-\) "
  11. r"{(?P<vars>\d+) vars in (?P<var_bytes>\d+) bytes} "
  12. r"\[(?P<date>[0-9A-Za-z: ]+)\] (?P<method>\w+) (?P<url>.*?) => "
  13. r"generated (?P<resp_bytes>\d+) bytes in (?P<msecs>\d+) msecs "
  14. r"\((- http://hasty.ai)?(?P<proto>[A-Z0-9/.]+) (?P<status>\d+)\) "
  15. r"(?P<headers>\d+) headers in (?P<header_bytes>\d+) bytes "
  16. r"\((?P<switches>\d+) switches on core (?P<core>\d+)\) "
  17. r"(?P<agent>.*?)"
  18. r"( (?P<referer>https?://[^ ]*?))?( -)?( http(://|%3A%2F%2F)hasty\.ai)?"
  19. r"$"
  20. )
  21. def save_logs(logs: list[dict[str, Any]]) -> None:
  22. columns = sorted(REGEX.groupindex, key=lambda col: REGEX.groupindex[col])
  23. conn = sqlite3.Connection("logs.db")
  24. cur = conn.cursor()
  25. cur.execute(f"CREATE TABLE IF NOT EXISTS logs({', '.join(columns)})")
  26. params = ", ".join(["?"] * len(columns))
  27. cur.executemany(
  28. f"INSERT INTO logs VALUES ({params})",
  29. [[log[col] for col in columns] for log in logs],
  30. )
  31. conn.commit()
  32. conn.close()
  33. def read_logs(path: str) -> list[dict[str, Any]]:
  34. with open(path, errors="replace") as fp:
  35. lines = fp.readlines()
  36. parsed = [
  37. (line, REGEX.match(line.strip()))
  38. for line in lines
  39. if line.startswith("{address space usage")
  40. ]
  41. for line, match in parsed:
  42. if not match:
  43. print("failed to parse:", line.strip())
  44. return [match.groupdict() for _, match in parsed if match]
  45. def main():
  46. parser = argparse.ArgumentParser()
  47. parser.add_argument("logfile", default="uwsgi.log")
  48. args = parser.parse_args()
  49. save_logs(read_logs(args.logfile))
# Run the import only when executed as a script, not when imported.
if __name__ == "__main__":
    main()