diff --git a/bitshift/crawler/crawl.py b/bitshift/crawler/crawl.py
index d1922c9..c121866 100644
--- a/bitshift/crawler/crawl.py
+++ b/bitshift/crawler/crawl.py
@@ -45,7 +45,7 @@ def _configure_logging():
     logging.getLogger("urllib3").setLevel(logging.WARNING)
 
     formatter = logging.Formatter(
-            fmt=("%(asctime)s %(levelname)s %(name)s %(funcName)s"
+            fmt=("%(asctime)s %(levelname)s %(name)s:%(funcName)s"
             " %(message)s"), datefmt="%y-%m-%d %H:%M:%S")
 
     handler = logging.handlers.TimedRotatingFileHandler(
diff --git a/bitshift/parser/python.py b/bitshift/parser/python.py
index d0cd7d3..713cae9 100644
--- a/bitshift/parser/python.py
+++ b/bitshift/parser/python.py
@@ -1,4 +1,7 @@
 import ast
+import re
+
+encoding_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", re.UNICODE)
 
 class _CachedWalker(ast.NodeVisitor):
     """
@@ -154,7 +157,31 @@ def parse_py(codelet):
     :type code: Codelet
     """
 
-    tree = ast.parse(codelet.code)
+    def strip_encoding(lines):
+        """Strips the encoding line from a file, which breaks the parser.
+
+        Yields every line of *lines* except a coding declaration found on
+        the first or second line.
+        """
+        # next() needs an iterator; callers pass a list from splitlines().
+        lines = iter(lines)
+        try:
+            first = next(lines)
+            if not encoding_re.match(first):
+                yield first
+            second = next(lines)
+            if not encoding_re.match(second):
+                yield second
+        except StopIteration:
+            return
+        for line in lines:
+            yield line
+
+    try:
+        tree = ast.parse("\n".join(strip_encoding(codelet.code.splitlines())))
+    except SyntaxError:
+        ## TODO: add some logging here?
+        return
     cutter = _CachedWalker()
     cutter.visit(tree)
     codelet.symbols = cutter.accum