diff --git a/func_smash.py b/func_smash.py index d085766..a42cd57 100644 --- a/func_smash.py +++ b/func_smash.py @@ -1,47 +1,15 @@ +from code import interact import imp import opcode import os import random import re import sys -import traceback import types MARKOV_START = -1 MARKOV_END = -2 -def run(): - try: - path, name = os.path.split(sys.argv[1]) - name = re.sub("\.pyc?$", "", name) - except IndexError: - raise RuntimeError("Needs a filename as a command-line argument") - f, path, desc = imp.find_module(name, [path]) - try: - module = imp.load_module(name, f, path, desc) - finally: - f.close() - - corpus = module.corpus - chain = make_chain(corpus) - func = make_function(chain, "func") - print "Using {0}-function corpus.".format(len(corpus)) - print "Smashed function disassembly:" - print_function(func) - print - n = 12.0 - print "func(%s) =" % n, func(n) - - if len(sys.argv) > 2 and sys.argv[2] == "-i": # Allow input after the fact - print - while 1: - try: - input(">>> ") - except EOFError: - break - except Exception: - traceback.print_exc() - def make_chain(funcs): chain = {} for func in funcs: @@ -50,87 +18,96 @@ def make_chain(funcs): def make_function(chain, name, argcount=1): codes, constants, varnames = _make_codes(chain) - nlocals = len(varnames) - stacksize = 1024 # High limit? - flags = 0 # Denotes funcs with *args and/or **kwargs; nothing for now - codestring = "".join([chr(code) for code in codes]) names = () - filename = "" - firstlineno = 1 + codestring = "".join([chr(code) for code in codes]) lnotab = "" - code = types.CodeType(argcount, nlocals, stacksize, flags, codestring, - constants, names, varnames, filename, name, - firstlineno, lnotab) + code = types.CodeType(argcount, len(varnames), 1024, 0, codestring, + constants, names, varnames, "", name, 1, + lnotab) func = types.FunctionType(code, globals(), name) return func def print_chain(chain): print "{" - for key in sorted(chain.keys()): - op = _int_to_opname(key) - targets = {} - for op2 in chain[key]: - target = _int_to_opname(op2[0]) - if op2[0] >= opcode.HAVE_ARGUMENT: - target = "{0}({1})".format(target, op2[1]) + for code in sorted(chain.keys()): + name = _opcode_to_opname(code) + target_counts = {} + for tcode in chain[code]: + target = _opcode_to_opname(tcode[0]) + if tcode[0] >= opcode.HAVE_ARGUMENT: + target = "{0}({1})".format(target, tcode[1]) try: - targets[target] += 1 + target_counts[target] += 1 except KeyError: - targets[target] = 1 - targs = [] - for optarget, count in targets.iteritems(): + target_counts[target] = 1 + targets = [] + for target, count in target_counts.iteritems(): if count == 1: - targs.append(optarget) + targets.append(target) else: - targs.append("{0}x {1}".format(count, optarget)) - targs.sort() - print op.rjust(20), "=> [{0}]".format(", ".join(targs)) + targets.append("{0}x {1}".format(count, target)) + targets.sort() + print name.rjust(20), "=> [{0}]".format(", ".join(targets)) print "}" def print_function(func): - co = func.__code__ - code = co.co_code - n = len(code) + codeobj = func.__code__ + codestring = codeobj.co_code + length = len(codestring) i = 0 - while i < n: - op = ord(code[i]) + while i < length: + code = ord(codestring[i]) i += 1 - print opcode.opname[op].rjust(20), - if op >= opcode.HAVE_ARGUMENT: - arg = _get_argument(co, code, i, op) + print opcode.opname[code].rjust(20), + if code >= opcode.HAVE_ARGUMENT: + arg = _get_argument(codeobj, codestring, i, code) i += 2 print " ({0})".format(arg) else: print +def run(): + try: + path, name = os.path.split(sys.argv[1]) + name = re.sub("\.pyc?$", "", name) + except IndexError: + raise RuntimeError("Needs a filename as a command-line argument") + file_obj, path, desc = imp.find_module(name, [path]) + try: + module = imp.load_module(name, file_obj, path, desc) + finally: + file_obj.close() + + _demo(module.corpus) + def _parse_func(func, chain): - co = func.__code__ - code = co.co_code - n = len(code) + codeobj = func.__code__ + codestring = codeobj.co_code + length = len(codestring) i = 0 - lastop = MARKOV_START - while i < n: - op = ord(code[i]) + prevcode = MARKOV_START + while i < length: + code = ord(codestring[i]) i += 1 - if op >= opcode.HAVE_ARGUMENT: - arg = _get_argument(co, code, i, op) + if code >= opcode.HAVE_ARGUMENT: + arg = _get_argument(codeobj, codestring, i, code) i += 2 else: arg = None - _chain_append(chain, lastop, (op, arg)) - lastop = op - _chain_append(chain, op, (MARKOV_END, None)) - -def _get_argument(co, code, i, op): - oparg = ord(code[i]) + ord(code[i + 1]) * 256 - if op in opcode.hasconst: - return co.co_consts[oparg] - elif op in opcode.haslocal: - return co.co_varnames[oparg] - elif op in opcode.hascompare: - return opcode.cmp_op[oparg] - raise NotImplementedError(op, opcode.opname[op]) + _chain_append(chain, prevcode, (code, arg)) + prevcode = code + _chain_append(chain, code, (MARKOV_END, None)) + +def _get_argument(codeobj, codestring, i, code): + arg = ord(codestring[i]) + ord(codestring[i + 1]) * 256 + if code in opcode.hasconst: + return codeobj.co_consts[arg] + elif code in opcode.haslocal: + return codeobj.co_varnames[arg] + elif code in opcode.hascompare: + return opcode.cmp_op[arg] + raise NotImplementedError(code, opcode.opname[code]) def _chain_append(chain, first, second): try: @@ -140,37 +117,50 @@ def _chain_append(chain, first, second): def _make_codes(chain): codes = [] - code = random.choice(chain[MARKOV_START]) + instruction = random.choice(chain[MARKOV_START]) constants, varnames = [], [] while 1: - op, arg = code - if op == MARKOV_END: + code, arg = instruction + if code == MARKOV_END: break - codes.append(op) - if op >= opcode.HAVE_ARGUMENT: - if op in opcode.hasconst: + codes.append(code) + if code >= opcode.HAVE_ARGUMENT: + if code in opcode.hasconst: if arg not in constants: constants.append(arg) args = constants - elif op in opcode.haslocal: + elif code in opcode.haslocal: if arg not in varnames: varnames.append(arg) args = varnames - elif op in opcode.hascompare: + elif code in opcode.hascompare: args = opcode.cmp_op else: - raise NotImplementedError(op, opcode.opname[op]) + raise NotImplementedError(code, opcode.opname[code]) codes.append(args.index(arg) % 256) codes.append(args.index(arg) // 256) - code = random.choice(chain[op]) + instruction = random.choice(chain[code]) return codes, tuple(constants), tuple(varnames) -def _int_to_opname(op): - if op == MARKOV_START: +def _opcode_to_opname(code): + if code == MARKOV_START: return "START" - elif op == MARKOV_END: + elif code == MARKOV_END: return "END" - return opcode.opname[op] + return opcode.opname[code] + +def _demo(corpus, arg=12.0): + chain = make_chain(corpus) + func = make_function(chain, "func") + print "Using {0}-function corpus.".format(len(corpus)) + print "Smashed function disassembly:" + print_function(func) + print + print "func({0}) =".format(arg), func(arg) + + if len(sys.argv) > 2 and sys.argv[2] == "-i": + variables = dict(globals().items() + locals().items()) + interact(banner="", local=variables) if __name__ == "__main__": run()