Generates random Python functions using Markov chains
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

219 rivejä
7.2 KiB

  1. from argparse import ArgumentParser
  2. from code import interact
  3. import imp
  4. import opcode
  5. import os
  6. import random
  7. import re
  8. import types
  9. import prettify
# Shorthand for the interpreter's opcode-name -> opcode-number table.
# NOTE(review): several names looked up below (e.g. CALL_FUNCTION_VAR,
# MAKE_CLOSURE) are CPython 2.x opcodes -- confirm before running elsewhere.
OPMAP = opcode.opmap
# Container-building opcodes; print_function shows their argument as an
# item count.
OP_HASBUILD = [OPMAP[n] for n in ("BUILD_TUPLE", "BUILD_LIST", "BUILD_MAP",
                                  "BUILD_SET")]
# Call opcodes; their two argument bytes are kept as a separate pair
# (displayed by print_function as "args" and "kwargs").
OP_HASCALL = [OPMAP[n] for n in ("CALL_FUNCTION", "CALL_FUNCTION_VAR",
                                 "CALL_FUNCTION_KW", "CALL_FUNCTION_VAR_KW")]
# Function-creating opcodes; print_function shows their argument as a
# count of defaults.
OP_HASMAKE = [OPMAP[n] for n in ("MAKE_FUNCTION", "MAKE_CLOSURE")]
# Opcodes whose 16-bit argument is recorded and re-emitted unchanged instead
# of being resolved through a constants/names/varnames table.
OP_LITERALARG = opcode.hasjabs + opcode.hasjrel + OP_HASBUILD + OP_HASMAKE
# Sentinel pseudo-opcodes used as chain keys/targets for "before the first
# instruction" and "after the last instruction" of a function.
MARKOV_START = -1
MARKOV_END = -2
  19. def make_chain(funcs):
  20. chain = {}
  21. for func in funcs:
  22. _parse_func(func, chain)
  23. return chain
  24. def make_function(chain, name, argcount=1):
  25. codes, constants, names, varnames = _make_codes(chain)
  26. codestring = "".join([chr(code) for code in codes])
  27. lnotab = ""
  28. code = types.CodeType(argcount, len(varnames), 1024, 0, codestring,
  29. constants, names, varnames, "<smash>", name, 1,
  30. lnotab)
  31. func = types.FunctionType(code, globals(), name)
  32. return func
  33. def print_chain(chain):
  34. print "{"
  35. for code in sorted(chain.keys()):
  36. name = _opcode_to_opname(code)
  37. target_counts = {}
  38. for tcode in chain[code]:
  39. target = _opcode_to_opname(tcode[0])
  40. if tcode[0] >= opcode.HAVE_ARGUMENT:
  41. target = "{0}({1!r})".format(target, tcode[1])
  42. try:
  43. target_counts[target] += 1
  44. except KeyError:
  45. target_counts[target] = 1
  46. targets = []
  47. for target, count in target_counts.iteritems():
  48. if count == 1:
  49. targets.append(target)
  50. else:
  51. targets.append("{0}x {1}".format(count, target))
  52. targets.sort()
  53. print name.rjust(20), "=> [{0}]".format(", ".join(targets))
  54. print "}"
  55. def print_function(func):
  56. codeobj = func.__code__
  57. codestring = codeobj.co_code
  58. length = len(codestring)
  59. i = 0
  60. while i < length:
  61. code = ord(codestring[i])
  62. print opcode.opname[code].rjust(20), str(i).rjust(3),
  63. i += 1
  64. if code >= opcode.HAVE_ARGUMENT:
  65. arg = _get_argument(codeobj, codestring, i, code)
  66. i += 2
  67. if code in opcode.hascompare:
  68. print " ({0})".format(arg)
  69. elif code in opcode.hasjabs:
  70. print " (to {0})".format(arg)
  71. elif code in opcode.hasjrel:
  72. print " (+{0})".format(arg)
  73. elif code in OP_HASBUILD:
  74. print " ({0} items)".format(arg)
  75. elif code in OP_HASCALL:
  76. print " ({0} args, {1} kwargs)".format(*arg)
  77. elif code in OP_HASMAKE:
  78. print " ({0} defaults)".format(arg)
  79. else:
  80. print " ({0!r})".format(arg)
  81. else:
  82. print
  83. def run():
  84. parser = ArgumentParser(prog="func-smash",
  85. description="Smash functions together!")
  86. parser.add_argument("path", metavar="path", help="path to the corpus file")
  87. parser.add_argument("-n", "--no-run", action="store_true",
  88. help="don't run the function after smashing it")
  89. parser.add_argument("-i", "--interpret", action="store_true",
  90. help="open a interpreter session after smashing")
  91. parser.add_argument("-p", "--prettify", action="store_true",
  92. help="prettify the function after smashing it")
  93. args = parser.parse_args()
  94. path, name = os.path.split(args.path)
  95. name = re.sub("\.pyc?$", "", name)
  96. file_obj, path, desc = imp.find_module(name, [path])
  97. try:
  98. module = imp.load_module(name, file_obj, path, desc)
  99. finally:
  100. file_obj.close()
  101. corpus = module.corpus
  102. chain = make_chain(corpus)
  103. func = make_function(chain, "func")
  104. print "Using {0}-function corpus.".format(len(corpus))
  105. print "Smashed function disassembly:"
  106. print_function(func)
  107. if args.prettify:
  108. print "\nFunction as python code:"
  109. prettify.prettify_function(func, indent=4)
  110. if not args.no_run:
  111. arg = 12
  112. print
  113. print "func({0}) =".format(arg), func(arg)
  114. if args.interpret:
  115. variables = dict(globals().items() + locals().items())
  116. interact(banner="", local=variables)
  117. def _parse_func(func, chain):
  118. codeobj = func.__code__
  119. codestring = codeobj.co_code
  120. length = len(codestring)
  121. i = 0
  122. prevcode = MARKOV_START
  123. while i < length:
  124. code = ord(codestring[i])
  125. i += 1
  126. if code >= opcode.HAVE_ARGUMENT:
  127. arg = _get_argument(codeobj, codestring, i, code)
  128. i += 2
  129. else:
  130. arg = None
  131. _chain_append(chain, prevcode, (code, arg))
  132. prevcode = code
  133. _chain_append(chain, code, (MARKOV_END, None))
  134. def _get_argument(codeobj, codestring, i, code):
  135. arg = ord(codestring[i]) + ord(codestring[i + 1]) * 256
  136. if code in opcode.hasconst:
  137. return codeobj.co_consts[arg]
  138. elif code in opcode.hasname:
  139. return codeobj.co_names[arg]
  140. elif code in opcode.haslocal:
  141. return codeobj.co_varnames[arg]
  142. elif code in opcode.hascompare:
  143. return opcode.cmp_op[arg]
  144. elif code in OP_HASCALL:
  145. return (ord(codestring[i]), ord(codestring[i + 1]))
  146. elif code in OP_LITERALARG:
  147. return arg
  148. raise NotImplementedError(code, opcode.opname[code])
  149. def _chain_append(chain, first, second):
  150. try:
  151. chain[first].append(second)
  152. except KeyError:
  153. chain[first] = [second]
  154. def _make_codes(chain):
  155. codes = []
  156. instruction = random.choice(chain[MARKOV_START])
  157. constants, names, varnames = [], [], []
  158. while 1:
  159. code, arg = instruction
  160. if code == MARKOV_END:
  161. break
  162. codes.append(code)
  163. if code >= opcode.HAVE_ARGUMENT:
  164. if code in opcode.hasconst:
  165. if arg not in constants:
  166. constants.append(arg)
  167. _coerce_arg_into_codes(codes, constants.index(arg))
  168. elif code in opcode.hasname:
  169. if arg not in names:
  170. names.append(arg)
  171. _coerce_arg_into_codes(codes, names.index(arg))
  172. elif code in opcode.haslocal:
  173. if arg not in varnames:
  174. varnames.append(arg)
  175. _coerce_arg_into_codes(codes, varnames.index(arg))
  176. elif code in opcode.hascompare:
  177. _coerce_arg_into_codes(codes, opcode.cmp_op.index(arg))
  178. elif code in OP_HASCALL:
  179. codes.append(arg[0])
  180. codes.append(arg[1])
  181. elif code in OP_LITERALARG:
  182. _coerce_arg_into_codes(codes, arg)
  183. else:
  184. raise NotImplementedError(code, opcode.opname[code])
  185. instruction = random.choice(chain[code])
  186. return codes, tuple(constants), tuple(names), tuple(varnames)
  187. def _opcode_to_opname(code):
  188. if code == MARKOV_START:
  189. return "START"
  190. elif code == MARKOV_END:
  191. return "END"
  192. return opcode.opname[code]
  193. def _coerce_arg_into_codes(codes, arg):
  194. codes.append(arg % 256)
  195. codes.append(arg // 256)
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    run()