Generates random Python functions using Markov chains
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

188 lines
5.8 KiB

  1. from code import interact
  2. import imp
  3. import opcode
  4. import os
  5. import random
  6. import re
  7. import sys
  8. import types
  9. OPMAP = opcode.opmap
  10. OP_HASCALL = [OPMAP[n] for n in ("CALL_FUNCTION", "CALL_FUNCTION_VAR",
  11. "CALL_FUNCTION_KW", "CALL_FUNCTION_VAR_KW")]
  12. OP_MAKEFUNC = [OPMAP[n] for n in ("MAKE_FUNCTION", "MAKE_CLOSURE")]
  13. MARKOV_START = -1
  14. MARKOV_END = -2
  def make_chain(funcs):
      """Build a Markov chain of bytecode instructions from a corpus.

      Every function in ``funcs`` is passed to ``_parse_func``, which
      records that function's instruction transitions in one shared dict.

      Returns the dict: each key is an opcode (or ``MARKOV_START``) and
      each value is the list of ``(opcode, argument)`` entries that were
      seen immediately after it.
      """
      transitions = {}
      for function in funcs:
          _parse_func(function, transitions)
      return transitions
  def make_function(chain, name, argcount=1):
      """Generate a random function by walking the Markov ``chain``.

      chain    -- transition table as produced by ``make_chain``.
      name     -- name given to both the code object and the function.
      argcount -- number of positional arguments the function accepts.

      Returns a new function object whose globals are this module's
      globals.  The code object uses a fixed stack size of 1024, no flags,
      the filename "<smash>", first line number 1 and an empty line table.
      """
      codes, constants, names, varnames = _make_codes(chain)

      # Arguments live in the first local slots, so the code object needs at
      # least ``argcount`` locals.  The random walk may reference fewer local
      # names than that; pad with placeholders so co_nlocals and co_varnames
      # stay consistent with co_argcount.
      if len(varnames) < argcount:
          varnames += tuple(
              "_arg{0}".format(slot)
              for slot in range(len(varnames), argcount))

      codestring = "".join(chr(code) for code in codes)
      lnotab = ""
      code = types.CodeType(argcount, len(varnames), 1024, 0, codestring,
                            constants, names, varnames, "<smash>", name, 1,
                            lnotab)
      return types.FunctionType(code, globals(), name)
  29. def print_chain(chain):
  30. print "{"
  31. for code in sorted(chain.keys()):
  32. name = _opcode_to_opname(code)
  33. target_counts = {}
  34. for tcode in chain[code]:
  35. target = _opcode_to_opname(tcode[0])
  36. if tcode[0] >= opcode.HAVE_ARGUMENT:
  37. target = "{0}({1!r})".format(target, tcode[1])
  38. try:
  39. target_counts[target] += 1
  40. except KeyError:
  41. target_counts[target] = 1
  42. targets = []
  43. for target, count in target_counts.iteritems():
  44. if count == 1:
  45. targets.append(target)
  46. else:
  47. targets.append("{0}x {1}".format(count, target))
  48. targets.sort()
  49. print name.rjust(20), "=> [{0}]".format(", ".join(targets))
  50. print "}"
  51. def print_function(func):
  52. codeobj = func.__code__
  53. codestring = codeobj.co_code
  54. length = len(codestring)
  55. i = 0
  56. while i < length:
  57. code = ord(codestring[i])
  58. i += 1
  59. print opcode.opname[code].rjust(20),
  60. if code >= opcode.HAVE_ARGUMENT:
  61. arg = _get_argument(codeobj, codestring, i, code)
  62. i += 2
  63. print " ({0!r})".format(arg)
  64. else:
  65. print
  def run():
      """Command-line entry point: load a corpus module and run the demo.

      The first command-line argument is the path to a Python module (a
      trailing ".py" or ".pyc" is stripped to get the module name).  The
      module is imported and its ``corpus`` attribute is handed to
      ``_demo``.

      Raises RuntimeError when no filename argument was given.
      """
      # Keep the try body down to the one expression that can raise
      # IndexError, so unrelated errors are never mistaken for a missing
      # argument.
      try:
          filename = sys.argv[1]
      except IndexError:
          raise RuntimeError("Needs a filename as a command-line argument")

      path, name = os.path.split(filename)
      name = re.sub(r"\.pyc?$", "", name)

      file_obj, path, desc = imp.find_module(name, [path])
      try:
          module = imp.load_module(name, file_obj, path, desc)
      finally:
          # find_module returns None instead of a file object for packages,
          # so only close when there is actually something to close.
          if file_obj:
              file_obj.close()
      _demo(module.corpus)
  def _parse_func(func, chain):
      """Record the instruction transitions of ``func`` in ``chain``.

      Walks the function's bytecode one instruction at a time.  For each
      instruction, an ``(opcode, argument)`` entry is appended under the
      previous instruction's opcode, with ``MARKOV_START`` standing in for
      "previous" at the first instruction.  After the walk, a
      ``(MARKOV_END, None)`` entry is appended under the last opcode.

      ``chain`` is modified in place; nothing is returned.
      """
      code_obj = func.__code__
      raw = code_obj.co_code
      end = len(raw)
      pos = 0
      previous = MARKOV_START
      while pos < end:
          op = ord(raw[pos])
          pos += 1
          operand = None
          if op >= opcode.HAVE_ARGUMENT:
              # Opcodes at or above HAVE_ARGUMENT carry a two-byte operand.
              operand = _get_argument(code_obj, raw, pos, op)
              pos += 2
          _chain_append(chain, previous, (op, operand))
          previous = op
      _chain_append(chain, op, (MARKOV_END, None))
  def _get_argument(codeobj, codestring, i, code):
      """Resolve the two-byte operand at ``codestring[i]`` for ``code``.

      codeobj    -- code object whose lookup tables are consulted.
      codestring -- raw bytecode string.
      i          -- offset of the operand's first (low) byte.
      code       -- opcode the operand belongs to.

      Returns the referenced constant, name, local-variable name or
      comparison operator for table-based opcodes, the raw ``(low, high)``
      byte pair for call opcodes, and the integer operand for
      function-building opcodes.

      Raises NotImplementedError for any other opcode.
      """
      low = ord(codestring[i])
      high = ord(codestring[i + 1])
      index = low + high * 256  # operand bytes are stored low byte first

      lookups = (
          (opcode.hasconst, codeobj.co_consts),
          (opcode.hasname, codeobj.co_names),
          (opcode.haslocal, codeobj.co_varnames),
          (opcode.hascompare, opcode.cmp_op),
      )
      for group, table in lookups:
          if code in group:
              return table[index]

      if code in OP_HASCALL:
          return (low, high)
      if code in OP_MAKEFUNC:
          return index
      raise NotImplementedError(code, opcode.opname[code])
  110. def _chain_append(chain, first, second):
  111. try:
  112. chain[first].append(second)
  113. except KeyError:
  114. chain[first] = [second]
  def _make_codes(chain):
      """Random-walk ``chain`` and assemble bytecode plus its lookup tables.

      Starting from a random entry under ``MARKOV_START``, follows random
      transitions until ``MARKOV_END`` is drawn.  Each visited opcode is
      emitted, followed by a two-byte operand when it takes one.  Operands
      that refer to constants, names or locals are re-indexed into fresh
      tables built in first-seen order.

      Returns ``(codes, constants, names, varnames)``: a list of integer
      byte values and three tuples for the new code object.

      Raises NotImplementedError for an argument-taking opcode of an
      unsupported kind.
      """
      codes = []
      constants, names, varnames = [], [], []
      # Constants are pooled by (type, repr) instead of by ==.  Equality
      # would merge distinct constants such as 1 and 1.0, 0.0 and -0.0, or
      # 'a' and u'a', making the generated code load the wrong value.
      constant_slots = {}

      instruction = random.choice(chain[MARKOV_START])
      while True:
          code, arg = instruction
          if code == MARKOV_END:
              break
          codes.append(code)
          if code >= opcode.HAVE_ARGUMENT:
              if code in opcode.hasconst:
                  key = (type(arg), repr(arg))
                  if key not in constant_slots:
                      constant_slots[key] = len(constants)
                      constants.append(arg)
                  _coerce_arg_into_codes(codes, constant_slots[key])
              elif code in opcode.hasname:
                  _coerce_arg_into_codes(codes, _pool_index(names, arg))
              elif code in opcode.haslocal:
                  _coerce_arg_into_codes(codes, _pool_index(varnames, arg))
              elif code in opcode.hascompare:
                  _coerce_arg_into_codes(codes, opcode.cmp_op.index(arg))
              elif code in OP_HASCALL:
                  # Call operands were stored as their two raw bytes.
                  codes.append(arg[0])
                  codes.append(arg[1])
              elif code in OP_MAKEFUNC:
                  _coerce_arg_into_codes(codes, arg)
              else:
                  raise NotImplementedError(code, opcode.opname[code])
          # The next state depends only on the opcode just emitted.
          instruction = random.choice(chain[code])
      return codes, tuple(constants), tuple(names), tuple(varnames)


  def _pool_index(pool, value):
      """Return the index of ``value`` in ``pool``, appending it if absent."""
      if value not in pool:
          pool.append(value)
      return pool.index(value)
  def _opcode_to_opname(code):
      """Return a display name for ``code``.

      The chain's start and end markers are shown as "START" and "END";
      anything else is looked up in ``opcode.opname``.
      """
      markers = {MARKOV_START: "START", MARKOV_END: "END"}
      if code in markers:
          return markers[code]
      return opcode.opname[code]
  154. def _coerce_arg_into_codes(codes, arg):
  155. codes.append(arg % 256)
  156. codes.append(arg // 256)
  157. def _demo(corpus, arg=12.0):
  158. chain = make_chain(corpus)
  159. func = make_function(chain, "func")
  160. print "Using {0}-function corpus.".format(len(corpus))
  161. print "Smashed function disassembly:"
  162. print_function(func)
  163. print
  164. print "func({0}) =".format(arg), func(arg)
  165. if len(sys.argv) > 2 and sys.argv[2] == "-i":
  166. variables = dict(globals().items() + locals().items())
  167. interact(banner="", local=variables)
  # Run the command-line demo only when executed as a script, not on import.
  if __name__ == "__main__":
      run()