Generates random Python functions using Markov chains
No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.

205 líneas
6.6 KiB

  1. from code import interact
  2. import imp
  3. import opcode
  4. import os
  5. import random
  6. import re
  7. import sys
  8. import types
  9. OPMAP = opcode.opmap
  10. OP_HASBUILD = [OPMAP[n] for n in ("BUILD_TUPLE", "BUILD_LIST", "BUILD_MAP",
  11. "BUILD_SET")]
  12. OP_HASCALL = [OPMAP[n] for n in ("CALL_FUNCTION", "CALL_FUNCTION_VAR",
  13. "CALL_FUNCTION_KW", "CALL_FUNCTION_VAR_KW")]
  14. OP_HASMAKE = [OPMAP[n] for n in ("MAKE_FUNCTION", "MAKE_CLOSURE")]
  15. OP_LITERALARG = opcode.hasjabs + opcode.hasjrel + OP_HASBUILD + OP_HASMAKE
  16. MARKOV_START = -1
  17. MARKOV_END = -2
  18. def make_chain(funcs):
  19. chain = {}
  20. for func in funcs:
  21. _parse_func(func, chain)
  22. return chain
  23. def make_function(chain, name, argcount=1):
  24. codes, constants, names, varnames = _make_codes(chain)
  25. codestring = "".join([chr(code) for code in codes])
  26. lnotab = ""
  27. code = types.CodeType(argcount, len(varnames), 1024, 0, codestring,
  28. constants, names, varnames, "<smash>", name, 1,
  29. lnotab)
  30. func = types.FunctionType(code, globals(), name)
  31. return func
  32. def print_chain(chain):
  33. print "{"
  34. for code in sorted(chain.keys()):
  35. name = _opcode_to_opname(code)
  36. target_counts = {}
  37. for tcode in chain[code]:
  38. target = _opcode_to_opname(tcode[0])
  39. if tcode[0] >= opcode.HAVE_ARGUMENT:
  40. target = "{0}({1!r})".format(target, tcode[1])
  41. try:
  42. target_counts[target] += 1
  43. except KeyError:
  44. target_counts[target] = 1
  45. targets = []
  46. for target, count in target_counts.iteritems():
  47. if count == 1:
  48. targets.append(target)
  49. else:
  50. targets.append("{0}x {1}".format(count, target))
  51. targets.sort()
  52. print name.rjust(20), "=> [{0}]".format(", ".join(targets))
  53. print "}"
  54. def print_function(func):
  55. codeobj = func.__code__
  56. codestring = codeobj.co_code
  57. length = len(codestring)
  58. i = 0
  59. while i < length:
  60. code = ord(codestring[i])
  61. i += 1
  62. print opcode.opname[code].rjust(20),
  63. if code >= opcode.HAVE_ARGUMENT:
  64. arg = _get_argument(codeobj, codestring, i, code)
  65. i += 2
  66. if code in opcode.hascompare:
  67. print " ({0})".format(arg)
  68. elif code in opcode.hasjabs:
  69. print " (to {0})".format(arg)
  70. elif code in opcode.hasjrel:
  71. print " (+{0})".format(arg)
  72. elif code in OP_HASBUILD:
  73. print " ({0} items)".format(arg)
  74. elif code in OP_HASCALL:
  75. print " ({0} args, {1} kwargs)".format(*arg)
  76. elif code in OP_HASMAKE:
  77. print " ({0} defaults)".format(arg)
  78. else:
  79. print " ({0!r})".format(arg)
  80. else:
  81. print
  82. def run():
  83. try:
  84. path, name = os.path.split(sys.argv[1])
  85. name = re.sub("\.pyc?$", "", name)
  86. except IndexError:
  87. raise RuntimeError("Needs a filename as a command-line argument")
  88. file_obj, path, desc = imp.find_module(name, [path])
  89. try:
  90. module = imp.load_module(name, file_obj, path, desc)
  91. finally:
  92. file_obj.close()
  93. _demo(module.corpus)
  94. def _parse_func(func, chain):
  95. codeobj = func.__code__
  96. print codeobj.co_argcount
  97. codestring = codeobj.co_code
  98. length = len(codestring)
  99. i = 0
  100. prevcode = MARKOV_START
  101. while i < length:
  102. code = ord(codestring[i])
  103. i += 1
  104. if code >= opcode.HAVE_ARGUMENT:
  105. arg = _get_argument(codeobj, codestring, i, code)
  106. i += 2
  107. else:
  108. arg = None
  109. _chain_append(chain, prevcode, (code, arg))
  110. prevcode = code
  111. _chain_append(chain, code, (MARKOV_END, None))
  112. def _get_argument(codeobj, codestring, i, code):
  113. arg = ord(codestring[i]) + ord(codestring[i + 1]) * 256
  114. if code in opcode.hasconst:
  115. return codeobj.co_consts[arg]
  116. elif code in opcode.hasname:
  117. return codeobj.co_names[arg]
  118. elif code in opcode.haslocal:
  119. return codeobj.co_varnames[arg]
  120. elif code in opcode.hascompare:
  121. return opcode.cmp_op[arg]
  122. elif code in OP_HASCALL:
  123. return (ord(codestring[i]), ord(codestring[i + 1]))
  124. elif code in OP_LITERALARG:
  125. return arg
  126. raise NotImplementedError(code, opcode.opname[code])
  127. def _chain_append(chain, first, second):
  128. try:
  129. chain[first].append(second)
  130. except KeyError:
  131. chain[first] = [second]
  132. def _make_codes(chain):
  133. codes = []
  134. instruction = random.choice(chain[MARKOV_START])
  135. constants, names, varnames = [], [], []
  136. while 1:
  137. code, arg = instruction
  138. if code == MARKOV_END:
  139. break
  140. codes.append(code)
  141. if code >= opcode.HAVE_ARGUMENT:
  142. if code in opcode.hasconst:
  143. if arg not in constants:
  144. constants.append(arg)
  145. _coerce_arg_into_codes(codes, constants.index(arg))
  146. elif code in opcode.hasname:
  147. if arg not in names:
  148. names.append(arg)
  149. _coerce_arg_into_codes(codes, names.index(arg))
  150. elif code in opcode.haslocal:
  151. if arg not in varnames:
  152. varnames.append(arg)
  153. _coerce_arg_into_codes(codes, varnames.index(arg))
  154. elif code in opcode.hascompare:
  155. _coerce_arg_into_codes(codes, opcode.cmp_op.index(arg))
  156. elif code in OP_HASCALL:
  157. codes.append(arg[0])
  158. codes.append(arg[1])
  159. elif code in OP_LITERALARG:
  160. _coerce_arg_into_codes(codes, arg)
  161. else:
  162. raise NotImplementedError(code, opcode.opname[code])
  163. instruction = random.choice(chain[code])
  164. return codes, tuple(constants), tuple(names), tuple(varnames)
  165. def _opcode_to_opname(code):
  166. if code == MARKOV_START:
  167. return "START"
  168. elif code == MARKOV_END:
  169. return "END"
  170. return opcode.opname[code]
  171. def _coerce_arg_into_codes(codes, arg):
  172. codes.append(arg % 256)
  173. codes.append(arg // 256)
  174. def _demo(corpus, arg=12.0):
  175. chain = make_chain(corpus)
  176. func = make_function(chain, "func")
  177. print "Using {0}-function corpus.".format(len(corpus))
  178. print "Smashed function disassembly:"
  179. print_function(func)
  180. print
  181. print "func({0}) =".format(arg), func(arg)
  182. if len(sys.argv) > 2 and sys.argv[2] == "-i":
  183. variables = dict(globals().items() + locals().items())
  184. interact(banner="", local=variables)
  185. if __name__ == "__main__":
  186. run()