Generates random Python functions using Markov chains
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

177 lines
4.9 KiB

  1. import imp
  2. import opcode
  3. import os
  4. import random
  5. import re
  6. import sys
  7. import traceback
  8. import types
  9. MARKOV_START = -1
  10. MARKOV_END = -2
  11. def run():
  12. try:
  13. path, name = os.path.split(sys.argv[1])
  14. name = re.sub("\.pyc?$", "", name)
  15. except IndexError:
  16. raise RuntimeError("Needs a filename as a command-line argument")
  17. f, path, desc = imp.find_module(name, [path])
  18. try:
  19. module = imp.load_module(name, f, path, desc)
  20. finally:
  21. f.close()
  22. corpus = module.corpus
  23. chain = make_chain(corpus)
  24. func = make_function(chain, "func")
  25. print "Using {0}-function corpus.".format(len(corpus))
  26. print "Smashed function disassembly:"
  27. print_function(func)
  28. print
  29. n = 12.0
  30. print "func(%s) =" % n, func(n)
  31. if len(sys.argv) > 2 and sys.argv[2] == "-i": # Allow input after the fact
  32. print
  33. while 1:
  34. try:
  35. input(">>> ")
  36. except EOFError:
  37. break
  38. except Exception:
  39. traceback.print_exc()
  40. def make_chain(funcs):
  41. chain = {}
  42. for func in funcs:
  43. _parse_func(func, chain)
  44. return chain
  45. def make_function(chain, name, argcount=1):
  46. codes, constants, varnames = _make_codes(chain)
  47. nlocals = len(varnames)
  48. stacksize = 1024 # High limit?
  49. flags = 0 # Denotes funcs with *args and/or **kwargs; nothing for now
  50. codestring = "".join([chr(code) for code in codes])
  51. names = ()
  52. filename = "<smash>"
  53. firstlineno = 1
  54. lnotab = ""
  55. code = types.CodeType(argcount, nlocals, stacksize, flags, codestring,
  56. constants, names, varnames, filename, name,
  57. firstlineno, lnotab)
  58. func = types.FunctionType(code, globals(), name)
  59. return func
  60. def print_chain(chain):
  61. print "{"
  62. for key in sorted(chain.keys()):
  63. op = _int_to_opname(key)
  64. targets = {}
  65. for op2 in chain[key]:
  66. target = _int_to_opname(op2[0])
  67. if op2[0] >= opcode.HAVE_ARGUMENT:
  68. target = "{0}({1})".format(target, op2[1])
  69. try:
  70. targets[target] += 1
  71. except KeyError:
  72. targets[target] = 1
  73. targs = []
  74. for optarget, count in targets.iteritems():
  75. if count == 1:
  76. targs.append(optarget)
  77. else:
  78. targs.append("{0}x {1}".format(count, optarget))
  79. targs.sort()
  80. print op.rjust(20), "=> [{0}]".format(", ".join(targs))
  81. print "}"
  82. def print_function(func):
  83. co = func.__code__
  84. code = co.co_code
  85. n = len(code)
  86. i = 0
  87. while i < n:
  88. op = ord(code[i])
  89. i += 1
  90. print opcode.opname[op].rjust(20),
  91. if op >= opcode.HAVE_ARGUMENT:
  92. arg = _get_argument(co, code, i, op)
  93. i += 2
  94. print " ({0})".format(arg)
  95. else:
  96. print
  97. def _parse_func(func, chain):
  98. co = func.__code__
  99. code = co.co_code
  100. n = len(code)
  101. i = 0
  102. lastop = MARKOV_START
  103. while i < n:
  104. op = ord(code[i])
  105. i += 1
  106. if op >= opcode.HAVE_ARGUMENT:
  107. arg = _get_argument(co, code, i, op)
  108. i += 2
  109. else:
  110. arg = None
  111. _chain_append(chain, lastop, (op, arg))
  112. lastop = op
  113. _chain_append(chain, op, (MARKOV_END, None))
  114. def _get_argument(co, code, i, op):
  115. oparg = ord(code[i]) + ord(code[i + 1]) * 256
  116. if op in opcode.hasconst:
  117. return co.co_consts[oparg]
  118. elif op in opcode.haslocal:
  119. return co.co_varnames[oparg]
  120. elif op in opcode.hascompare:
  121. return opcode.cmp_op[oparg]
  122. raise NotImplementedError(op, opcode.opname[op])
  123. def _chain_append(chain, first, second):
  124. try:
  125. chain[first].append(second)
  126. except KeyError:
  127. chain[first] = [second]
  128. def _make_codes(chain):
  129. codes = []
  130. code = random.choice(chain[MARKOV_START])
  131. constants, varnames = [], []
  132. while 1:
  133. op, arg = code
  134. if op == MARKOV_END:
  135. break
  136. codes.append(op)
  137. if op >= opcode.HAVE_ARGUMENT:
  138. if op in opcode.hasconst:
  139. if arg not in constants:
  140. constants.append(arg)
  141. args = constants
  142. elif op in opcode.haslocal:
  143. if arg not in varnames:
  144. varnames.append(arg)
  145. args = varnames
  146. elif op in opcode.hascompare:
  147. args = opcode.cmp_op
  148. else:
  149. raise NotImplementedError(op, opcode.opname[op])
  150. codes.append(args.index(arg) % 256)
  151. codes.append(args.index(arg) // 256)
  152. code = random.choice(chain[op])
  153. return codes, tuple(constants), tuple(varnames)
  154. def _int_to_opname(op):
  155. if op == MARKOV_START:
  156. return "START"
  157. elif op == MARKOV_END:
  158. return "END"
  159. return opcode.opname[op]
  160. if __name__ == "__main__":
  161. run()