Generates random Python functions using Markov chains
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

func_smash.py 4.9 KiB

12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. from code import interact
  2. import imp
  3. import opcode
  4. import os
  5. import random
  6. import re
  7. import sys
  8. import types
  9. MARKOV_START = -1
  10. MARKOV_END = -2
  11. def make_chain(funcs):
  12. chain = {}
  13. for func in funcs:
  14. _parse_func(func, chain)
  15. return chain
  16. def make_function(chain, name, argcount=1):
  17. codes, constants, varnames = _make_codes(chain)
  18. names = ()
  19. codestring = "".join([chr(code) for code in codes])
  20. lnotab = ""
  21. code = types.CodeType(argcount, len(varnames), 1024, 0, codestring,
  22. constants, names, varnames, "<smash>", name, 1,
  23. lnotab)
  24. func = types.FunctionType(code, globals(), name)
  25. return func
  26. def print_chain(chain):
  27. print "{"
  28. for code in sorted(chain.keys()):
  29. name = _opcode_to_opname(code)
  30. target_counts = {}
  31. for tcode in chain[code]:
  32. target = _opcode_to_opname(tcode[0])
  33. if tcode[0] >= opcode.HAVE_ARGUMENT:
  34. target = "{0}({1})".format(target, tcode[1])
  35. try:
  36. target_counts[target] += 1
  37. except KeyError:
  38. target_counts[target] = 1
  39. targets = []
  40. for target, count in target_counts.iteritems():
  41. if count == 1:
  42. targets.append(target)
  43. else:
  44. targets.append("{0}x {1}".format(count, target))
  45. targets.sort()
  46. print name.rjust(20), "=> [{0}]".format(", ".join(targets))
  47. print "}"
  48. def print_function(func):
  49. codeobj = func.__code__
  50. codestring = codeobj.co_code
  51. length = len(codestring)
  52. i = 0
  53. while i < length:
  54. code = ord(codestring[i])
  55. i += 1
  56. print opcode.opname[code].rjust(20),
  57. if code >= opcode.HAVE_ARGUMENT:
  58. arg = _get_argument(codeobj, codestring, i, code)
  59. i += 2
  60. print " ({0})".format(arg)
  61. else:
  62. print
  63. def run():
  64. try:
  65. path, name = os.path.split(sys.argv[1])
  66. name = re.sub("\.pyc?$", "", name)
  67. except IndexError:
  68. raise RuntimeError("Needs a filename as a command-line argument")
  69. file_obj, path, desc = imp.find_module(name, [path])
  70. try:
  71. module = imp.load_module(name, file_obj, path, desc)
  72. finally:
  73. file_obj.close()
  74. _demo(module.corpus)
  75. def _parse_func(func, chain):
  76. codeobj = func.__code__
  77. codestring = codeobj.co_code
  78. length = len(codestring)
  79. i = 0
  80. prevcode = MARKOV_START
  81. while i < length:
  82. code = ord(codestring[i])
  83. i += 1
  84. if code >= opcode.HAVE_ARGUMENT:
  85. arg = _get_argument(codeobj, codestring, i, code)
  86. i += 2
  87. else:
  88. arg = None
  89. _chain_append(chain, prevcode, (code, arg))
  90. prevcode = code
  91. _chain_append(chain, code, (MARKOV_END, None))
  92. def _get_argument(codeobj, codestring, i, code):
  93. arg = ord(codestring[i]) + ord(codestring[i + 1]) * 256
  94. if code in opcode.hasconst:
  95. return codeobj.co_consts[arg]
  96. elif code in opcode.haslocal:
  97. return codeobj.co_varnames[arg]
  98. elif code in opcode.hascompare:
  99. return opcode.cmp_op[arg]
  100. raise NotImplementedError(code, opcode.opname[code])
  101. def _chain_append(chain, first, second):
  102. try:
  103. chain[first].append(second)
  104. except KeyError:
  105. chain[first] = [second]
  106. def _make_codes(chain):
  107. codes = []
  108. instruction = random.choice(chain[MARKOV_START])
  109. constants, varnames = [], []
  110. while 1:
  111. code, arg = instruction
  112. if code == MARKOV_END:
  113. break
  114. codes.append(code)
  115. if code >= opcode.HAVE_ARGUMENT:
  116. if code in opcode.hasconst:
  117. if arg not in constants:
  118. constants.append(arg)
  119. args = constants
  120. elif code in opcode.haslocal:
  121. if arg not in varnames:
  122. varnames.append(arg)
  123. args = varnames
  124. elif code in opcode.hascompare:
  125. args = opcode.cmp_op
  126. else:
  127. raise NotImplementedError(code, opcode.opname[code])
  128. codes.append(args.index(arg) % 256)
  129. codes.append(args.index(arg) // 256)
  130. instruction = random.choice(chain[code])
  131. return codes, tuple(constants), tuple(varnames)
  132. def _opcode_to_opname(code):
  133. if code == MARKOV_START:
  134. return "START"
  135. elif code == MARKOV_END:
  136. return "END"
  137. return opcode.opname[code]
  138. def _demo(corpus, arg=12.0):
  139. chain = make_chain(corpus)
  140. func = make_function(chain, "func")
  141. print "Using {0}-function corpus.".format(len(corpus))
  142. print "Smashed function disassembly:"
  143. print_function(func)
  144. print
  145. print "func({0}) =".format(arg), func(arg)
  146. if len(sys.argv) > 2 and sys.argv[2] == "-i":
  147. variables = dict(globals().items() + locals().items())
  148. interact(banner="", local=variables)
  149. if __name__ == "__main__":
  150. run()