pickletools
index
/usr/local/lib/python2.3/pickletools.py

"Executable documentation" for the pickle module.
 
Extensive comments about the pickle protocols and pickle-machine opcodes
can be found here.  Some functions meant for external use:
 
genops(pickle)
   Generate all the opcodes in a pickle, as (opcode, arg, position) triples.
 
dis(pickle, out=None, memo=None, indentlevel=4)
   Print a symbolic disassembly of a pickle.

 
Classes
       
__builtin__.object
ArgumentDescriptor
OpcodeInfo
StackObject

 
class ArgumentDescriptor(__builtin__.object)
     Methods defined here:
__init__(self, name, n, reader, doc)

Data and other attributes defined here:
__slots__ = ('name', 'n', 'reader', 'doc')
doc = <member 'doc' of 'ArgumentDescriptor' objects>
n = <member 'n' of 'ArgumentDescriptor' objects>
name = <member 'name' of 'ArgumentDescriptor' objects>
reader = <member 'reader' of 'ArgumentDescriptor' objects>

 
class OpcodeInfo(__builtin__.object)
     Methods defined here:
__init__(self, name, code, arg, stack_before, stack_after, proto, doc)

Data and other attributes defined here:
__slots__ = ('name', 'code', 'arg', 'stack_before', 'stack_after', 'proto', 'doc')
arg = <member 'arg' of 'OpcodeInfo' objects>
code = <member 'code' of 'OpcodeInfo' objects>
doc = <member 'doc' of 'OpcodeInfo' objects>
name = <member 'name' of 'OpcodeInfo' objects>
proto = <member 'proto' of 'OpcodeInfo' objects>
stack_after = <member 'stack_after' of 'OpcodeInfo' objects>
stack_before = <member 'stack_before' of 'OpcodeInfo' objects>

 
class StackObject(__builtin__.object)
     Methods defined here:
__init__(self, name, obtype, doc)
__repr__(self)

Data and other attributes defined here:
__slots__ = ('name', 'obtype', 'doc')
doc = <member 'doc' of 'StackObject' objects>
name = <member 'name' of 'StackObject' objects>
obtype = <member 'obtype' of 'StackObject' objects>

 
Functions
       
dis(pickle, out=None, memo=None, indentlevel=4)
Produce a symbolic disassembly of a pickle.
 
'pickle' is a file-like object, or string, containing at least one
pickle.  The pickle is disassembled from the current position, through
the first STOP opcode encountered.
 
Optional arg 'out' is a file-like object to which the disassembly is
printed.  It defaults to sys.stdout.
 
Optional arg 'memo' is a Python dict, used as the pickle's memo.  It
may be mutated by dis(), if the pickle contains PUT or BINPUT opcodes.
Passing the same memo object to another dis() call then allows disassembly
to proceed across multiple pickles that were all created by the same
pickler with the same memo.  Ordinarily you don't need to worry about this.
 
Optional arg indentlevel is the number of blanks by which to indent
a new MARK level.  It defaults to 4.
 
In addition to printing the disassembly, some sanity checks are made:
 
+ All embedded opcode arguments "make sense".
 
+ Explicit and implicit pop operations have enough items on the stack.
 
+ When an opcode implicitly refers to a markobject, a markobject is
  actually on the stack.
 
+ A memo entry isn't referenced before it's defined.
 
+ The markobject isn't stored in the memo.
 
+ A memo entry isn't redefined.
genops(pickle)
Generate all the opcodes in a pickle.
 
'pickle' is a file-like object, or string, containing the pickle.
 
Each opcode in the pickle is generated, from the current pickle position,
stopping after a STOP opcode is delivered.  A triple is generated for
each opcode:
 
    opcode, arg, pos
 
opcode is an OpcodeInfo record, describing the current opcode.
 
If the opcode has an argument embedded in the pickle, arg is its decoded
value, as a Python object.  If the opcode doesn't have an argument, arg
is None.
 
If the pickle has a tell() method, pos was the value of pickle.tell()
before reading the current opcode.  If the pickle is a string object,
it's wrapped in a StringIO object, and the latter's tell() result is
used.  Else (the pickle doesn't have a tell(), and it's not obvious how
to query its current position) pos is None.
read_decimalnl_long(f)
>>> import StringIO
 
>>> read_decimalnl_long(StringIO.StringIO("1234\n56"))
Traceback (most recent call last):
...
ValueError: trailing 'L' required in '1234'
 
Someday the trailing 'L' will probably go away from this output.
 
>>> read_decimalnl_long(StringIO.StringIO("1234L\n56"))
1234L
 
>>> read_decimalnl_long(StringIO.StringIO("123456789012345678901234L\n6"))
123456789012345678901234L
read_decimalnl_short(f)
>>> import StringIO
>>> read_decimalnl_short(StringIO.StringIO("1234\n56"))
1234
 
>>> read_decimalnl_short(StringIO.StringIO("1234L\n56"))
Traceback (most recent call last):
...
ValueError: trailing 'L' not allowed in '1234L'
read_float8(f)
>>> import StringIO, struct
>>> raw = struct.pack(">d", -1.25)
>>> raw
'\xbf\xf4\x00\x00\x00\x00\x00\x00'
>>> read_float8(StringIO.StringIO(raw + "\n"))
-1.25
read_floatnl(f)
>>> import StringIO
>>> read_floatnl(StringIO.StringIO("-1.25\n6"))
-1.25
read_int4(f)
>>> import StringIO
>>> read_int4(StringIO.StringIO('\xff\x00\x00\x00'))
255
>>> read_int4(StringIO.StringIO('\x00\x00\x00\x80')) == -(2**31)
True
read_long1(f)
>>> import StringIO
>>> read_long1(StringIO.StringIO("\x00"))
0L
>>> read_long1(StringIO.StringIO("\x02\xff\x00"))
255L
>>> read_long1(StringIO.StringIO("\x02\xff\x7f"))
32767L
>>> read_long1(StringIO.StringIO("\x02\x00\xff"))
-256L
>>> read_long1(StringIO.StringIO("\x02\x00\x80"))
-32768L
read_long4(f)
>>> import StringIO
>>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\xff\x00"))
255L
>>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\xff\x7f"))
32767L
>>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\x00\xff"))
-256L
>>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\x00\x80"))
-32768L
>>> read_long4(StringIO.StringIO("\x00\x00\x00\x00"))
0L
read_string1(f)
>>> import StringIO
>>> read_string1(StringIO.StringIO("\x00"))
''
>>> read_string1(StringIO.StringIO("\x03abcdef"))
'abc'
read_string4(f)
>>> import StringIO
>>> read_string4(StringIO.StringIO("\x00\x00\x00\x00abc"))
''
>>> read_string4(StringIO.StringIO("\x03\x00\x00\x00abcdef"))
'abc'
>>> read_string4(StringIO.StringIO("\x00\x00\x00\x03abcdef"))
Traceback (most recent call last):
...
ValueError: expected 50331648 bytes in a string4, but only 6 remain
read_stringnl(f, decode=True, stripquotes=True)
>>> import StringIO
>>> read_stringnl(StringIO.StringIO("'abcd'\nefg\n"))
'abcd'
 
>>> read_stringnl(StringIO.StringIO("\n"))
Traceback (most recent call last):
...
ValueError: no string quotes around ''
 
>>> read_stringnl(StringIO.StringIO("\n"), stripquotes=False)
''
 
>>> read_stringnl(StringIO.StringIO("''\n"))
''
 
>>> read_stringnl(StringIO.StringIO('"abcd"'))
Traceback (most recent call last):
...
ValueError: no newline found when trying to read stringnl
 
Embedded escapes are undone in the result.
>>> read_stringnl(StringIO.StringIO(r"'a\n\\b\x00c\td'" + "\n'e'"))
'a\n\\b\x00c\td'
read_stringnl_noescape(f)
read_stringnl_noescape_pair(f)
>>> import StringIO
>>> read_stringnl_noescape_pair(StringIO.StringIO("Queue\nEmpty\njunk"))
'Queue Empty'
read_uint1(f)
>>> import StringIO
>>> read_uint1(StringIO.StringIO('\xff'))
255
read_uint2(f)
>>> import StringIO
>>> read_uint2(StringIO.StringIO('\xff\x00'))
255
>>> read_uint2(StringIO.StringIO('\xff\xff'))
65535
read_unicodestring4(f)
>>> import StringIO
>>> s = u'abcd\uabcd'
>>> enc = s.encode('utf-8')
>>> enc
'abcd\xea\xaf\x8d'
>>> n = chr(len(enc)) + chr(0) * 3  # little-endian 4-byte length
>>> t = read_unicodestring4(StringIO.StringIO(n + enc + 'junk'))
>>> s == t
True
 
>>> read_unicodestring4(StringIO.StringIO(n + enc[:-1]))
Traceback (most recent call last):
...
ValueError: expected 7 bytes in a unicodestring4, but only 6 remain
read_unicodestringnl(f)
>>> import StringIO
>>> read_unicodestringnl(StringIO.StringIO("abc\uabcd\njunk"))
u'abc\uabcd'

 
Data
TAKEN_FROM_ARGUMENT1 = -2
TAKEN_FROM_ARGUMENT4 = -3
UP_TO_NEWLINE = -1
__test__ = {'disassembler_memo_test': '\n>>> import pickle\n>>> from StringIO import Stri...18: . STOP\nhighest protocol among opcodes = 2\n', 'disassembler_test': "\n>>> import pickle\n>>> x = [1, 2, (3, 4), {'abc'...14: . STOP\nhighest protocol among opcodes = 2\n"}
anyobject = any
code2op = {'(': <pickletools.OpcodeInfo object>, ')': <pickletools.OpcodeInfo object>, '.': <pickletools.OpcodeInfo object>, '0': <pickletools.OpcodeInfo object>, '1': <pickletools.OpcodeInfo object>, '2': <pickletools.OpcodeInfo object>, 'F': <pickletools.OpcodeInfo object>, 'G': <pickletools.OpcodeInfo object>, 'I': <pickletools.OpcodeInfo object>, 'J': <pickletools.OpcodeInfo object>, ...}
decimalnl_long = <pickletools.ArgumentDescriptor object>
decimalnl_short = <pickletools.ArgumentDescriptor object>
float8 = <pickletools.ArgumentDescriptor object>
floatnl = <pickletools.ArgumentDescriptor object>
int4 = <pickletools.ArgumentDescriptor object>
long1 = <pickletools.ArgumentDescriptor object>
long4 = <pickletools.ArgumentDescriptor object>
markobject = mark
opcodes = [<pickletools.OpcodeInfo object>, <pickletools.OpcodeInfo object>, <pickletools.OpcodeInfo object>, <pickletools.OpcodeInfo object>, <pickletools.OpcodeInfo object>, <pickletools.OpcodeInfo object>, <pickletools.OpcodeInfo object>, <pickletools.OpcodeInfo object>, <pickletools.OpcodeInfo object>, <pickletools.OpcodeInfo object>, <pickletools.OpcodeInfo object>, <pickletools.OpcodeInfo object>, <pickletools.OpcodeInfo object>, <pickletools.OpcodeInfo object>, <pickletools.OpcodeInfo object>, <pickletools.OpcodeInfo object>, <pickletools.OpcodeInfo object>, <pickletools.OpcodeInfo object>, <pickletools.OpcodeInfo object>, <pickletools.OpcodeInfo object>, ...]
pybool = bool
pydict = dict
pyfloat = float
pyint = int
pyinteger_or_bool = int_or_bool
pylist = list
pylong = long
pynone = None
pystring = str
pytuple = tuple
pyunicode = unicode
stackslice = stackslice
string1 = <pickletools.ArgumentDescriptor object>
string4 = <pickletools.ArgumentDescriptor object>
stringnl = <pickletools.ArgumentDescriptor object>
stringnl_noescape = <pickletools.ArgumentDescriptor object>
stringnl_noescape_pair = <pickletools.ArgumentDescriptor object>
uint1 = <pickletools.ArgumentDescriptor object>
uint2 = <pickletools.ArgumentDescriptor object>
unicodestring4 = <pickletools.ArgumentDescriptor object>
unicodestringnl = <pickletools.ArgumentDescriptor object>