summaryrefslogtreecommitdiff
path: root/dynamic/pg_encoder.py
diff options
context:
space:
mode:
Diffstat (limited to 'dynamic/pg_encoder.py')
-rw-r--r--dynamic/pg_encoder.py377
1 files changed, 377 insertions, 0 deletions
diff --git a/dynamic/pg_encoder.py b/dynamic/pg_encoder.py
new file mode 100644
index 0000000..7cf0c8e
--- /dev/null
+++ b/dynamic/pg_encoder.py
@@ -0,0 +1,377 @@
+# Online Python Tutor
+# https://github.com/pgbovine/OnlinePythonTutor/
+#
+# Copyright (C) Philip J. Guo (philip@pgbovine.net)
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+# Thanks to John DeNero for making the encoder work on both Python 2 and 3
+
+
+# Given an arbitrary piece of Python data, encode it in such a manner
+# that it can be later encoded into JSON.
+# http://json.org/
+#
+# We use this function to encode run-time traces of data structures
+# to send to the front-end.
+#
+# Format:
+# Primitives:
+# * None, int, long, float, str, bool - unchanged
+# (json.dumps encodes these fine verbatim, except for inf, -inf, and nan)
+#
+# exceptions: float('inf') -> ['SPECIAL_FLOAT', 'Infinity']
+# float('-inf') -> ['SPECIAL_FLOAT', '-Infinity']
+# float('nan') -> ['SPECIAL_FLOAT', 'NaN']
+# x == int(x) -> ['SPECIAL_FLOAT', '%.1f' % x]
+# (this way, 3.0 prints as '3.0' and not as 3, which looks like an int)
+#
+# If render_heap_primitives is True, then primitive values are rendered
+# on the heap as ['HEAP_PRIMITIVE', <type name>, <value>]
+#
+# (for SPECIAL_FLOAT values, <value> is a list like ['SPECIAL_FLOAT', 'Infinity'])
+#
+# Compound objects:
+# * list - ['LIST', elt1, elt2, elt3, ..., eltN]
+# * tuple - ['TUPLE', elt1, elt2, elt3, ..., eltN]
+# * set - ['SET', elt1, elt2, elt3, ..., eltN]
+# * dict - ['DICT', [key1, value1], [key2, value2], ..., [keyN, valueN]]
+# * instance - ['INSTANCE', class name, [attr1, value1], [attr2, value2], ..., [attrN, valueN]]
+# * instance with __str__ defined - ['INSTANCE_PPRINT', class name, <__str__ value>]
+# * class - ['CLASS', class name, [list of superclass names], [attr1, value1], [attr2, value2], ..., [attrN, valueN]]
+# * function - ['FUNCTION', function name, parent frame ID (for nested functions)]
+# * module - ['module', module name]
+# * other - [<type name>, string representation of object]
+# * compound object reference - ['REF', target object's unique_id]
+#
+# the unique_id is derived from id(), which allows us to capture aliasing
+
+
+# number of significant digits for floats
+FLOAT_PRECISION = 4
+
+
+from collections import defaultdict
+import re, types
+import sys
+import math
+typeRE = re.compile("<type '(.*)'>")
+classRE = re.compile("<class '(.*)'>")
+
+import inspect
+
+# TODO: maybe use the 'six' library to smooth over Py2 and Py3 incompatibilities?
+is_python3 = (sys.version_info[0] == 3)
+if is_python3:
+ # avoid name errors (GROSS!)
+ long = int
+ unicode = str
+
+
+def is_class(dat):
+ """Return whether dat is a class."""
+ if is_python3:
+ return isinstance(dat, type)
+ else:
+ return type(dat) in (types.ClassType, types.TypeType)
+
+
+def is_instance(dat):
+ """Return whether dat is an instance of a class."""
+ if is_python3:
+ return type(dat) not in PRIMITIVE_TYPES and \
+ isinstance(type(dat), type) and \
+ not isinstance(dat, type)
+ else:
+ # ugh, classRE match is a bit of a hack :(
+ return type(dat) == types.InstanceType or classRE.match(str(type(dat)))
+
+
+def get_name(obj):
+ """Return the name of an object."""
+ return obj.__name__ if hasattr(obj, '__name__') else get_name(type(obj))
+
+
+PRIMITIVE_TYPES = (int, long, float, str, unicode, bool, type(None))
+
+def encode_primitive(dat):
+ t = type(dat)
+ if t is float:
+ if math.isinf(dat):
+ if dat > 0:
+ return ['SPECIAL_FLOAT', 'Infinity']
+ else:
+ return ['SPECIAL_FLOAT', '-Infinity']
+ elif math.isnan(dat):
+ return ['SPECIAL_FLOAT', 'NaN']
+ else:
+ # render floats like 3.0 as '3.0' and not as 3
+ if dat == int(dat):
+ return ['SPECIAL_FLOAT', '%.1f' % dat]
+ else:
+ return round(dat, FLOAT_PRECISION)
+ elif t is str and (not is_python3):
+ # hack only for Python 2 strings ... always turn into unicode
+ # and display '?' when it's not valid unicode
+ return dat.decode('utf-8', 'replace')
+ else:
+ # return all other primitives verbatim
+ return dat
+
+
+# grab a line number like ' <line 2>' or ' <line 2b>'
+def create_lambda_line_number(codeobj, line_to_lambda_code):
+ try:
+ lambda_lineno = codeobj.co_firstlineno
+ lst = line_to_lambda_code[lambda_lineno]
+ ind = lst.index(codeobj)
+ # add a suffix for all subsequent lambdas on a line beyond the first
+ # (nix this for now because order isn't guaranteed when you have
+ # multiple lambdas on the same line)
+ '''
+ if ind > 0:
+ lineno_str = str(lambda_lineno) + chr(ord('a') + ind)
+ else:
+ lineno_str = str(lambda_lineno)
+ '''
+ lineno_str = str(lambda_lineno)
+ return ' <line ' + lineno_str + '>'
+ except:
+ return ''
+
+
+# Note that this might BLOAT MEMORY CONSUMPTION since we're holding on
+# to every reference ever created by the program without ever releasing
+# anything!
+class ObjectEncoder:
+ def __init__(self, render_heap_primitives):
+ # Key: canonicalized small ID
+ # Value: encoded (compound) heap object
+ self.encoded_heap_objects = {}
+
+ self.render_heap_primitives = render_heap_primitives
+
+ self.id_to_small_IDs = {}
+ self.cur_small_ID = 1
+
+ # wow, creating unique identifiers for lambdas is quite annoying,
+ # especially if we want to properly differentiate:
+ # 1.) multiple lambdas defined on the same line, and
+ # 2.) the same lambda code defined multiple times on different lines
+ #
+ # However, it gets confused when there are multiple identical
+ # lambdas on the same line, like:
+ # f(lambda x:x*x, lambda y:y*y, lambda x:x*x)
+
+ # (assumes everything is in one file)
+ # Key: line number
+ # Value: list of the code objects of lambdas defined
+ # on that line in the order they were defined
+ self.line_to_lambda_code = defaultdict(list)
+
+
+ def get_heap(self):
+ return self.encoded_heap_objects
+
+
+ def reset_heap(self):
+ # VERY IMPORTANT to reassign to an empty dict rather than just
+ # clearing the existing dict, since get_heap() could have been
+ # called earlier to return a reference to a previous heap state
+ self.encoded_heap_objects = {}
+
+ def set_function_parent_frame_ID(self, ref_obj, enclosing_frame_id):
+ assert ref_obj[0] == 'REF'
+ func_obj = self.encoded_heap_objects[ref_obj[1]]
+ assert func_obj[0] == 'FUNCTION'
+ func_obj[-1] = enclosing_frame_id
+
+
+ # return either a primitive object or an object reference;
+ # and as a side effect, update encoded_heap_objects
+ def encode(self, dat, get_parent):
+ """Encode a data value DAT using the GET_PARENT function for parent ids."""
+ # primitive type
+ if not self.render_heap_primitives and type(dat) in PRIMITIVE_TYPES:
+ return encode_primitive(dat)
+ # compound type - return an object reference and update encoded_heap_objects
+ else:
+ my_id = id(dat)
+
+ try:
+ my_small_id = self.id_to_small_IDs[my_id]
+ except KeyError:
+ my_small_id = self.cur_small_ID
+ self.id_to_small_IDs[my_id] = self.cur_small_ID
+ self.cur_small_ID += 1
+
+ del my_id # to prevent bugs later in this function
+
+ ret = ['REF', my_small_id]
+
+ # punt early if you've already encoded this object
+ if my_small_id in self.encoded_heap_objects:
+ return ret
+
+
+ # major side-effect!
+ new_obj = []
+ self.encoded_heap_objects[my_small_id] = new_obj
+
+ typ = type(dat)
+
+ if typ == list:
+ new_obj.append('LIST')
+ for e in dat:
+ new_obj.append(self.encode(e, get_parent))
+ elif typ == tuple:
+ new_obj.append('TUPLE')
+ for e in dat:
+ new_obj.append(self.encode(e, get_parent))
+ elif typ == set:
+ new_obj.append('SET')
+ for e in dat:
+ new_obj.append(self.encode(e, get_parent))
+ elif typ == dict:
+ new_obj.append('DICT')
+ for (k, v) in dat.items():
+ # don't display some built-in locals ...
+ if k not in ('__module__', '__return__', '__locals__'):
+ new_obj.append([self.encode(k, get_parent), self.encode(v, get_parent)])
+ elif typ in (types.FunctionType, types.MethodType):
+ if is_python3:
+ argspec = inspect.getfullargspec(dat)
+ else:
+ argspec = inspect.getargspec(dat)
+
+ printed_args = [e for e in argspec.args]
+ if argspec.varargs:
+ printed_args.append('*' + argspec.varargs)
+
+ if is_python3:
+ if argspec.varkw:
+ printed_args.append('**' + argspec.varkw)
+ if argspec.kwonlyargs:
+ printed_args.extend(argspec.kwonlyargs)
+ else:
+ if argspec.keywords:
+ printed_args.append('**' + argspec.keywords)
+
+ func_name = get_name(dat)
+
+ pretty_name = func_name
+
+ # sometimes might fail for, say, <genexpr>, so just ignore
+ # failures for now ...
+ try:
+ pretty_name += '(' + ', '.join(printed_args) + ')'
+ except TypeError:
+ pass
+
+ # put a line number suffix on lambdas to more uniquely identify
+ # them, since they don't have names
+ if func_name == '<lambda>':
+ cod = (dat.__code__ if is_python3 else dat.func_code) # ugh!
+ lst = self.line_to_lambda_code[cod.co_firstlineno]
+ if cod not in lst:
+ lst.append(cod)
+ pretty_name += create_lambda_line_number(cod,
+ self.line_to_lambda_code)
+
+ encoded_val = ['FUNCTION', pretty_name, None]
+ if get_parent:
+ enclosing_frame_id = get_parent(dat)
+ encoded_val[2] = enclosing_frame_id
+ new_obj.extend(encoded_val)
+ elif typ is types.BuiltinFunctionType:
+ pretty_name = get_name(dat) + '(...)'
+ new_obj.extend(['FUNCTION', pretty_name, None])
+ elif is_class(dat) or is_instance(dat):
+ self.encode_class_or_instance(dat, new_obj)
+ elif typ is types.ModuleType:
+ new_obj.extend(['module', dat.__name__])
+ elif typ in PRIMITIVE_TYPES:
+ assert self.render_heap_primitives
+ new_obj.extend(['HEAP_PRIMITIVE', type(dat).__name__, encode_primitive(dat)])
+ else:
+ typeStr = str(typ)
+ m = typeRE.match(typeStr)
+
+ if not m:
+ m = classRE.match(typeStr)
+
+ assert m, typ
+
+ if is_python3:
+ encoded_dat = str(dat)
+ else:
+ # ugh, for bytearray() in Python 2, str() returns
+ # non-JSON-serializable characters, so need to decode:
+ encoded_dat = str(dat).decode('utf-8', 'replace')
+ new_obj.extend([m.group(1), encoded_dat])
+
+ return ret
+
+
+ def encode_class_or_instance(self, dat, new_obj):
+ """Encode dat as a class or instance."""
+ if is_instance(dat):
+ if hasattr(dat, '__class__'):
+ # common case ...
+ class_name = get_name(dat.__class__)
+ else:
+ # super special case for something like
+ # "from datetime import datetime_CAPI" in Python 3.2,
+ # which is some weird 'PyCapsule' type ...
+ # http://docs.python.org/release/3.1.5/c-api/capsule.html
+ class_name = get_name(type(dat))
+
+ if hasattr(dat, '__str__') and \
+ (not dat.__class__.__str__ is object.__str__): # make sure it's not the lame default __str__
+ # N.B.: when objects are being constructed, this call
+ # might fail since not all fields have yet been populated
+ try:
+ pprint_str = str(dat)
+ except:
+ pprint_str = '<incomplete object>'
+
+ new_obj.extend(['INSTANCE_PPRINT', class_name, pprint_str])
+ return # bail early
+ else:
+ new_obj.extend(['INSTANCE', class_name])
+ # don't traverse inside modules, or else risk EXPLODING the visualization
+ if class_name == 'module':
+ return
+ else:
+ superclass_names = [e.__name__ for e in dat.__bases__ if e is not object]
+ new_obj.extend(['CLASS', get_name(dat), superclass_names])
+
+ # traverse inside of its __dict__ to grab attributes
+ # (filter out useless-seeming ones, based on anecdotal observation):
+ hidden = ('__doc__', '__module__', '__return__', '__dict__',
+ '__locals__', '__weakref__', '__qualname__')
+ if hasattr(dat, '__dict__'):
+ user_attrs = sorted([e for e in dat.__dict__ if e not in hidden])
+ else:
+ user_attrs = []
+
+ for attr in user_attrs:
+ new_obj.append([self.encode(attr, None), self.encode(dat.__dict__[attr], None)])
+