diff options
Diffstat (limited to 'dynamic/pg_encoder.py')
-rw-r--r-- | dynamic/pg_encoder.py | 377 |
1 files changed, 377 insertions, 0 deletions
diff --git a/dynamic/pg_encoder.py b/dynamic/pg_encoder.py new file mode 100644 index 0000000..7cf0c8e --- /dev/null +++ b/dynamic/pg_encoder.py @@ -0,0 +1,377 @@ +# Online Python Tutor +# https://github.com/pgbovine/OnlinePythonTutor/ +# +# Copyright (C) Philip J. Guo (philip@pgbovine.net) +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +# Thanks to John DeNero for making the encoder work on both Python 2 and 3 + + +# Given an arbitrary piece of Python data, encode it in such a manner +# that it can be later encoded into JSON. +# http://json.org/ +# +# We use this function to encode run-time traces of data structures +# to send to the front-end. +# +# Format: +# Primitives: +# * None, int, long, float, str, bool - unchanged +# (json.dumps encodes these fine verbatim, except for inf, -inf, and nan) +# +# exceptions: float('inf') -> ['SPECIAL_FLOAT', 'Infinity'] +# float('-inf') -> ['SPECIAL_FLOAT', '-Infinity'] +# float('nan') -> ['SPECIAL_FLOAT', 'NaN'] +# x == int(x) -> ['SPECIAL_FLOAT', '%.1f' % x] +# (this way, 3.0 prints as '3.0' and not as 3, which looks like an int) +# +# If render_heap_primitives is True, then primitive values are rendered +# on the heap as ['HEAP_PRIMITIVE', <type name>, <value>] +# +# (for SPECIAL_FLOAT values, <value> is a list like ['SPECIAL_FLOAT', 'Infinity']) +# +# Compound objects: +# * list - ['LIST', elt1, elt2, elt3, ..., eltN] +# * tuple - ['TUPLE', elt1, elt2, elt3, ..., eltN] +# * set - ['SET', elt1, elt2, elt3, ..., eltN] +# * dict - ['DICT', [key1, value1], [key2, value2], ..., [keyN, valueN]] +# * instance - ['INSTANCE', class name, [attr1, value1], [attr2, value2], ..., [attrN, valueN]] +# * instance with __str__ defined - ['INSTANCE_PPRINT', class name, <__str__ value>] +# * class - ['CLASS', class name, [list of superclass names], [attr1, value1], [attr2, value2], ..., [attrN, valueN]] +# * function - ['FUNCTION', function name, parent frame ID (for nested functions)] +# * module - ['module', module name] +# * other - [<type name>, string representation of object] +# * compound object reference - ['REF', target object's unique_id] +# +# the unique_id is derived from id(), which allows us to capture aliasing + + +# number of significant digits for floats +FLOAT_PRECISION = 4 + + +from collections import defaultdict +import re, types +import sys +import math +typeRE = re.compile("<type '(.*)'>") +classRE = re.compile("<class '(.*)'>") + +import inspect + +# TODO: maybe use the 'six' library to smooth over Py2 and Py3 incompatibilities? +is_python3 = (sys.version_info[0] == 3) +if is_python3: + # avoid name errors (GROSS!) + long = int + unicode = str + + +def is_class(dat): + """Return whether dat is a class.""" + if is_python3: + return isinstance(dat, type) + else: + return type(dat) in (types.ClassType, types.TypeType) + + +def is_instance(dat): + """Return whether dat is an instance of a class.""" + if is_python3: + return type(dat) not in PRIMITIVE_TYPES and \ + isinstance(type(dat), type) and \ + not isinstance(dat, type) + else: + # ugh, classRE match is a bit of a hack :( + return type(dat) == types.InstanceType or classRE.match(str(type(dat))) + + +def get_name(obj): + """Return the name of an object.""" + return obj.__name__ if hasattr(obj, '__name__') else get_name(type(obj)) + + +PRIMITIVE_TYPES = (int, long, float, str, unicode, bool, type(None)) + +def encode_primitive(dat): + t = type(dat) + if t is float: + if math.isinf(dat): + if dat > 0: + return ['SPECIAL_FLOAT', 'Infinity'] + else: + return ['SPECIAL_FLOAT', '-Infinity'] + elif math.isnan(dat): + return ['SPECIAL_FLOAT', 'NaN'] + else: + # render floats like 3.0 as '3.0' and not as 3 + if dat == int(dat): + return ['SPECIAL_FLOAT', '%.1f' % dat] + else: + return round(dat, FLOAT_PRECISION) + elif t is str and (not is_python3): + # hack only for Python 2 strings ... always turn into unicode + # and display '?' when it's not valid unicode + return dat.decode('utf-8', 'replace') + else: + # return all other primitives verbatim + return dat + + +# grab a line number like ' <line 2>' or ' <line 2b>' +def create_lambda_line_number(codeobj, line_to_lambda_code): + try: + lambda_lineno = codeobj.co_firstlineno + lst = line_to_lambda_code[lambda_lineno] + ind = lst.index(codeobj) + # add a suffix for all subsequent lambdas on a line beyond the first + # (nix this for now because order isn't guaranteed when you have + # multiple lambdas on the same line) + ''' + if ind > 0: + lineno_str = str(lambda_lineno) + chr(ord('a') + ind) + else: + lineno_str = str(lambda_lineno) + ''' + lineno_str = str(lambda_lineno) + return ' <line ' + lineno_str + '>' + except: + return '' + + +# Note that this might BLOAT MEMORY CONSUMPTION since we're holding on +# to every reference ever created by the program without ever releasing +# anything! +class ObjectEncoder: + def __init__(self, render_heap_primitives): + # Key: canonicalized small ID + # Value: encoded (compound) heap object + self.encoded_heap_objects = {} + + self.render_heap_primitives = render_heap_primitives + + self.id_to_small_IDs = {} + self.cur_small_ID = 1 + + # wow, creating unique identifiers for lambdas is quite annoying, + # especially if we want to properly differentiate: + # 1.) multiple lambdas defined on the same line, and + # 2.) the same lambda code defined multiple times on different lines + # + # However, it gets confused when there are multiple identical + # lambdas on the same line, like: + # f(lambda x:x*x, lambda y:y*y, lambda x:x*x) + + # (assumes everything is in one file) + # Key: line number + # Value: list of the code objects of lambdas defined + # on that line in the order they were defined + self.line_to_lambda_code = defaultdict(list) + + + def get_heap(self): + return self.encoded_heap_objects + + + def reset_heap(self): + # VERY IMPORTANT to reassign to an empty dict rather than just + # clearing the existing dict, since get_heap() could have been + # called earlier to return a reference to a previous heap state + self.encoded_heap_objects = {} + + def set_function_parent_frame_ID(self, ref_obj, enclosing_frame_id): + assert ref_obj[0] == 'REF' + func_obj = self.encoded_heap_objects[ref_obj[1]] + assert func_obj[0] == 'FUNCTION' + func_obj[-1] = enclosing_frame_id + + + # return either a primitive object or an object reference; + # and as a side effect, update encoded_heap_objects + def encode(self, dat, get_parent): + """Encode a data value DAT using the GET_PARENT function for parent ids.""" + # primitive type + if not self.render_heap_primitives and type(dat) in PRIMITIVE_TYPES: + return encode_primitive(dat) + # compound type - return an object reference and update encoded_heap_objects + else: + my_id = id(dat) + + try: + my_small_id = self.id_to_small_IDs[my_id] + except KeyError: + my_small_id = self.cur_small_ID + self.id_to_small_IDs[my_id] = self.cur_small_ID + self.cur_small_ID += 1 + + del my_id # to prevent bugs later in this function + + ret = ['REF', my_small_id] + + # punt early if you've already encoded this object + if my_small_id in self.encoded_heap_objects: + return ret + + + # major side-effect! + new_obj = [] + self.encoded_heap_objects[my_small_id] = new_obj + + typ = type(dat) + + if typ == list: + new_obj.append('LIST') + for e in dat: + new_obj.append(self.encode(e, get_parent)) + elif typ == tuple: + new_obj.append('TUPLE') + for e in dat: + new_obj.append(self.encode(e, get_parent)) + elif typ == set: + new_obj.append('SET') + for e in dat: + new_obj.append(self.encode(e, get_parent)) + elif typ == dict: + new_obj.append('DICT') + for (k, v) in dat.items(): + # don't display some built-in locals ... + if k not in ('__module__', '__return__', '__locals__'): + new_obj.append([self.encode(k, get_parent), self.encode(v, get_parent)]) + elif typ in (types.FunctionType, types.MethodType): + if is_python3: + argspec = inspect.getfullargspec(dat) + else: + argspec = inspect.getargspec(dat) + + printed_args = [e for e in argspec.args] + if argspec.varargs: + printed_args.append('*' + argspec.varargs) + + if is_python3: + if argspec.varkw: + printed_args.append('**' + argspec.varkw) + if argspec.kwonlyargs: + printed_args.extend(argspec.kwonlyargs) + else: + if argspec.keywords: + printed_args.append('**' + argspec.keywords) + + func_name = get_name(dat) + + pretty_name = func_name + + # sometimes might fail for, say, <genexpr>, so just ignore + # failures for now ... + try: + pretty_name += '(' + ', '.join(printed_args) + ')' + except TypeError: + pass + + # put a line number suffix on lambdas to more uniquely identify + # them, since they don't have names + if func_name == '<lambda>': + cod = (dat.__code__ if is_python3 else dat.func_code) # ugh! + lst = self.line_to_lambda_code[cod.co_firstlineno] + if cod not in lst: + lst.append(cod) + pretty_name += create_lambda_line_number(cod, + self.line_to_lambda_code) + + encoded_val = ['FUNCTION', pretty_name, None] + if get_parent: + enclosing_frame_id = get_parent(dat) + encoded_val[2] = enclosing_frame_id + new_obj.extend(encoded_val) + elif typ is types.BuiltinFunctionType: + pretty_name = get_name(dat) + '(...)' + new_obj.extend(['FUNCTION', pretty_name, None]) + elif is_class(dat) or is_instance(dat): + self.encode_class_or_instance(dat, new_obj) + elif typ is types.ModuleType: + new_obj.extend(['module', dat.__name__]) + elif typ in PRIMITIVE_TYPES: + assert self.render_heap_primitives + new_obj.extend(['HEAP_PRIMITIVE', type(dat).__name__, encode_primitive(dat)]) + else: + typeStr = str(typ) + m = typeRE.match(typeStr) + + if not m: + m = classRE.match(typeStr) + + assert m, typ + + if is_python3: + encoded_dat = str(dat) + else: + # ugh, for bytearray() in Python 2, str() returns + # non-JSON-serializable characters, so need to decode: + encoded_dat = str(dat).decode('utf-8', 'replace') + new_obj.extend([m.group(1), encoded_dat]) + + return ret + + + def encode_class_or_instance(self, dat, new_obj): + """Encode dat as a class or instance.""" + if is_instance(dat): + if hasattr(dat, '__class__'): + # common case ... + class_name = get_name(dat.__class__) + else: + # super special case for something like + # "from datetime import datetime_CAPI" in Python 3.2, + # which is some weird 'PyCapsule' type ... + # http://docs.python.org/release/3.1.5/c-api/capsule.html + class_name = get_name(type(dat)) + + if hasattr(dat, '__str__') and \ + (not dat.__class__.__str__ is object.__str__): # make sure it's not the lame default __str__ + # N.B.: when objects are being constructed, this call + # might fail since not all fields have yet been populated + try: + pprint_str = str(dat) + except: + pprint_str = '<incomplete object>' + + new_obj.extend(['INSTANCE_PPRINT', class_name, pprint_str]) + return # bail early + else: + new_obj.extend(['INSTANCE', class_name]) + # don't traverse inside modules, or else risk EXPLODING the visualization + if class_name == 'module': + return + else: + superclass_names = [e.__name__ for e in dat.__bases__ if e is not object] + new_obj.extend(['CLASS', get_name(dat), superclass_names]) + + # traverse inside of its __dict__ to grab attributes + # (filter out useless-seeming ones, based on anecdotal observation): + hidden = ('__doc__', '__module__', '__return__', '__dict__', + '__locals__', '__weakref__', '__qualname__') + if hasattr(dat, '__dict__'): + user_attrs = sorted([e for e in dat.__dict__ if e not in hidden]) + else: + user_attrs = [] + + for attr in user_attrs: + new_obj.append([self.encode(attr, None), self.encode(dat.__dict__[attr], None)]) + |