From c3955d1ea0d0ad83baad6d4911e21ed4fc3be701 Mon Sep 17 00:00:00 2001
From: Timotej Lazar
Date: Wed, 4 Nov 2015 13:31:15 +0100
Subject: Support Tree objects in python.util.stringify

---
 prolog/parser.py |  5 +++--
 prolog/util.py   | 24 +++++++++++++++---------
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/prolog/parser.py b/prolog/parser.py
index 4e52a35..0c539d5 100644
--- a/prolog/parser.py
+++ b/prolog/parser.py
@@ -16,7 +16,7 @@
 from nltk import Tree
 import ply.yacc as yacc
 
-from .lexer import operators, tokens
+from .lexer import tokens
 from .util import Token
 
 # PARSER
@@ -183,6 +183,7 @@ def p_error(t):
 parser = yacc.yacc(debug=False)
 
 if __name__ == '__main__':
+    from .util import stringify
     while True:
         try:
             s = input('> ')
@@ -191,4 +192,4 @@ if __name__ == '__main__':
         if not s:
             continue
         ast = parser.parse(s)
-        print(repr(ast))
+        print(stringify(ast))
diff --git a/prolog/util.py b/prolog/util.py
index ba48732..ba61da0 100644
--- a/prolog/util.py
+++ b/prolog/util.py
@@ -15,6 +15,8 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 from collections import namedtuple
+from collections.abc import Iterable
+from nltk import Tree
 
 # Stores a token's type and value, and optionally the position of the first
 # character in the lexed stream.
@@ -65,15 +67,19 @@
     lexer.input(text)
     return [Token(t.type, t.value, t.lexpos) for t in lexer]
 
-# Return a one-line string representation of [tokens].
-def stringify(tokens):
-    def token_str(t):
-        if t.type in ('PERIOD', 'COMMA'):
-            return str(t) + ' '
-        if t.type in operators.values():
-            return ' ' + str(t) + ' '
-        return str(t)
-    return ''.join(map(token_str, tokens))
+# Return a one-line string representation of [obj] which may be a Tree or a
+# list of tokens.
+def stringify(obj):
+    if isinstance(obj, Token):
+        if obj.type in ('PERIOD', 'COMMA'):
+            return str(obj) + ' '
+        if obj.type in operators.values():
+            return ' ' + str(obj) + ' '
+        return str(obj)
+    if isinstance(obj, Iterable):
+        if isinstance(obj, Tree) and obj.label() == 'clause':
+            return ''.join([stringify(child) for child in obj]) + '\n'
+        return ''.join([stringify(child) for child in obj])
 
 # Lex [code] into tokens with rule indexes and stop markers.
 def annotate(code):
-- 
cgit v1.2.1
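
The patched stringify dispatches on type: a Token renders as its own text
(operator types get surrounding spaces, PERIOD and COMMA a trailing space),
and any other iterable, including an nltk.Tree, is flattened recursively,
with subtrees labelled 'clause' terminated by a newline. The minimal sketch
below assumes the patched prolog package is importable, that Token.__str__
returns the token's text (as the diff's use of str(obj) implies), and that
the parser yields Tree nodes labelled 'clause' over Token leaves; the
hand-built tree and the 'FROM'/'text' labels are hypothetical stand-ins,
not taken from this patch.

    # Sketch only: a hand-built stand-in for parser.parse("a :- b.").
    from nltk import Tree
    from prolog.util import Token, stringify

    clause = Tree('clause', [
        Token('NAME', 'a', 0),    # positional fields per the diff: type, value, pos
        Token('FROM', ':-', 2),   # 'FROM' as the token type of ':-' is an assumption
        Token('NAME', 'b', 5),
        Token('PERIOD', '.', 6),
    ])
    ast = Tree('text', [clause])  # root label 'text' is likewise assumed

    print(stringify(ast))         # roughly: "a :- b. " followed by a newline

One design point worth noting: the isinstance(obj, Token) test must precede
the Iterable test, because Token is a namedtuple and therefore iterable
itself; reversing the checks would decompose each token into its fields
instead of rendering it as text.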