diff options
author | Timotej Lazar <timotej.lazar@fri.uni-lj.si> | 2015-11-04 13:31:15 +0100 |
---|---|---|
committer | Timotej Lazar <timotej.lazar@fri.uni-lj.si> | 2015-12-10 14:10:04 +0100 |
commit | c3955d1ea0d0ad83baad6d4911e21ed4fc3be701 (patch) | |
tree | 335f69dd084b41be0bd05e6b82f2656ed6fb6a34 /prolog | |
parent | c7ce88a12ad4469642aaad0a4ad675769d6aa591 (diff) |
Support Tree objects in python.util.stringify
Diffstat (limited to 'prolog')
-rw-r--r-- | prolog/parser.py | 5 | ||||
-rw-r--r-- | prolog/util.py | 24 |
2 files changed, 18 insertions, 11 deletions
diff --git a/prolog/parser.py b/prolog/parser.py
index 4e52a35..0c539d5 100644
--- a/prolog/parser.py
+++ b/prolog/parser.py
@@ -16,7 +16,7 @@
 
 from nltk import Tree
 import ply.yacc as yacc
-from .lexer import operators, tokens
+from .lexer import tokens
 from .util import Token
 
 # PARSER
@@ -183,6 +183,7 @@ def p_error(t):
 parser = yacc.yacc(debug=False)
 
 if __name__ == '__main__':
+    from .util import stringify
     while True:
         try:
             s = input('> ')
@@ -191,4 +192,4 @@ if __name__ == '__main__':
         if not s:
             continue
         ast = parser.parse(s)
-        print(repr(ast))
+        print(stringify(ast))
diff --git a/prolog/util.py b/prolog/util.py
index ba48732..ba61da0 100644
--- a/prolog/util.py
+++ b/prolog/util.py
@@ -15,6 +15,8 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 from collections import namedtuple
+from collections.abc import Iterable
+from nltk import Tree
 
 # Stores a token's type and value, and optionally the position of the first
 # character in the lexed stream.
@@ -65,15 +67,19 @@ def tokenize(text):
     lexer.input(text)
     return [Token(t.type, t.value, t.lexpos) for t in lexer]
 
-# Return a one-line string representation of [tokens].
-def stringify(tokens):
-    def token_str(t):
-        if t.type in ('PERIOD', 'COMMA'):
-            return str(t) + ' '
-        if t.type in operators.values():
-            return ' ' + str(t) + ' '
-        return str(t)
-    return ''.join(map(token_str, tokens))
+# Return a one-line string representation of [obj] which may be a Tree or a
+# list of tokens.
+def stringify(obj):
+    if isinstance(obj, Token):
+        if obj.type in ('PERIOD', 'COMMA'):
+            return str(obj) + ' '
+        if obj.type in operators.values():
+            return ' ' + str(obj) + ' '
+        return str(obj)
+    if isinstance(obj, Iterable):
+        if isinstance(obj, Tree) and obj.label() == 'clause':
+            return ''.join([stringify(child) for child in obj]) + '\n'
+        return ''.join([stringify(child) for child in obj])
 
 # Lex [code] into tokens with rule indexes and stop markers.
 def annotate(code):