author    Timotej Lazar <timotej.lazar@fri.uni-lj.si>  2015-11-04 13:31:15 +0100
committer Timotej Lazar <timotej.lazar@fri.uni-lj.si>  2015-12-10 14:10:04 +0100
commit    c3955d1ea0d0ad83baad6d4911e21ed4fc3be701 (patch)
tree      335f69dd084b41be0bd05e6b82f2656ed6fb6a34 /prolog
parent    c7ce88a12ad4469642aaad0a4ad675769d6aa591 (diff)
Support Tree objects in python.util.stringify
Diffstat (limited to 'prolog')
-rw-r--r--  prolog/parser.py |  5
-rw-r--r--  prolog/util.py   | 24
2 files changed, 18 insertions(+), 11 deletions(-)
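The change generalizes stringify: it previously accepted only a sequence of tokens, but now dispatches on the argument type and also handles nltk Tree values produced by the parser. A rough sketch of the resulting contract, as implied by the diff below (illustrative only; behavior for other input types is not defined):

    stringify(tok)     # single Token: its text, with a trailing space after
                       # ',' or '.' and surrounding spaces for operator tokens
    stringify(tokens)  # iterable of Tokens: concatenation of the per-token strings
    stringify(tree)    # nltk Tree: recursive concatenation; a 'clause' node
                       # additionally gets a trailing newline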
diff --git a/prolog/parser.py b/prolog/parser.py
index 4e52a35..0c539d5 100644
--- a/prolog/parser.py
+++ b/prolog/parser.py
@@ -16,7 +16,7 @@
 from nltk import Tree
 import ply.yacc as yacc
 
-from .lexer import operators, tokens
+from .lexer import tokens
 from .util import Token
 
 # PARSER
@@ -183,6 +183,7 @@ def p_error(t):
 parser = yacc.yacc(debug=False)
 
 if __name__ == '__main__':
+    from .util import stringify
     while True:
         try:
             s = input('> ')
@@ -191,4 +192,4 @@ if __name__ == '__main__':
         if not s:
             continue
         ast = parser.parse(s)
-        print(repr(ast))
+        print(stringify(ast))
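With this change, the module's interactive loop prints the parse result flattened back to source-like text instead of the nested nltk Tree repr. A hypothetical session (assuming ':-' is among the lexer's operator tokens and is therefore printed with surrounding spaces; output is illustrative, modulo normalized whitespace):

    > a(X) :- b(X).
    a(X) :- b(X).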
diff --git a/prolog/util.py b/prolog/util.py
index ba48732..ba61da0 100644
--- a/prolog/util.py
+++ b/prolog/util.py
@@ -15,6 +15,8 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 from collections import namedtuple
+from collections.abc import Iterable
+from nltk import Tree
 
 # Stores a token's type and value, and optionally the position of the first
 # character in the lexed stream.
@@ -65,15 +67,19 @@ def tokenize(text):
     lexer.input(text)
     return [Token(t.type, t.value, t.lexpos) for t in lexer]
 
-# Return a one-line string representation of [tokens].
-def stringify(tokens):
-    def token_str(t):
-        if t.type in ('PERIOD', 'COMMA'):
-            return str(t) + ' '
-        if t.type in operators.values():
-            return ' ' + str(t) + ' '
-        return str(t)
-    return ''.join(map(token_str, tokens))
+# Return a one-line string representation of [obj] which may be a Tree or a
+# list of tokens.
+def stringify(obj):
+    if isinstance(obj, Token):
+        if obj.type in ('PERIOD', 'COMMA'):
+            return str(obj) + ' '
+        if obj.type in operators.values():
+            return ' ' + str(obj) + ' '
+        return str(obj)
+    if isinstance(obj, Iterable):
+        if isinstance(obj, Tree) and obj.label() == 'clause':
+            return ''.join([stringify(child) for child in obj]) + '\n'
+        return ''.join([stringify(child) for child in obj])
 
 # Lex [code] into tokens with rule indexes and stop markers.
 def annotate(code):
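A minimal usage sketch of the new dispatch, assuming the package is importable as prolog and that parser.parse returns an nltk Tree with Token leaves (consistent with the code above, though not shown in full here):

    from prolog.parser import parser
    from prolog.util import stringify, tokenize

    # Tree input: stringify recurses through subtrees down to the Token
    # leaves; a tree labeled 'clause' is rendered with a trailing newline.
    ast = parser.parse('a(X) :- b(X), c(X).')
    print(stringify(ast))

    # Token-list input keeps working as before: each token is rendered
    # with the spacing rules for operators, commas and periods.
    print(stringify(tokenize('a(X) :- b(X).')))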