author     Timotej Lazar <timotej.lazar@fri.uni-lj.si>  2015-11-04 13:31:15 +0100
committer  Timotej Lazar <timotej.lazar@fri.uni-lj.si>  2015-12-10 14:10:04 +0100
commit     c3955d1ea0d0ad83baad6d4911e21ed4fc3be701 (patch)
tree       335f69dd084b41be0bd05e6b82f2656ed6fb6a34 /prolog/util.py
parent     c7ce88a12ad4469642aaad0a4ad675769d6aa591 (diff)
Support Tree objects in prolog.util.stringify
Diffstat (limited to 'prolog/util.py')
-rw-r--r--  prolog/util.py  24
1 file changed, 15 insertions(+), 9 deletions(-)
diff --git a/prolog/util.py b/prolog/util.py
index ba48732..ba61da0 100644
--- a/prolog/util.py
+++ b/prolog/util.py
@@ -15,6 +15,8 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from collections import namedtuple
+from collections.abc import Iterable
+from nltk import Tree
# Stores a token's type and value, and optionally the position of the first
# character in the lexed stream.
@@ -65,15 +67,19 @@ def tokenize(text):
lexer.input(text)
return [Token(t.type, t.value, t.lexpos) for t in lexer]
-# Return a one-line string representation of [tokens].
-def stringify(tokens):
- def token_str(t):
- if t.type in ('PERIOD', 'COMMA'):
- return str(t) + ' '
- if t.type in operators.values():
- return ' ' + str(t) + ' '
- return str(t)
- return ''.join(map(token_str, tokens))
+# Return a one-line string representation of [obj] which may be a Tree or a
+# list of tokens.
+def stringify(obj):
+ if isinstance(obj, Token):
+ if obj.type in ('PERIOD', 'COMMA'):
+ return str(obj) + ' '
+ if obj.type in operators.values():
+ return ' ' + str(obj) + ' '
+ return str(obj)
+ if isinstance(obj, Iterable):
+ if isinstance(obj, Tree) and obj.label() == 'clause':
+ return ''.join([stringify(child) for child in obj]) + '\n'
+ return ''.join([stringify(child) for child in obj])
# Lex [code] into tokens with rule indexes and stop markers.
def annotate(code):
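
A hedged usage sketch follows; it is not part of the commit. It assumes the module layout shown in the diff (Token, tokenize and stringify living in prolog.util) and that str() of a Token yields the token's text, which the pre-existing stringify code already relies on. The sample query and the tree built around it are made up for illustration.

from nltk import Tree
from prolog.util import tokenize, stringify

# Plain token lists are handled exactly as before this commit.
tokens = tokenize('parent(X, Y) :- mother(X, Y).')
print(stringify(tokens))

# With this commit, stringify also accepts nltk Tree objects: a node
# labeled 'clause' is flattened to one line ending in a newline, while
# any other label (or a nested token list) is joined without the newline.
clause = Tree('clause', tokens)
print(stringify(clause), end='')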