diff options
Diffstat (limited to 'prolog/util.py')
-rw-r--r-- | prolog/util.py | 24 |
1 file changed, 15 insertions, 9 deletions
diff --git a/prolog/util.py b/prolog/util.py
index ba48732..ba61da0 100644
--- a/prolog/util.py
+++ b/prolog/util.py
@@ -15,6 +15,8 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 from collections import namedtuple
+from collections.abc import Iterable
+from nltk import Tree
 
 # Stores a token's type and value, and optionally the position of the first
 # character in the lexed stream.
@@ -65,15 +67,19 @@ def tokenize(text):
     lexer.input(text)
     return [Token(t.type, t.value, t.lexpos) for t in lexer]
 
-# Return a one-line string representation of [tokens].
-def stringify(tokens):
-    def token_str(t):
-        if t.type in ('PERIOD', 'COMMA'):
-            return str(t) + ' '
-        if t.type in operators.values():
-            return ' ' + str(t) + ' '
-        return str(t)
-    return ''.join(map(token_str, tokens))
+# Return a one-line string representation of [obj] which may be a Tree or a
+# list of tokens.
+def stringify(obj):
+    if isinstance(obj, Token):
+        if obj.type in ('PERIOD', 'COMMA'):
+            return str(obj) + ' '
+        if obj.type in operators.values():
+            return ' ' + str(obj) + ' '
+        return str(obj)
+    if isinstance(obj, Iterable):
+        if isinstance(obj, Tree) and obj.label() == 'clause':
+            return ''.join([stringify(child) for child in obj]) + '\n'
+        return ''.join([stringify(child) for child in obj])
 
 # Lex [code] into tokens with rule indexes and stop markers.
 def annotate(code):