From 089dc41954067ec351ae398214091aff269c8d67 Mon Sep 17 00:00:00 2001
From: Timotej Lazar
Date: Mon, 9 Feb 2015 19:00:13 +0100
Subject: Clean up prolog.util.decompose

---
Review notes (text in this position sits outside the commit message):

decompose() now runs a single loop over tokenize(code) + [Token('EOF')]
instead of building a separate token list and appending EOF to it.

* A new break_line flag replaces the old "if not parens:" test. Lines are
  split on PERIOD / FROM / COMMA / EOF only while break_line is true.
* The parens stack now holds the markers 'paren' and 'ignore' rather than
  token types. An LPAREN that directly follows a NAME token, and every
  LBRACKET / LBRACE, pushes 'paren' and clears break_line. Any other LPAREN
  pushes 'ignore' and leaves break_line untouched, so commas inside a plain
  "( )" group now split lines (the old code suppressed splitting inside
  every kind of bracket).
* After a closing token break_line becomes "'paren' not in parens", so an
  'ignore' group nested inside name( ), [ ] or { } still does not split.
* Any RPAREN / RBRACE / RBRACKET now pops the top of the stack without
  checking that it matches the opening token; the old code popped only
  when the types matched.
* SEMI handling and the (rule_start, len(lines)) rule bookkeeping are
  unchanged apart from the added comments.

NOTE(review): this copy was rebuilt from a whitespace-collapsed rendering,
so indentation, comment alignment and blank lines are reconstructed and
should be confirmed against the repository. The hunk header (-58,40 +58,60)
implies 16 context lines; only 15 could be placed (14 visible ones plus the
blank line after "return lines, rules"). One blank context line is still
missing, so regenerate the patch (or restore that line) before applying.

 prolog/util.py | 68 +++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 44 insertions(+), 24 deletions(-)

diff --git a/prolog/util.py b/prolog/util.py
index c762f23..e5a93e2 100644
--- a/prolog/util.py
+++ b/prolog/util.py
@@ -58,40 +58,60 @@ def split(code):
 def decompose(code):
     lines = []
     rules = []
-    tokens = tokenize(code)
-    tokens.append(Token('EOF'))
-    line = []
-    parens = []
-    rule_start = 0
-    for t in tokens:
+    rule_start = 0     # lowest line number in the current rule
+    line = []          # tokens in the current line
+    break_line = True  # for each comma, go to a new line
+    parens = []        # stack of currently open parens/brackets/braces
+
+    for t in tokenize(code) + [Token('EOF')]:
+        # Always break the line on a semicolon, even inside parens.
         if t.type == 'SEMI':
             if line:
                 lines.append(tuple(line))
                 line = []
             lines.append((t,))
             continue
-        if not parens:
-            if t.type in ('PERIOD', 'FROM', 'COMMA', 'EOF'):
-                if t.type == 'FROM':
-                    line.append(t)
-                if line != []:
-                    lines.append(tuple(line))
-                    line = []
-                if t.type in ('PERIOD', 'EOF') and rule_start < len(lines):
-                    rules.append((rule_start, len(lines)))
-                    rule_start = len(lines)
-                continue
-        if t.type in ('LPAREN', 'LBRACKET', 'LBRACE'):
-            parens.append(t.type)
+
+        # Break the line on these tokens if we are not inside parens. Don't
+        # append the final token unless it is the :- operator.
+        if break_line and t.type in ('PERIOD', 'FROM', 'COMMA', 'EOF'):
+            # Only append :- at the end of the line, ignore commas and periods.
+            if t.type == 'FROM':
+                line.append(t)
+
+            # Append nonempty lines to the output list.
+            if line:
+                lines.append(tuple(line))
+                line = []
+
+            # Commit a new rule if it contains some lines.
+            if t.type in ('PERIOD', 'EOF') and rule_start < len(lines):
+                rules.append((rule_start, len(lines)))
+                rule_start = len(lines)
+            continue
+
+        # Handle parens.
+        if t.type == 'LPAREN':
+            # Disallow breaking lines inside "name( )" (e.g. member(X, L)) but
+            # not other ( ).
+            if line and line[-1].type == 'NAME':
+                parens.append('paren')
+                break_line = False
+            else:
+                parens.append('ignore')
+        elif t.type in ('LBRACKET', 'LBRACE'):
+            # Disallow breaking lines inside "[ ]" and "{ }".
+            parens.append('paren')
+            break_line = False
         elif parens:
-            if t.type == 'RPAREN' and parens[-1] == 'LPAREN':
-                parens.pop()
-            elif t.type == 'RBRACKET' and parens[-1] == 'LBRACKET':
-                parens.pop()
-            elif t.type == 'RBRACE' and parens[-1] == 'LBRACE':
+            if t.type in ('RPAREN', 'RBRACE', 'RBRACKET'):
                 parens.pop()
+                break_line = 'paren' not in parens
+
+        # Append this token to the current line.
         line.append(t)
+
     return lines, rules
 
 # Format a list of [lines] according to [rules] (as returned by decompose).
-- 
cgit v1.2.1