summary | refs | log | tree | commit | diff
path: root/prolog
diff options
context:
space:
mode:
author: Timotej Lazar <timotej.lazar@araneo.org> 2015-02-09 19:00:13 +0100
committer: Aleš Smodiš <aless@guru.si> 2015-08-11 14:26:02 +0200
commit: 089dc41954067ec351ae398214091aff269c8d67 (patch)
tree: fac7fdf2ca593b24159b29d58506cc3a1cb5e70c /prolog
parent: 1effa504a63f579a16425f5c66026f0204733194 (diff)
Clean up prolog.util.decompose
Diffstat (limited to 'prolog')
-rw-r--r-- prolog/util.py | 68
1 file changed, 44 insertions, 24 deletions
diff --git a/prolog/util.py b/prolog/util.py
index c762f23..e5a93e2 100644
--- a/prolog/util.py
+++ b/prolog/util.py
@@ -58,40 +58,60 @@ def split(code):
def decompose(code):
lines = []
rules = []
- tokens = tokenize(code)
- tokens.append(Token('EOF'))
- line = []
- parens = []
- rule_start = 0
- for t in tokens:
+ rule_start = 0 # lowest line number in the current rule
+ line = [] # tokens in the current line
+ break_line = True # for each comma, go to a new line
+ parens = [] # stack of currently open parens/brackets/braces
+
+ for t in tokenize(code) + [Token('EOF')]:
+ # Always break the line on a semicolon, even inside parens.
if t.type == 'SEMI':
if line:
lines.append(tuple(line))
line = []
lines.append((t,))
continue
- if not parens:
- if t.type in ('PERIOD', 'FROM', 'COMMA', 'EOF'):
- if t.type == 'FROM':
- line.append(t)
- if line != []:
- lines.append(tuple(line))
- line = []
- if t.type in ('PERIOD', 'EOF') and rule_start < len(lines):
- rules.append((rule_start, len(lines)))
- rule_start = len(lines)
- continue
- if t.type in ('LPAREN', 'LBRACKET', 'LBRACE'):
- parens.append(t.type)
+
+ # Break the line on these tokens if we are not inside parens. Don't
+ # append the final token unless it is the :- operator.
+ if break_line and t.type in ('PERIOD', 'FROM', 'COMMA', 'EOF'):
+ # Only append :- at the end of the line, ignore commas and periods.
+ if t.type == 'FROM':
+ line.append(t)
+
+ # Append nonempty lines to the output list.
+ if line:
+ lines.append(tuple(line))
+ line = []
+
+ # Commit a new rule if it contains some lines.
+ if t.type in ('PERIOD', 'EOF') and rule_start < len(lines):
+ rules.append((rule_start, len(lines)))
+ rule_start = len(lines)
+ continue
+
+ # Handle parens.
+ if t.type == 'LPAREN':
+ # Disallow breaking lines inside "name( )" (e.g. member(X, L)) but
+ # not other ( ).
+ if line and line[-1].type == 'NAME':
+ parens.append('paren')
+ break_line = False
+ else:
+ parens.append('ignore')
+ elif t.type in ('LBRACKET', 'LBRACE'):
+ # Disallow breaking lines inside "[ ]" and "{ }".
+ parens.append('paren')
+ break_line = False
elif parens:
- if t.type == 'RPAREN' and parens[-1] == 'LPAREN':
- parens.pop()
- elif t.type == 'RBRACKET' and parens[-1] == 'LBRACKET':
- parens.pop()
- elif t.type == 'RBRACE' and parens[-1] == 'LBRACE':
+ if t.type in ('RPAREN', 'RBRACE', 'RBRACKET'):
parens.pop()
+ break_line = 'paren' not in parens
+
+ # Append this token to the current line.
line.append(t)
+
return lines, rules
# Format a list of [lines] according to [rules] (as returned by decompose).