From 089dc41954067ec351ae398214091aff269c8d67 Mon Sep 17 00:00:00 2001
From: Timotej Lazar <timotej.lazar@araneo.org>
Date: Mon, 9 Feb 2015 19:00:13 +0100
Subject: Clean up prolog.util.decompose

---
 prolog/util.py | 68 +++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 44 insertions(+), 24 deletions(-)

diff --git a/prolog/util.py b/prolog/util.py
index c762f23..e5a93e2 100644
--- a/prolog/util.py
+++ b/prolog/util.py
@@ -58,40 +58,60 @@ def split(code):
 def decompose(code):
     lines = []
     rules = []
-    tokens = tokenize(code)
-    tokens.append(Token('EOF'))
 
-    line = []
-    parens = []
-    rule_start = 0
-    for t in tokens:
+    rule_start = 0     # lowest line number in the current rule
+    line = []          # tokens in the current line
+    break_line = True  # for each comma, go to a new line
+    parens = []        # stack of currently open parens/brackets/braces
+
+    for t in tokenize(code) + [Token('EOF')]:
+        # Always break the line on a semicolon, even inside parens.
         if t.type == 'SEMI':
             if line:
                 lines.append(tuple(line))
                 line = []
             lines.append((t,))
             continue
-        if not parens:
-            if t.type in ('PERIOD', 'FROM', 'COMMA', 'EOF'):
-                if t.type == 'FROM':
-                    line.append(t)
-                if line != []:
-                    lines.append(tuple(line))
-                    line = []
-                if t.type in ('PERIOD', 'EOF') and rule_start < len(lines):
-                    rules.append((rule_start, len(lines)))
-                    rule_start = len(lines)
-                continue
-        if t.type in ('LPAREN', 'LBRACKET', 'LBRACE'):
-            parens.append(t.type)
+
+        # Break the line on these tokens unless we are inside name( ), [ ] or
+        # { }. The token itself is kept only if it is the :- operator.
+        if break_line and t.type in ('PERIOD', 'FROM', 'COMMA', 'EOF'):
+            # Keep :- at the end of its line; drop commas, periods and EOF.
+            if t.type == 'FROM':
+                line.append(t)
+
+            # Append nonempty lines to the output list.
+            if line:
+                lines.append(tuple(line))
+                line = []
+
+            # Commit a new rule if it contains some lines.
+            if t.type in ('PERIOD', 'EOF') and rule_start < len(lines):
+                rules.append((rule_start, len(lines)))
+                rule_start = len(lines)
+            continue
+
+        # Handle parens.
+        if t.type == 'LPAREN':
+            # Disallow breaking lines inside "name( )" (e.g. member(X, L)) but
+            # not other ( ).
+            if line and line[-1].type == 'NAME':
+                parens.append('paren')
+                break_line = False
+            else:
+                parens.append('ignore')
+        elif t.type in ('LBRACKET', 'LBRACE'):
+            # Disallow breaking lines inside "[ ]" and "{ }".
+            parens.append('paren')
+            break_line = False
         elif parens:
-            if t.type == 'RPAREN' and parens[-1] == 'LPAREN':
-                parens.pop()
-            elif t.type == 'RBRACKET' and parens[-1] == 'LBRACKET':
-                parens.pop()
-            elif t.type == 'RBRACE' and parens[-1] == 'LBRACE':
+            if t.type in ('RPAREN', 'RBRACE', 'RBRACKET'):
                 parens.pop()
+            break_line = 'paren' not in parens
+
+        # Append this token to the current line.
         line.append(t)
+
     return lines, rules
 
 # Format a list of [lines] according to [rules] (as returned by decompose).
-- 
cgit v1.2.1