From 1c1b8f683ee2025497e2733eb0d8bc1e54035487 Mon Sep 17 00:00:00 2001 From: Timotej Lazar Date: Sat, 7 Feb 2015 19:47:57 +0100 Subject: Remove all trailing punctuation from lines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove trailing sequences of COMMAs and PERIODs when extracting edits from a trace. This is because subgoal order is rarely important in Prolog, and we don't care if the edit happened on the last line or not. This means that we treat for example "conc(A,B)," → "conc(A,B,C)." the same as "conc(A,B)" → "conc(A,B,C)". --- monkey/edits.py | 12 +++++++----- prolog/util.py | 6 ++++-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/monkey/edits.py b/monkey/edits.py index ad595b6..a920bf5 100644 --- a/monkey/edits.py +++ b/monkey/edits.py @@ -146,12 +146,14 @@ def get_paths(root, path=None, done=None): # edits. Return a dictionary of edits and their frequencies, and also # submissions and queries in [traces]. def get_edits_from_traces(traces): - # Helper function to remove trailing punctuation from lines. This is a - # rather ugly performance-boosting hack. + # Helper function to remove trailing punctuation from lines. def remove_punct(line): - if line and line[-1].type in ('COMMA', 'PERIOD', 'SEMI', 'FROM'): - return line[:-1] - return line + i = len(line) + while i > 0: + if line[i-1].type not in ('COMMA', 'PERIOD', 'SEMI'): + break + i -= 1 + return line[:i] # Return values: counts for observed edits, lines, submissions and queries. edits = collections.Counter() diff --git a/prolog/util.py b/prolog/util.py index 7fb81e3..c762f23 100644 --- a/prolog/util.py +++ b/prolog/util.py @@ -66,13 +66,15 @@ def decompose(code): rule_start = 0 for t in tokens: if t.type == 'SEMI': - if line != []: + if line: lines.append(tuple(line)) line = [] lines.append((t,)) continue if not parens: if t.type in ('PERIOD', 'FROM', 'COMMA', 'EOF'): + if t.type == 'FROM': + line.append(t) if line != []: lines.append(tuple(line)) line = [] @@ -104,7 +106,7 @@ def compose(lines, rules): if i == end-1: code += '.\n' elif i == start: - code += ' :-\n' + code += '\n' else: if line and line[-1].type != 'SEMI' and lines[i+1][-1].type != 'SEMI': code += ',' -- cgit v1.2.1