diff options
author | Timotej Lazar <timotej.lazar@araneo.org> | 2015-02-07 19:47:57 +0100 |
---|---|---|
committer | Aleš Smodiš <aless@guru.si> | 2015-08-11 14:26:01 +0200 |
commit | 1c1b8f683ee2025497e2733eb0d8bc1e54035487 (patch) | |
tree | 0ccb287c65ae57a8049a5a3c51b443085d7e74d3 | |
parent | 4838e37e26c3fb72ad509d7aef7f307cc7ae3ef2 (diff) |
Remove all trailing punctuation from lines
Remove trailing sequences of COMMAs and PERIODs when extracting edits
from a trace. This is because subgoal order is rarely important in
Prolog, and we don't care if the edit happened on the last line or not.
This means that we treat, for example,
"conc(A,B)," → "conc(A,B,C)." the same as
"conc(A,B)" → "conc(A,B,C)".
-rw-r--r-- | monkey/edits.py | 12 | ||||
-rw-r--r-- | prolog/util.py | 6 |
2 files changed, 11 insertions, 7 deletions
```diff
diff --git a/monkey/edits.py b/monkey/edits.py
index ad595b6..a920bf5 100644
--- a/monkey/edits.py
+++ b/monkey/edits.py
@@ -146,12 +146,14 @@ def get_paths(root, path=None, done=None):
 # edits. Return a dictionary of edits and their frequencies, and also
 # submissions and queries in [traces].
 def get_edits_from_traces(traces):
-    # Helper function to remove trailing punctuation from lines. This is a
-    # rather ugly performance-boosting hack.
+    # Helper function to remove trailing punctuation from lines.
     def remove_punct(line):
-        if line and line[-1].type in ('COMMA', 'PERIOD', 'SEMI', 'FROM'):
-            return line[:-1]
-        return line
+        i = len(line)
+        while i > 0:
+            if line[i-1].type not in ('COMMA', 'PERIOD', 'SEMI'):
+                break
+            i -= 1
+        return line[:i]
 
     # Return values: counts for observed edits, lines, submissions and queries.
     edits = collections.Counter()
diff --git a/prolog/util.py b/prolog/util.py
index 7fb81e3..c762f23 100644
--- a/prolog/util.py
+++ b/prolog/util.py
@@ -66,13 +66,15 @@ def decompose(code):
     rule_start = 0
     for t in tokens:
         if t.type == 'SEMI':
-            if line != []:
+            if line:
                 lines.append(tuple(line))
                 line = []
             lines.append((t,))
             continue
         if not parens:
             if t.type in ('PERIOD', 'FROM', 'COMMA', 'EOF'):
+                if t.type == 'FROM':
+                    line.append(t)
                 if line != []:
                     lines.append(tuple(line))
                     line = []
@@ -104,7 +106,7 @@ def compose(lines, rules):
             if i == end-1:
                 code += '.\n'
             elif i == start:
-                code += ' :-\n'
+                code += '\n'
             else:
                 if line and line[-1].type != 'SEMI' and lines[i+1][-1].type != 'SEMI':
                     code += ','
```