#!/usr/bin/python3

import collections

from .action import expand, parse
from .graph import Node
from .prolog.util import rename_vars, stringify, tokenize
from .util import get_line

# A line edit is a contiguous sequences of actions within a single line. This
# function takes a sequence of actions and builds a directed acyclic graph
# where each edit represents one line edit and each node represents a version
# of some line. The function returns a list of nodes (first element is the
# root), and sets of submissions (program versions tested by the user) and
# queries in this attempt.
def edit_graph(actions, debug=False):
    # Return values.
    nodes = [Node([0, 0, ()])]  # Node data: rank (Y), line no. (X), and tokens.
    submissions = set()         # Program versions at 'test' actions.
    queries = set()             # Queries run by the student.

    # State variables.
    leaves = {0: nodes[0]}  # Current leaf node for each line.
    rank = 1                # Rank (order / y-position) for the next node.
    code_next = ''          # Program code after applying the current action.

    # Ensure there is a separate action for each inserted/removed character.
    expand(actions)
    for action_id, action in enumerate(actions):
        code = code_next
        code_next = action.apply(code)

        if action.type == 'test':
            submissions.add(code)

        elif action.type == 'solve' or action.type == 'solve_all':
            queries.add(action.query)

        elif action.type == 'insert' or action.type == 'remove':
            # Number of the changed line.
            line = code[:action.offset].count('\n')
            # Current leaf node for this line.
            parent = leaves[line]
            # Tokens in this line after applying [action].
            tokens_next = tuple(tokenize(get_line(code_next, line)))

            # If a new node is inserted, clone each leaf into the next rank.
            # This makes it easier to print the graph for graphviz; when
            # analyzing the graph, duplicate nodes without siblings should be
            # ignored.
            new_leaves = {}

            if action.text == '\n':
                if action.type == 'insert':
                    tokens_next_right = tuple(tokenize(get_line(code_next, line+1)))

                    child_left = Node([rank, line, tokens_next])
                    parent.add_out(child_left)

                    child_right = Node([rank, line+1, tokens_next_right])
                    parent.add_out(child_right)

                    # Create new leaf nodes.
                    for i, leaf in leaves.items():
                        if i < line:
                            new_leaves[i] = Node([rank, i, leaf.data[2]])
                            leaf.add_out(new_leaves[i])
                        elif i > line:
                            new_leaves[i+1] = Node([rank, i+1, leaf.data[2]])
                            leaf.add_out(new_leaves[i+1])
                    new_leaves[line] = child_left
                    new_leaves[line+1] = child_right

                elif action.type == 'remove':
                    parent_right = leaves[line+1]

                    child = Node([rank, line, tokens_next])
                    parent_right.add_out(child)
                    parent.add_out(child)

                    # Create new leaf nodes.
                    for i, leaf in leaves.items():
                        if i < line:
                            new_leaves[i] = Node([rank, i, leaf.data[2]])
                            leaf.add_out(new_leaves[i])
                        elif i > line+1:
                            new_leaves[i-1] = Node([rank, i-1, leaf.data[2]])
                            leaf.add_out(new_leaves[i-1])
                    new_leaves[line] = child
            else:
                # Skip the node if the next action is insert/remove (except \n)
                # on the same line.
                if action_id < len(actions)-1:
                    action_next = actions[action_id+1]
                    if action_next.type in ('insert', 'remove'):
                        line_next = code_next[:action_next.offset].count('\n')
                        if action_next.text != '\n' and line == line_next:
                            continue

                # Skip the node if it is the same as the parent.
                if tokens_next == parent.data[2]:
                    continue

                child = Node([rank, line, tokens_next])
                parent.add_out(child)

                # Create new leaf nodes.
                for i, leaf in leaves.items():
                    if i != line:
                        new_leaves[i] = Node([rank, i, leaf.data[2]])
                        leaf.add_out(new_leaves[i])
                new_leaves[line] = child

            leaves = new_leaves
            nodes += leaves.values()
            rank += 1

    return nodes, submissions, queries

# Return all interesting edit paths in the edit graph rooted at [root].
def get_paths(root, path=tuple(), done=None):
    if done is None:
        done = set()

    cur_path = list(path)
    if len(path) == 0 or path[-1] != root.data[2]:
        cur_path.append(root.data[2])

    # leaf node
    if len(root.eout) == 0:
        yield tuple(cur_path)
    # empty node
    elif len(path) > 1 and len(root.data[2]) == 0:
        yield tuple(cur_path)

    if len(root.data[2]) > 0:
        new_path = cur_path
    else:
        new_path = [root.data[2]]
        done.add(root)

    for node in root.eout:
        if node not in done:
            yield from get_paths(node, tuple(new_path), done)

# Build an edit graph for each trace and find "meaningful" (to be defined)
# edits. Return a dictionary of edits and their frequencies, and also
# submissions and queries in [traces].
def get_edits_from_traces(traces):
    # Helper function to remove trailing punctuation from lines. This is a
    # rather ugly performance-boosting hack.
    def remove_punct(line):
        if line and line[-1].type in ('COMMA', 'PERIOD', 'SEMI', 'FROM'):
            return line[:-1]
        return line

    # Return values: counts for observed edits, lines, submissions and queries.
    edits = collections.Counter()
    lines = collections.Counter()
    submissions = collections.Counter()
    queries = collections.Counter()

    for trace in traces:
        try:
            actions = parse(trace)
        except:
            continue
        nodes, trace_submissions, trace_queries = edit_graph(actions)

        # Update the submissions/queries counters; rename variables first to
        # remove trivial differences.
        for submission in trace_submissions:
            tokens = tokenize(submission)
            rename_vars(tokens)
            code = stringify(tokens)
            submissions[code] += 1

        for query in trace_queries:
            tokens = tokenize(query)
            rename_vars(tokens)
            code = stringify(tokens)
            queries[code] += 1

        # Get edits.
        for path in get_paths(nodes[0]):
            for i in range(len(path)):
                start = list(remove_punct(path[i]))
                var_names = rename_vars(start)
                start_t = tuple(start)

                for j in range(len(path[i+1:])):
                    end = list(remove_punct(path[i+1+j]))
                    rename_vars(end, var_names)
                    end_t = tuple(end)

                    if start_t != end_t:
                        edit = (start_t, end_t)
                        edits[edit] += 1
                        lines[start_t] += 1

    # Discard rarely occurring edits. XXX only for testing
    singletons = [edit for edit in edits if edits[edit] < 2]
    for edit in singletons:
        lines[edit[0]] -= edits[edit]
        del edits[edit]

    # Get the probability of each edit given its 'before' line.
    for before, after in edits:
        edits[(before, after)] /= lines[before]

    # Normalize line frequencies.
    if len(lines) > 0:
        lines_max = max(lines.values())
        lines = {line: count/lines_max for line, count in lines.items()}

    return edits, lines, submissions, queries

def classify_edits(edits):
    inserts = {}
    removes = {}
    changes = {}
    for (before, after), cost in edits.items():
        if after and not before:
            inserts[after] = cost
        elif before and not after:
            removes[before] = cost
        else:
            changes[(before, after)] = cost
    return inserts, removes, changes

# Simplify an edit graph with given nodes: remove empty leaf nodes and other
# fluff. The function is called recursively until no more changes are done.
def clean_graph(nodes):
    changed = False

    # A
    # | --> A (when B is an empty leaf)
    # B
    for node in nodes:
        if len(node.eout) == 0 and len(node.ein) == 1 and len(node.data[2]) == 0:
            parent = node.ein[0]
            parent.eout.remove(node)
            nodes.remove(node)
            changed = True
            break

    # A
    # | --> A
    # A
    for node in nodes:
        if len(node.ein) == 1:
            parent = node.ein[0]
            if len(parent.eout) == 1 and node.data[2] == parent.data[2]:
                parent.eout = node.eout
                for child in parent.eout:
                    child.ein = [parent if node == node else node for node in child.ein]
                nodes.remove(node)
                changed = True
                break

    # A       A
    # |\      |
    # | C --> |   (when C is empty)
    # |/      |
    # B       B
    for node in nodes:
        if len(node.data[2]) == 0 and len(node.ein) == 1 and len(node.eout) == 1:
            parent = node.ein[0]
            child = node.eout[0]
            if len(parent.eout) == 2 and len(child.ein) == 2:
                parent.eout = [n for n in parent.eout if n != node]
                child.ein = [n for n in child.ein if n != node]
                nodes.remove(node)
                changed = True
                break

    # A
    # |
    # C --> A
    # |
    # A
    for node in nodes:
        if len(node.data[2]) == 0 and len(node.ein) == 1 and len(node.eout) == 1:
            parent = node.ein[0]
            child = node.eout[0]
            if len(parent.eout) == 1 and len(child.ein) == 1 and parent.data[2] == child.data[2]:
                parent.eout = [child]
                child.ein = [parent]
                nodes.remove(node)
                changed = True
                break

    if changed:
        # go again until nothing changes
        clean_graph(nodes)
    else:
        # compact node ranks
        ranks = set([node.data[0] for node in nodes])
        missing = set(range(1,max(ranks)+1)) - ranks

        for node in nodes:
            diff = 0
            for rank in sorted(missing):
                if rank >= node.data[0]:
                    break
                diff += 1
            node.data[0] -= diff

if __name__ == '__main__':
    import os
    import pickle
    import django

    # Load django models.
    os.environ['DJANGO_SETTINGS_MODULE'] = 'webmonkey.settings'
    django.setup()
    from django.contrib.auth.models import User
    from tutor.models import Attempt, Problem

    edits = {}
    lines = {}
    submissions = {}
    queries = {}
    for problem in Problem.objects.all():
        pid = problem.pk
        traces = [a.trace for a in Attempt.objects.filter(problem=problem, done=True)]
        edits[pid], lines[pid], submissions[pid], queries[pid] = get_edits_from_traces(traces)

    pickle.dump((edits, lines, submissions, queries), open('edits.pickle', 'wb'))