#!/usr/bin/python3

import collections
import math

from .action import expand, parse
from .graph import Node
from prolog.util import rename_vars, stringify, tokenize
from .util import get_line, avg, logistic

# A line edit is a contiguous sequence of actions within a single line. This
# function takes a sequence of actions and builds a directed acyclic graph
# where each edge represents one line edit and each node represents a version
# of some line. The function returns a list of nodes (the first element is the
# root), and sets of submissions (program versions tested by the user) and
# queries in this attempt.
def edit_graph(actions, debug=False):
    # Return values.
    nodes = [Node([0, 0, ()])]   # Node data: rank (Y), line no. (X), and tokens.
    submissions = set()          # Program versions at 'test' actions.
    queries = set()              # Queries run by the student.

    # State variables.
    leaves = {0: nodes[0]}       # Current leaf node for each line.
    rank = 1                     # Rank (order / y-position) for the next node.
    code_next = ''               # Program code after applying the current action.

    # Ensure there is a separate action for each inserted/removed character.
    expand(actions)

    for action_id, action in enumerate(actions):
        code = code_next
        code_next = action.apply(code)

        if action.type == 'test':
            submissions.add(code)

        elif action.type == 'solve' or action.type == 'solve_all':
            queries.add(action.query)

        elif action.type == 'insert' or action.type == 'remove':
            # Number of the changed line.
            line = code[:action.offset].count('\n')
            # Current leaf node for this line.
            parent = leaves[line]
            # Tokens in this line after applying [action].
            tokens_next = tuple(tokenize(get_line(code_next, line)))

            # If a new node is inserted, clone each leaf into the next rank.
            # This makes it easier to print the graph for graphviz; when
            # analyzing the graph, duplicate nodes without siblings should be
            # ignored.
            new_leaves = {}
            if action.text == '\n':
                if action.type == 'insert':
                    tokens_next_right = tuple(tokenize(get_line(code_next, line+1)))

                    child_left = Node([rank, line, tokens_next])
                    parent.add_out(child_left)

                    child_right = Node([rank, line+1, tokens_next_right])
                    parent.add_out(child_right)

                    # Create new leaf nodes.
                    for i, leaf in leaves.items():
                        if i < line:
                            new_leaves[i] = Node([rank, i, leaf.data[2]])
                            leaf.add_out(new_leaves[i])
                        elif i > line:
                            new_leaves[i+1] = Node([rank, i+1, leaf.data[2]])
                            leaf.add_out(new_leaves[i+1])
                    new_leaves[line] = child_left
                    new_leaves[line+1] = child_right

                elif action.type == 'remove':
                    parent_right = leaves[line+1]

                    child = Node([rank, line, tokens_next])
                    parent_right.add_out(child)
                    parent.add_out(child)

                    # Create new leaf nodes.
                    for i, leaf in leaves.items():
                        if i < line:
                            new_leaves[i] = Node([rank, i, leaf.data[2]])
                            leaf.add_out(new_leaves[i])
                        elif i > line+1:
                            new_leaves[i-1] = Node([rank, i-1, leaf.data[2]])
                            leaf.add_out(new_leaves[i-1])
                    new_leaves[line] = child
            else:
                # Skip the node if the next action is insert/remove (except \n)
                # on the same line.
                if action_id < len(actions)-1:
                    action_next = actions[action_id+1]
                    if action_next.type in ('insert', 'remove'):
                        line_next = code_next[:action_next.offset].count('\n')
                        if action_next.text != '\n' and line == line_next:
                            continue

                # Skip the node if it is the same as the parent.
                if tokens_next == parent.data[2]:
                    continue

                child = Node([rank, line, tokens_next])
                parent.add_out(child)

                # Create new leaf nodes.
                for i, leaf in leaves.items():
                    if i != line:
                        new_leaves[i] = Node([rank, i, leaf.data[2]])
                        leaf.add_out(new_leaves[i])
                new_leaves[line] = child

            leaves = new_leaves
            nodes += leaves.values()
            rank += 1

    return nodes, submissions, queries
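
# Illustrative sketch (not used elsewhere in this module): build the edit graph
# for a single attempt and dump its nodes. Assumes [trace] is one raw action
# log in the format accepted by action.parse(), and that stringify() accepts
# any sequence of tokens (it is only ever passed lists in the rest of the file).
def _print_edit_graph(trace):
    actions = parse(trace)
    nodes, submissions, queries = edit_graph(actions)
    for node in nodes:
        rank, line, tokens = node.data
        print('rank {}, line {}: {}'.format(rank, line, stringify(list(tokens))))
    print('{} submissions, {} queries'.format(len(submissions), len(queries)))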

# Generate all interesting paths in the edit graph rooted at [root].
def get_paths(root, path=None, done=None):
    if done is None:
        done = set()

    # Add [root] to [path] if it is the first node or different than previous.
    if not path:
        path = (root.data[2],)
    elif root.data[2] != path[-1]:
        path = path + (root.data[2],)

    # Return the current path if [root] is a leaf or an empty node.
    if len(path) > 1:
        if not root.eout or not root.data[2]:
            yield path

    # If [root] is an empty node, start a new path.
    if not root.data[2]:
        path = (root.data[2],)

    done.add(root)
    for node in root.eout:
        if node not in done:
            yield from get_paths(node, path, done)

# Build an edit graph for each trace and find "meaningful" (to be defined)
# edits. Return a dictionary of edits and their frequencies, and also
# submissions and queries in [traces].
def get_edits_from_traces(traces):
    # Helper function to remove trailing punctuation from lines and rename
    # variables to A1,A2,A3,… (potentially using [var_names]). Return a tuple.
    def normalize(line, var_names=None):
        # Remove trailing punctuation.
        i = len(line)
        while i > 0:
            if line[i-1].type not in ('COMMA', 'PERIOD', 'SEMI'):
                break
            i -= 1
        return tuple(rename_vars(line[:i], var_names))

    # Return values: counts for observed edits, lines, submissions and queries.
    edits = collections.Counter()
    submissions = collections.Counter()
    queries = collections.Counter()

    # Counts of traces where each line appears as a leaf / any node.
    n_leaf = collections.Counter()
    n_all = collections.Counter()

    for trace in traces:
        try:
            actions = parse(trace)
        except:
            # Only a few traces fail to parse, so just ignore them.
            continue

        nodes, trace_submissions, trace_queries = edit_graph(actions)

        # Update the submissions/queries counters (use normalized variables).
        for submission in trace_submissions:
            code = stringify(rename_vars(tokenize(submission)))
            submissions[code] += 1
        for query in trace_queries:
            code = stringify(rename_vars(tokenize(query)))
            queries[code] += 1

        # Get edits.
        trace_edits = set()
        for path in get_paths(nodes[0]):
            for i in range(1, len(path)):
                # Normalize path[i-1] → path[i] into start → end. Reuse
                # variable names from start when normalizing end.
                var_names = {}
                start = normalize(path[i-1], var_names)
                end = normalize(path[i], var_names)

                # Disallow edits that insert a whole rule (a → … :- …).
                # TODO improve edit_graph to handle this.
                if 'FROM' in [t.type for t in end[:-1]]:
                    continue

                # This should always succeed but check anyway.
                if start != end:
                    edit = (start, end)
                    trace_edits.add(edit)
        edits.update(trace_edits)

        # Update node counts.
        n_leaf.update(set([normalize(n.data[2]) for n in nodes if n.data[2] and not n.eout]))
        n_all.update(set([normalize(n.data[2]) for n in nodes if n.data[2]]))

    # Discard edits that only occur in one trace.
    singletons = [edit for edit in edits if edits[edit] < 2]
    for edit in singletons:
        del edits[edit]

    # Find the probability of each edit a → b.
    for (a, b), count in edits.items():
        p = 1.0
        if a:
            p *= 1 - (n_leaf[a] / (n_all[a]+1))
        if b:
            b_normal = normalize(b)
            p *= n_leaf[b_normal] / (n_all[b_normal]+1)
        if a and b:
            p = math.sqrt(p)
        edits[(a, b)] = p

    # Tweak the edit distribution to improve search.
    avg_p = avg(edits.values())
    for edit, p in edits.items():
        edits[edit] = logistic(p, k=3, x_0=avg_p)

    lines = dict(n_leaf)

    return edits, lines, submissions, queries
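
# Illustrative sketch: compute the edit distribution for a list of raw traces
# and print the [n] highest-scoring edits. Assumes [traces] is a list of action
# logs, e.g. as gathered in the __main__ block below; the output formatting is
# only a suggestion.
def _print_top_edits(traces, n=10):
    edits, lines, submissions, queries = get_edits_from_traces(traces)
    print('{} edits, {} lines, {} submissions, {} queries'.format(
        len(edits), len(lines), len(submissions), len(queries)))
    for (before, after), score in sorted(edits.items(), key=lambda item: item[1], reverse=True)[:n]:
        print('{:.3f}  {} → {}'.format(score, stringify(list(before)), stringify(list(after))))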

# Separate edits into pure insertions, pure removals and changes.
def classify_edits(edits):
    inserts = {}
    removes = {}
    changes = {}
    for (before, after), cost in edits.items():
        if after and not before:
            inserts[after] = cost
        elif before and not after:
            removes[before] = cost
        else:
            changes[(before, after)] = cost
    return inserts, removes, changes

# Simplify an edit graph with given nodes: remove empty leaf nodes and other
# fluff. The function is called recursively until no more changes are done.
def clean_graph(nodes):
    changed = False

    # A
    # | --> A (when B is an empty leaf)
    # B
    for node in nodes:
        if len(node.eout) == 0 and len(node.ein) == 1 and len(node.data[2]) == 0:
            parent = node.ein[0]
            parent.eout.remove(node)
            nodes.remove(node)
            changed = True
            break

    # A
    # | --> A
    # A
    for node in nodes:
        if len(node.ein) == 1:
            parent = node.ein[0]
            if len(parent.eout) == 1 and node.data[2] == parent.data[2]:
                parent.eout = node.eout
                for child in parent.eout:
                    # Replace the removed node with its parent in each child's in-edges.
                    child.ein = [parent if n == node else n for n in child.ein]
                nodes.remove(node)
                changed = True
                break

    # A   A
    # |\  |
    # | C --> | (when C is empty)
    # |/  |
    # B   B
    for node in nodes:
        if len(node.data[2]) == 0 and len(node.ein) == 1 and len(node.eout) == 1:
            parent = node.ein[0]
            child = node.eout[0]
            if len(parent.eout) == 2 and len(child.ein) == 2:
                parent.eout = [n for n in parent.eout if n != node]
                child.ein = [n for n in child.ein if n != node]
                nodes.remove(node)
                changed = True
                break

    # A
    # |
    # C --> A (when C is empty)
    # |
    # A
    for node in nodes:
        if len(node.data[2]) == 0 and len(node.ein) == 1 and len(node.eout) == 1:
            parent = node.ein[0]
            child = node.eout[0]
            if len(parent.eout) == 1 and len(child.ein) == 1 and parent.data[2] == child.data[2]:
                parent.eout = [child]
                child.ein = [parent]
                nodes.remove(node)
                changed = True
                break

    if changed:
        # Go again until nothing changes.
        clean_graph(nodes)
    else:
        # Compact node ranks.
        ranks = set([node.data[0] for node in nodes])
        missing = set(range(1, max(ranks)+1)) - ranks
        for node in nodes:
            diff = 0
            for rank in sorted(missing):
                if rank >= node.data[0]:
                    break
                diff += 1
            node.data[0] -= diff

if __name__ == '__main__':
    import os
    import pickle

    import django

    # Load Django models.
    os.environ['DJANGO_SETTINGS_MODULE'] = 'webmonkey.settings'
    django.setup()
    from django.contrib.auth.models import User
    from tutor.models import Attempt, Problem

    edits = {}
    lines = {}
    submissions = {}
    queries = {}
    for problem in Problem.objects.all():
        print(problem.name)
        pid = problem.pk
        traces = [a.trace for a in Attempt.objects.filter(problem=problem, done=True)]
        edits[pid], lines[pid], submissions[pid], queries[pid] = get_edits_from_traces(traces)

    with open('edits.pickle', 'wb') as f:
        pickle.dump((edits, lines, submissions, queries), f)
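
# The pickled results can later be inspected in a separate session, for example
# (illustrative sketch; [pid] stands for one problem's primary key):
#
#   import pickle
#   with open('edits.pickle', 'rb') as f:
#       edits, lines, submissions, queries = pickle.load(f)
#   inserts, removes, changes = classify_edits(edits[pid])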