/**
 * A parser/compiler for the pythonic definitions of assignments.
 * Supports basic assignment statements, no expressions using operators (yet).
 */

(function () {
    // Single-character classifiers used while scanning the input.
    var regexpWhitespace = /[ \t]/;        // a space or a tab
    var regexpNameStart = /[a-zA-Z_]/;     // first character of a name
    var regexpName = /[a-zA-Z0-9_]/;       // any subsequent character of a name
    var regexpNumber = /[0-9]/;            // a decimal digit
    var regexpNumberAndDot = /[0-9.]/;     // a decimal digit or the decimal point

    /**
     * Tells whether the run of backslashes ending at index `pos` of `s`
     * escapes the character that follows it.
     *
     * Walks backwards from `pos` while the characters are backslashes and
     * flips the result for each one, so an odd-length run escapes the next
     * character and an even-length run (pairs of escaped backslashes) does not.
     *
     * @param {string} s the text to inspect
     * @param {number} pos index of the last character to inspect; a negative
     *                     index (e.g. for an empty string) yields false
     * @returns {boolean} true if the character after `pos` is escaped
     */
    var isEscape = function (s, pos) {
        var i, result = false;
        // scan all the way down to the start of the string, reading the
        // character at the moving index, so the whole run of backslashes is counted
        for (i = pos; i >= 0; i--) {
            if (s.charAt(i) !== '\\') break;
            result = !result;
        }
        return result;
    };

    /**
     * Appends to `output` a single-quoted JavaScript string literal for the
     * raw contents `s` of a tokenized string.
     *
     * Backslash sequences already present in `s` are passed through verbatim.
     * Literal CR, TAB and LF characters are replaced with \r, \t and \n, and
     * every single quote is emitted, preceded by a backslash unless the text
     * before it already ends with an escaping backslash.
     *
     * @param {string} s the raw string contents, without the enclosing quotes
     * @param {Array.<string>} output the fragment list to append to
     */
    var escapePyString = function (s, output) {
        var parts = s.split("'"),
            n = parts.length,
            i, part, previousPart,
            escapeControls = function (text) {
                // escape \r, \t, \n
                return text.split('\r').join('\\r').split('\t').join('\\t').split('\n').join('\\n');
            };
        output.push("'"); // starting quote

        previousPart = parts[0];
        output.push(escapeControls(previousPart));
        for (i = 1; i < n; i++) {
            part = parts[i];
            // first escape the single quote, if required
            if (!isEscape(previousPart, previousPart.length - 1)) output.push('\\');
            // split() removed the quote itself, so it has to be put back
            output.push("'");
            output.push(escapeControls(part));
            previousPart = part;
        }

        output.push("'"); // ending quote
    };

    /**
     * Creates a tokenizer over the given input text.
     *
     * The returned function yields the next token on each call and null once
     * the input is exhausted. Every token is an object with:
     *   - type: one of 'string', 'name', 'number', 'eq', 'assign', 'lbrace',
     *     'rbrace', 'lbracket', 'rbracket', 'lparen', 'rparen', 'colon',
     *     'semicolon', 'comma', 'comment', 'unknown'
     *   - value: present for 'string' (raw contents without the quotes),
     *     'name', 'number' (converted to a number), 'comment' (text after
     *     the #) and 'unknown' (the offending character)
     *   - line, column: 1-based position of the token's first character
     *   - whitespace_offset: width of the whitespace skipped right before the
     *     token on its line (a tab counts as 4)
     *   - line_start: see the note on is_line_start below
     *
     * @param {string} input the text to tokenize
     * @returns {function(): ?Object} the next() function
     * @throws {Error} from next(), on an unterminated string or a CR that is
     *                 not followed by LF
     */
    var tokenize = function (input) {
        var pos = 0, row = 1, col = 1,
            n = input.length,
            // NOTE(review): this flag is only ever set to true, so every token
            // currently reports line_start === true. Resetting it after each
            // token would make a trailing comment on a statement's line fail the
            // line-start check in parseDefinition, so it is left as it is.
            is_line_start = true,
            // Reads a triple-quoted string; pos must point just past the opening
            // quotes. `type` is the closing delimiter (three quotes).
            getMultilineString = function (type) {
                var p = pos, i, j, l, parts, part, is_escaped, result;
                while (p < n) {
                    i = input.indexOf(type, p);
                    if (i < 0) throw new Error('Unterminated long string at position #' + pos + ', line ' + row + ', character ' + col);
                    // an odd number of backslashes right before the delimiter escapes its first quote
                    is_escaped = false;
                    for (j = i-1; j >= pos; j--) {
                        if (input.charAt(j) === '\\') is_escaped = !is_escaped;
                        else break;
                    }
                    if (is_escaped) p = i + 1; // an escaped quote
                    else {
                        // process line continuations
                        parts = input.slice(pos, i).split('\n');
                        col += i + 3 - pos; // position the next column pointer
                        pos = i + 3; // position the next character pointer
                        l = parts.length - 1;
                        if (l === 0) return parts[0]; // no newline character in the string
                        row += l; // add the lines of the string
                        col = 1 + parts[l].length + 3; // the last line's text plus the three closing quotes
                        result = []; // the resulting string
                        for (i = 0; i < l; i++) {
                            part = parts[i]; // check the part for a trailing escape character
                            is_escaped = false;
                            for (j = part.length - 1; j >= 0; j--) {
                                if (part.charAt(j) === '\\') is_escaped = !is_escaped;
                                else break;
                            }
                            if (is_escaped) {
                                // only add the line with the tail escape character trimmed
                                result.push(part.slice(0, part.length - 1));
                            }
                            else {
                                // add the line and the trailing newline
                                result.push(part, '\n');
                            }
                        }
                        result.push(parts[l]); // the last line, which is not checked for an escape character
                        return result.join('');
                    }
                }
                throw new Error('Unterminated long string at position #' + pos + ', line ' + row + ', character ' + col);
            },
            // Reads a single-line string; pos must point just past the opening
            // quote. `type` is the closing quote character.
            getString = function (type) {
                var p = pos, i, j, is_escaped, s;
                while (p < n) {
                    i = input.indexOf(type, p);
                    j = input.indexOf('\n', p);
                    if (i < 0) throw new Error('Unterminated string at position #' + pos + ', line ' + row + ', character ' + col);
                    // a newline before the closing quote means the string never ended on its line
                    if ((j >= 0) && (j < i)) throw new Error('Unterminated string at position #' + pos + ', line ' + row + ', character ' + col);
                    is_escaped = false;
                    for (j = i-1; j >= pos; j--) {
                        if (input.charAt(j) === '\\') is_escaped = !is_escaped;
                        else break;
                    }
                    if (is_escaped) p = i + 1; // an escaped quote
                    else {
                        s = input.slice(pos, i);
                        col += i + 1 - pos; // position the next column pointer
                        pos = i + 1; // position the next character pointer
                        return s;
                    }
                }
                throw new Error('Unterminated string at position #' + pos + ', line ' + row + ', character ' + col);
            };

        // the next() function
        return function () {
            var s, count = 0, token = null, parts, i, seen_dot, token_row, token_col;
            if (pos >= n) return null;
            var c = input.charAt(pos++);

            // skip white-space
            for (;;) {
                if (c === ' ') {
                    count++;
                    col++;
                }
                else if (c === '\t') {
                    // tab is worth 4 spaces
                    count += 4;
                    col++;
                }
                else if (c === '\n') {
                    count = 0;
                    is_line_start = true;
                    row++;
                    col = 1;
                }
                else if (c === '\r') {
                    if (pos >= n) return null; // EOF
                    if (input.charAt(pos) !== '\n') throw new Error("CR character without a trailing LF at character #" + (pos-1));
                    pos++;
                    count = 0;
                    is_line_start = true;
                    row++;
                    col = 1;
                }
                else break;
                if (pos >= n) return null; // EOF
                c = input.charAt(pos++);
            }

            // remember where the token itself starts, i.e. after the skipped white-space
            token_row = row;
            token_col = col;

            if ((c === '"') || (c === "'")) {
                col++; // the opening quote
                // a quote starts a string, now see whether it's a multi-line string
                if ((pos+1 < n) && (input.charAt(pos) == c) && (input.charAt(pos+1) == c)) {
                    // it's a long string
                    pos += 2;
                    col += 2; // the other two opening quotes
                    s = getMultilineString(c === '"' ? '"""' : "'''");
                }
                else {
                    s = getString(c);
                }
                token = {'type': 'string', 'value': s};
            }

            else if (c.match(regexpNameStart)) {
                parts = [ c ]; // the name builder
                col++;
                while (pos < n) {
                    c = input.charAt(pos);
                    if (c.match(regexpName)) {
                        parts.push(c);
                        pos++;
                        col++;
                    }
                    else break;
                }
                token = {'type': 'name', 'value': parts.join('')};
            }

            else if (c.match(regexpNumberAndDot)) {
                // digits with at most one decimal point: 12, 1.5, 1., .5
                parts = [ c ]; // the number builder
                col++;
                seen_dot = (c === '.');
                while (pos < n) {
                    c = input.charAt(pos); // re-read on every iteration so the fraction is consumed too
                    if (c.match(regexpNumber)) {
                        // a digit is always part of the number
                    }
                    else if ((c === '.') && !seen_dot) {
                        seen_dot = true;
                    }
                    else break;
                    parts.push(c);
                    pos++;
                    col++;
                }
                s = parts.join('');
                if (s === '.') {
                    // a lone dot is not a number
                    token = {'type': 'unknown', 'value': s};
                }
                else {
                    token = {'type': 'number', 'value': +s};
                }
            }

            else if (c === '=') {
                col++;
                if ((pos < n) && (input.charAt(pos) === '=')) {
                    pos++;
                    col++;
                    token = {'type': 'eq'};
                }
                else {
                    token = {'type': 'assign'};
                }
            }

            else if (c === '{') {
                token = {'type': 'lbrace'};
                col++;
            }

            else if (c === '}') {
                token = {'type': 'rbrace'};
                col++;
            }

            else if (c === '[') {
                token = {'type': 'lbracket'};
                col++;
            }

            else if (c === ']') {
                token = {'type': 'rbracket'};
                col++;
            }

            else if (c === '(') {
                token = {'type': 'lparen'};
                col++;
            }

            else if (c === ')') {
                token = {'type': 'rparen'};
                col++;
            }

            else if (c === ':') {
                token = {'type': 'colon'};
                col++;
            }

            else if (c === ';') {
                token = {'type': 'semicolon'};
                col++;
            }

            else if (c === ',') {
                token = {'type': 'comma'};
                col++;
            }

            else if (c === '#') {
                // a comment till the end of line
                col++; // the # itself
                i = input.indexOf('\n', pos);
                if (i < 0) i = n; // this is the last line: the comment runs to the end of input
                s = input.slice(pos, i);
                token = {'type': 'comment', 'value': s};
                col += i - pos;
                pos = i; // we want the next invocation to parse the trailing newline, so it correctly sets line_start, etc.
            }

            else {
                token = {'type': 'unknown', 'value': c};
                col++;
            }

            token['line_start'] = is_line_start;
            token['whitespace_offset'] = count;
            token['line'] = token_row;
            token['column'] = token_col;
            return token;
        };
    };

    /**
     * Compiles one expression, starting at `token`, into JavaScript source
     * fragments appended to `output`.
     *
     * Supported expressions are string literals, number literals, object
     * literals with string or number keys, and array literals; object values
     * and array elements are compiled recursively.
     *
     * @param {Object} token the first token of the expression
     * @param {function(): ?Object} next yields the following tokens, null at the end of input
     * @param {Array.<string>} output the fragment list to append to
     * @throws {Error} when the input ends inside the expression or an
     *                 unexpected token is encountered
     */
    var parseExpression = function (token, next, output) {
        var nextToken,
            // fetches the next token, reporting the position of the expression's first token if the input has ended
            requireNext = function () {
                var t = next();
                if (!t) throw new Error("Unfinished line, at line " + token.line + ", column " + token.column);
                return t;
            };

        if (token.type === 'string') {
            // string literal
            escapePyString(token.value, output);
        }
        else if (token.type === 'number') {
            // number literal
            output.push('' + token.value);
        }
        else if (token.type === 'lbrace') {
            // object literal
            output.push('{');
            nextToken = requireNext();
            if (nextToken.type === 'rbrace') {
                output.push('}');
                return; // end of object literal
            }
            for (;;) {
                // the key
                if (nextToken.type === 'string') {
                    escapePyString(nextToken.value, output);
                }
                else if (nextToken.type === 'number') {
                    // the key's own value, not that of the opening brace token
                    output.push('' + nextToken.value);
                }
                else throw new Error("Object key not a string or a number, at line " + nextToken.line + ", column " + nextToken.column);

                nextToken = requireNext();
                if (nextToken.type !== 'colon') throw new Error("Expected :, at line " + nextToken.line + ", column " + nextToken.column);
                output.push(':');

                // the value
                nextToken = requireNext();
                parseExpression(nextToken, next, output);

                nextToken = requireNext();
                if (nextToken.type === 'comma') {
                    output.push(',');
                }
                else if (nextToken.type === 'rbrace') {
                    output.push('}');
                    break; // end of object literal
                }
                else throw new Error("Expected , or }, at line " + nextToken.line + ", column " + nextToken.column);

                nextToken = requireNext(); // the next key
            }
        }
        else if (token.type === 'lbracket') {
            // array literal
            output.push('[');
            nextToken = requireNext();
            if (nextToken.type === 'rbracket') {
                output.push(']');
                return; // end of array literal
            }
            for (;;) {
                parseExpression(nextToken, next, output);

                nextToken = requireNext();
                if (nextToken.type === 'comma') {
                    output.push(',');
                }
                else if (nextToken.type === 'rbracket') {
                    output.push(']');
                    break; // end of array literal
                }
                else throw new Error("Expected , or ], at line " + nextToken.line + ", column " + nextToken.column);

                nextToken = requireNext(); // the next element
            }
        }
        else throw new Error("Unexpected token: expected a string, number, object, or array, at line " + token.line + ", column " + token.column);
    };


    /**
     * @typedef {string|{type: string, message: string}} HintDefinition
     */
    /**
     * @typedef {Object} PrologTaskDef a Prolog assignment definition
     * @property {string} description the assignment description
     * @property {Object.<string, HintDefinition>} hint the assignment hint definitions, keyed by hint ID
     */

    /**
     * Converts the given pythonic assignment definition into a JavaScriptish definition,
     * executes it, and takes the variables "description" and "hint" from the definition.
     *
     * Each statement must have the form `name = expression` or
     * `name[index] = expression`, where index is a string or a number.
     * Comment tokens at statement position are skipped. Every assigned name is
     * declared as a local variable of the generated function.
     *
     * @param {string} definition The assignment definition.
     * @returns {PrologTaskDef} the object holding "description" and "hint", or
     *                          an empty object if the definition contains no statements
     * @throws {Error} if the definition cannot be parsed
     */
    codeq.parseDefinition = function (definition) {
        var next = tokenize(definition),
            hasOwn = Object.prototype.hasOwnProperty,
            vars = { 'description': true, 'hint': true }, // always declared, because they are returned
            parts = [ ], // every statement starts with a ';\n' fragment; the very first one is replaced with the "var" declaration at the end
            token, first_token, varname, v, fn;

        while (token = next()) {
            // parse line by line
            if (!token.line_start) throw new Error("The token does not start in a new line, at line " + token.line + ", column " + token.column);
            if (token.type === 'comment') continue;
            if (token.whitespace_offset > 0) throw new Error("Cannot parse indented lines, at line " + token.line);

            // parse lvalue
            if (token.type !== 'name') throw new Error("Expected a lvalue, at line " + token.line + ", column " + token.column);
            varname = token.value;
            vars[varname] = true; // remember the variable name, so we will declare it at the end
            parts.push(';\n'); // close the previous line -- the first time this is wrong because there is no previous line yet, but we will replace it with a variable declaration
            parts.push(varname); // start the new line with the assignment statement -- the only statement we support
            first_token = token;
            if (!(token = next())) throw new Error("Unfinished line, at line " + first_token.line);

            // optional index
            if (token.type === 'lbracket') {
                // index operator
                parts.push('[');
                if (!(token = next())) throw new Error("Unfinished line, at line " + first_token.line);
                if (token.type === 'string') escapePyString(token.value, parts);
                else if (token.type === 'number') parts.push('' + token.value);
                else throw new Error("Unsupported index expression, at line " + token.line + ", column " + token.column);
                if (!(token = next())) throw new Error("Unfinished line, at line " + first_token.line);
                if (token.type !== 'rbracket') throw new Error("Expected ], at line " + token.line + ", column " + token.column + ", token " + token.type);
                parts.push(']');
                if (!(token = next())) throw new Error("Unfinished line, at line " + first_token.line);
            }

            // the assignment operator
            if (token.type !== 'assign') throw new Error("Expected =, at line " + token.line + ", column " + token.column);
            parts.push('=');
            if (!(token = next())) throw new Error("Unfinished line, at line " + first_token.line);

            // parse rvalue
            parseExpression(token, next, parts);
        }

        if (parts.length === 0) return {}; // empty definition

        v = [];
        for (varname in vars) {
            // go through Object.prototype, because the definition may assign a
            // variable named "hasOwnProperty", which would shadow the method on vars
            if (hasOwn.call(vars, varname)) v.push(varname);
        }

        parts[0] = 'var ' + v.join(', ') + ';\n';
        parts.push(';\nreturn {"description":description,"hint":hint};');
        v = parts.join('');
        codeq.log.debug("Creating a new parseInfo function having the body:\n" + v);
        // NOTE(review): the generated source is executed with new Function. It is
        // assembled only from names, escaped string literals, numbers and
        // punctuation emitted above, but names are not checked against JavaScript
        // reserved words -- confirm that definitions come from a trusted source.
        fn = new Function("__params__", v);
        return fn(); // called without arguments; the result contains "description" and "hint"
    }; // parseDefinition

})();