/**
 * A parser/compiler for the pythonic definitions of assignments.
 * Supports basic assignment statements, no expressions using operators (yet).
 */

(function () {
    var regexpNameStart = new RegExp('[a-zA-Z_]'),
        regexpName = new RegExp('[a-zA-Z0-9_]'),
        regexpNumber = new RegExp('[0-9]'),
        regexpNumberAndDot = new RegExp('[0-9.]');

    /**
     * Tells whether the character that follows position pos in s is escaped, i.e. whether
     * the run of consecutive backslashes ending at pos has an odd length.
     *
     * @param {string} s The text preceding the character in question.
     * @param {number} pos Index of the last character of s to inspect; may be -1 for an empty s.
     * @returns {boolean} true if an odd number of backslashes ends at pos.
     */
    var isEscape = function (s, pos) {
        var i, result = false;
        // walk backwards over the whole run of backslashes, not just the last character
        for (i = pos; i >= 0; i--) {
            if (s.charAt(i) === '\\') result = !result;
            else break;
        }
        return result;
    };

    /**
     * Replaces literal CR, TAB and LF characters with their backslash escape sequences,
     * so the text can be placed inside a single-line JavaScript string literal.
     *
     * @param {string} s
     * @returns {string}
     */
    var escapeControlChars = function (s) {
        return s.split('\r').join('\\r').split('\t').join('\\t').split('\n').join('\\n');
    };

    /**
     * Appends s to output as a single-quoted JavaScript string literal.
     * Backslash sequences already present in s are passed through untouched; single quotes
     * that are not already escaped get a backslash, and CR/TAB/LF are escaped.
     *
     * @param {string} s The raw string content, as found between the quotes in the definition.
     * @param {string[]} output The output fragments; the literal is appended to it.
     */
    var escapePyString = function (s, output) {
        var parts = s.split("'"),
            n = parts.length,
            i, part, previousPart;

        output.push("'"); // starting quote
        previousPart = parts[0];
        output.push(escapeControlChars(previousPart));
        for (i = 1; i < n; i++) {
            part = parts[i];
            // re-emit the single quote removed by split(), escaping it if it is not escaped already
            output.push(isEscape(previousPart, previousPart.length - 1) ? "'" : "\\'");
            // escape \r, \n, \t
            output.push(escapeControlChars(part));
            previousPart = part;
        }
        output.push("'"); // ending quote
    };

    /**
     * Creates a tokenizer over the given input.
     *
     * @param {string} input The definition text.
     * @returns {function(): (Object|null)} The next() function: each call returns the next token,
     *   or null at the end of input. A token has a "type" ('string', 'name', 'number', 'eq', 'assign',
     *   'lbrace', 'rbrace', 'lbracket', 'rbracket', 'lparen', 'rparen', 'colon', 'semicolon', 'comma',
     *   'comment' or 'unknown'), an optional "value", and the fields "line_start" (whether it is the
     *   first token on its line), "whitespace_offset" (width of the white-space preceding it on its
     *   line, a tab counting as 4), "line" and "column" (both 1-based).
     *   next() throws an Error on an unterminated string or on a CR that is not followed by LF.
     */
    var tokenize = function (input) {
        var pos = 0,
            row = 1,
            col = 1,
            n = input.length,
            is_line_start = true,

            // tells whether the quote at quoteIndex is preceded by an odd number of backslashes,
            // looking back no further than the start of the string content (pos)
            isQuoteEscaped = function (quoteIndex) {
                var j, is_escaped = false;
                for (j = quoteIndex - 1; j >= pos; j--) {
                    if (input.charAt(j) === '\\') is_escaped = !is_escaped;
                    else break;
                }
                return is_escaped;
            },

            // reads the content of a triple-quoted string; pos must point just after the opening quotes
            getMultilineString = function (type) {
                var p = pos,
                    i, j, l, parts, part, is_escaped, result;
                while (p < n) {
                    i = input.indexOf(type, p);
                    if (i < 0) throw new Error('Unterminated long string at position #' + pos + ', line ' + row + ', character ' + col);
                    if (isQuoteEscaped(i)) p = i + 1; // an escaped quote
                    else {
                        // process line continuations
                        parts = input.slice(pos, i).split('\n');
                        col += i + 3 - pos; // position the next column pointer
                        pos = i + 3; // position the next character pointer
                        l = parts.length - 1;
                        if (l === 0) return parts[0]; // no newline character in the string
                        row += l; // add the lines of the string
                        col = 1 + parts[l].length + 3; // the last line of the string plus the closing quotes
                        result = []; // the resulting string
                        for (i = 0; i < l; i++) {
                            part = parts[i];
                            // check the part for a trailing escape character
                            is_escaped = false;
                            for (j = part.length - 1; j >= 0; j--) {
                                if (part.charAt(j) === '\\') is_escaped = !is_escaped;
                                else break;
                            }
                            if (is_escaped) {
                                // only add the line with the tail escape character trimmed
                                result.push(part.slice(0, part.length - 1));
                            }
                            else {
                                // add the line and the trailing newline
                                result.push(part, '\n');
                            }
                        }
                        result.push(parts[l]); // the last line, which is not checked for an escape character
                        return result.join('');
                    }
                }
                throw new Error('Unterminated long string at position #' + pos + ', line ' + row + ', character ' + col);
            },

            // reads the content of a single-line string; pos must point just after the opening quote
            getString = function (type) {
                var p = pos,
                    i, j, s;
                while (p < n) {
                    i = input.indexOf(type, p);
                    j = input.indexOf('\n', p);
                    if (i < 0) throw new Error('Unterminated string at position #' + pos + ', line ' + row + ', character ' + col);
                    if ((j >= 0) && (j < i)) throw new Error('Unterminated string at position #' + pos + ', line ' + row + ', character ' + col);
                    if (isQuoteEscaped(i)) p = i + 1; // an escaped quote
                    else {
                        s = input.slice(pos, i);
                        col += i + 1 - pos; // position the next column pointer
                        pos = i + 1; // position the next character pointer
                        return s;
                    }
                }
                throw new Error('Unterminated string at position #' + pos + ', line ' + row + ', character ' + col);
            };

        // the next() function
        return function () {
            var s, count = 0, token = null, parts, i, c, token_row, token_col;

            if (pos >= n) return null;
            c = input.charAt(pos++);

            // skip white-space
            for (;;) {
                if (c === ' ') {
                    count++;
                    col++;
                }
                else if (c === '\t') {
                    // tab is worth 4 spaces
                    count += 4;
                    col++;
                }
                else if (c === '\n') {
                    count = 0;
                    is_line_start = true;
                    row++;
                    col = 1;
                }
                else if (c === '\r') {
                    if (pos >= n) return null; // EOF
                    if (input.charAt(pos) !== '\n') throw new Error("CR character without a trailing LF at character #" + (pos-1));
                    pos++;
                    count = 0;
                    is_line_start = true;
                    row++;
                    col = 1;
                }
                else break;
                if (pos >= n) return null; // EOF
                c = input.charAt(pos++);
            }

            // the token position is taken only after the white-space has been skipped,
            // otherwise the first token of a line would report the previous line
            token_row = row;
            token_col = col;

            if ((c === '"') || (c === "'")) {
                col++; // the opening quote
                // a quote starts a string, now see whether it's a multi-line string
                if ((pos+1 < n) && (input.charAt(pos) === c) && (input.charAt(pos+1) === c)) {
                    // it's a long string
                    pos += 2;
                    col += 2; // the remaining two opening quotes
                    s = getMultilineString(c === '"' ? '"""' : "'''");
                }
                else {
                    s = getString(c);
                }
                token = {'type': 'string', 'value': s};
            }
            else if (regexpNameStart.test(c)) {
                parts = [ c ]; // the name builder
                col++;
                while (pos < n) {
                    c = input.charAt(pos);
                    if (regexpName.test(c)) {
                        parts.push(c);
                        pos++;
                        col++;
                    }
                    else break;
                }
                token = {'type': 'name', 'value': parts.join('')};
            }
            else if (regexpNumberAndDot.test(c)) {
                parts = [ c ]; // the number builder
                col++;
                // the integer part, up to and including the first dot
                while ((pos < n) && (c !== '.')) {
                    c = input.charAt(pos);
                    if (regexpNumberAndDot.test(c)) {
                        parts.push(c);
                        pos++;
                        col++;
                    }
                    else break;
                }
                // the fractional part: digits only, and only if a dot has been consumed
                if (c === '.') {
                    while (pos < n) {
                        c = input.charAt(pos);
                        if (regexpNumber.test(c)) {
                            parts.push(c);
                            pos++;
                            col++;
                        }
                        else break;
                    }
                }
                token = {'type': 'number', 'value': +parts.join('')};
            }
            else if (c === '=') {
                col++;
                if ((pos < n) && (input.charAt(pos) === '=')) {
                    pos++;
                    col++;
                    token = {'type': 'eq'};
                }
                else {
                    token = {'type': 'assign'};
                }
            }
            else if (c === '{') {
                token = {'type': 'lbrace'};
                col++;
            }
            else if (c === '}') {
                token = {'type': 'rbrace'};
                col++;
            }
            else if (c === '[') {
                token = {'type': 'lbracket'};
                col++;
            }
            else if (c === ']') {
                token = {'type': 'rbracket'};
                col++;
            }
            else if (c === '(') {
                token = {'type': 'lparen'};
                col++;
            }
            else if (c === ')') {
                token = {'type': 'rparen'};
                col++;
            }
            else if (c === ':') {
                token = {'type': 'colon'};
                col++;
            }
            else if (c === ';') {
                token = {'type': 'semicolon'};
                col++;
            }
            else if (c === ',') {
                token = {'type': 'comma'};
                col++;
            }
            else if (c === '#') {
                // a comment till the end of line
                i = input.indexOf('\n', pos);
                if (i < 0) i = n; // this is the last line, the comment runs to the end of input
                s = input.slice(pos, i);
                token = {'type': 'comment', 'value': s};
                col += 1 + i - pos; // the hash sign and the comment text
                pos = i; // we want the next invocation to parse the trailing newline, so it correctly sets line_start, etc.
            }
            else {
                token = {'type': 'unknown', 'value': c};
                col++;
            }

            token['line_start'] = is_line_start;
            token['whitespace_offset'] = count;
            token['line'] = token_row;
            token['column'] = token_col;
            is_line_start = false; // any further token on this line is not the first one
            return token;
        };
    };

    /**
     * Compiles a literal expression (string, number, object or array, arbitrarily nested)
     * into JavaScript source fragments.
     *
     * @param {Object} token The first token of the expression.
     * @param {function(): (Object|null)} next The tokenizer's next() function, used to fetch the rest.
     * @param {string[]} output The output fragments; the compiled expression is appended to it.
     * @throws {Error} If the input ends prematurely or an unexpected token is encountered.
     */
    var parseExpression = function (token, next, output) {
        var nextToken,
            // fetches the next token, failing with the position of the expression start at end of input
            fetch = function () {
                var t = next();
                if (!t) throw new Error("Unfinished line, at line " + token.line + ", column " + token.column);
                return t;
            };

        if (token.type === 'string') {
            // string literal
            escapePyString(token.value, output);
        }
        else if (token.type === 'number') {
            // number literal
            output.push('' + token.value);
        }
        else if (token.type === 'lbrace') {
            // object literal
            output.push('{');
            nextToken = fetch();
            if (nextToken.type === 'rbrace') {
                output.push('}');
                return; // end of object literal
            }
            for (;;) {
                // the key
                if (nextToken.type === 'string') {
                    escapePyString(nextToken.value, output);
                }
                else if (nextToken.type === 'number') {
                    output.push('' + nextToken.value);
                }
                else throw new Error("Object key not a string or a number, at line " + nextToken.line + ", column " + nextToken.column);

                nextToken = fetch();
                if (nextToken.type !== 'colon') throw new Error("Expected :, at line " + nextToken.line + ", column " + nextToken.column);
                output.push(':');

                // the value
                parseExpression(fetch(), next, output);

                nextToken = fetch();
                if (nextToken.type === 'comma') {
                    output.push(',');
                }
                else if (nextToken.type === 'rbrace') {
                    output.push('}');
                    break; // end of object literal
                }
                else throw new Error("Expected , or }, at line " + nextToken.line + ", column " + nextToken.column);

                nextToken = fetch();
            }
        }
        else if (token.type === 'lbracket') {
            // array literal
            output.push('[');
            nextToken = fetch();
            if (nextToken.type === 'rbracket') {
                output.push(']');
                return; // end of array literal
            }
            for (;;) {
                parseExpression(nextToken, next, output);

                nextToken = fetch();
                if (nextToken.type === 'comma') {
                    output.push(',');
                }
                else if (nextToken.type === 'rbracket') {
                    output.push(']');
                    break; // end of array literal
                }
                else throw new Error("Expected , or ], at line " + nextToken.line + ", column " + nextToken.column);

                nextToken = fetch();
            }
        }
        else throw new Error("Unexpected token: expected a string, number, object, or array, at line " + token.line + ", column " + token.column);
    };

    /**
     * @typedef {string|{type: string, message: string}} HintDefinition
     */

    /**
     * @typedef {Object} PrologTaskDef a Prolog assignment definition
     * @property {string} description the assignment description
     * @property {Object.<string, HintDefinition>} hint the assignment hint definitions, keyed by hint ID
     */

    /**
     * Converts the given pythonic assignment definition into a JavaScriptish definition,
     * executes it, and takes the variables "description" and "hint" from the definition.
     *
     * Each statement must start on its own, unindented line and have the form
     * "name = literal" or "name[index] = literal"; comments start with #.
     *
     * @param {string} definition The assignment definition.
     * @returns {PrologTaskDef} The parsed definition, or an empty object if the definition contains no statements.
     * @throws {Error} If the definition cannot be tokenized or parsed.
     */
    codeq.parseDefinition = function (definition) {
        var next = tokenize(definition),
            vars = { 'description': true, 'hint': true },
            parts = [ ], // first element is just a placeholder, to be replaced with the full "var" declaration at the end
            token, first_token, varname, v, fn, obj;

        while ((token = next())) {
            // parse line by line
            if (token.type === 'comment') continue; // a comment may also trail a statement on the same line
            if (!token.line_start) throw new Error("The token does not start in a new line, at line " + token.line + ", column " + token.column);
            if (token.whitespace_offset > 0) throw new Error("Cannot parse indented lines, at line " + token.line);

            // parse lvalue
            if (token.type !== 'name') throw new Error("Expected a lvalue, at line " + token.line + ", column " + token.column);
            varname = token.value;
            vars[varname] = true; // remember the variable name, so we will declare it at the end
            parts.push(';\n'); // close the previous line -- the first time this is wrong because there is no previous line yet, but we will replace it with a variable declaration
            parts.push(varname); // start the new line with the assignment statement -- the only statement we support
            first_token = token;
            if (!(token = next())) throw new Error("Unfinished line, at line " + first_token.line);

            // optional index
            if (token.type === 'lbracket') {
                // index operator
                parts.push('[');
                if (!(token = next())) throw new Error("Unfinished line, at line " + first_token.line);
                if (token.type === 'string') escapePyString(token.value, parts);
                else if (token.type === 'number') parts.push('' + token.value);
                else throw new Error("Unsupported index expression, at line " + token.line + ", column " + token.column);
                if (!(token = next())) throw new Error("Unfinished line, at line " + first_token.line);
                if (token.type !== 'rbracket') throw new Error("Expected ], at line " + token.line + ", column " + token.column + ", token " + token.type);
                parts.push(']');
                if (!(token = next())) throw new Error("Unfinished line, at line " + first_token.line);
            }

            // the assignment operator
            if (token.type !== 'assign') throw new Error("Expected =, at line " + token.line + ", column " + token.column);
            parts.push('=');
            if (!(token = next())) throw new Error("Unfinished line, at line " + first_token.line);

            // parse rvalue
            parseExpression(token, next, parts);
        }

        if (parts.length === 0) return {}; // empty definition

        v = [];
        for (varname in vars) {
            if (vars.hasOwnProperty(varname)) v.push(varname);
        }
        parts[0] = 'var ' + v.join(', ') + ';\n';
        parts.push(';\nreturn {"description":description,"hint":hint};');
        v = parts.join('');
        codeq.log.debug("Creating a new parseInfo function having the body:\n" + v);
        // NOTE(review): the generated source is executed with the Function constructor. It is built
        // solely from names, numbers and re-escaped string literals produced above, but the definition
        // should still come from a trusted source.
        fn = new Function("__params__", v);
        obj = fn();
        return obj; // obj now contains "description" and "hint"
    }; // parseDefinition
})();