From 13399da7f5eea223db86ff4d30403bc714c5816c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ale=C5=A1=20Smodi=C5=A1?= Date: Tue, 14 Jul 2015 17:57:06 +0200 Subject: Implemented a parser of simple pythonic definitions. --- js/codeq.js | 6 +- js/def_parser.js | 420 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 423 insertions(+), 3 deletions(-) create mode 100644 js/def_parser.js (limited to 'js') diff --git a/js/codeq.js b/js/codeq.js index 3a30823..770153e 100644 --- a/js/codeq.js +++ b/js/codeq.js @@ -614,7 +614,7 @@ window.siteDefinition = { logLevel: 'debug' }; // for debug purposes // -------------------------------------------------------------------------------- // Task info parser: converts simplified pythonic syntax to a JavaScript function // -------------------------------------------------------------------------------- - + // deprecated: use codeq.parseDefinition() instead parseInfo: function (infoText) { var parts = [], n, lines, line, i, j, len, walker, fn, obj; @@ -700,7 +700,7 @@ window.siteDefinition = { logLevel: 'debug' }; // for debug purposes // if (s.length == 0) return; // empty hash // if (s.charAt(0) == '#') s = s.substring(1); // if (s.length == 0) return; // empty hash - var editor = CodeMirror.fromTextArea(document.getElementById('program'), { cursorHeight: 0.85, lineNumbers: true, matchBrackets: true }); + var editor = CodeMirror(document.getElementById('code_editor'), { cursorHeight: 0.85, lineNumbers: true, matchBrackets: true }); editor.setValue('sister(X, Y) :-\n female(X),\n parent(Z, X),\n parent(Z, Y),\n X \\== Y.'); /* $('#console').terminal(function (command, term) { @@ -734,7 +734,7 @@ window.siteDefinition = { logLevel: 'debug' }; // for debug purposes url: 'sister.py', callback: function (data, status, url) { if (!data) return; - var info = codeq.system.parseInfo(data); + var info = codeq.parseDefinition(data); $('#description').html(info.description); } }); diff --git a/js/def_parser.js 
// File: js/def_parser.js
/**
 * A parser/compiler for the pythonic definitions of assignments.
 * Supports basic assignment statements, no expressions using operators (yet).
 *
 * The definition text is tokenized, translated to equivalent JavaScript source
 * (string, number, dict and list literals only), and that source is executed to
 * obtain the values of the "description" and "hint" variables.
 */

(function () {
    var regexpNameStart = /[a-zA-Z_]/,
        regexpName = /[a-zA-Z0-9_]/,
        regexpDigit = /[0-9]/,
        // single-character tokens and the token type each one produces
        punctuation = {
            '{': 'lbrace', '}': 'rbrace',
            '[': 'lbracket', ']': 'rbracket',
            '(': 'lparen', ')': 'rparen',
            ':': 'colon', ';': 'semicolon', ',': 'comma'
        };

    /**
     * Tells whether the character following index pos in s is escaped, i.e. whether
     * an odd number of consecutive backslashes ends at index pos.
     *
     * @param {string} s the string to inspect
     * @param {number} pos index of the last character to inspect
     * @param {number} [start=0] lowest index that may be inspected
     * @returns {boolean} true if the run of backslashes ending at pos has odd length
     */
    var isEscape = function (s, pos, start) {
        var i, result = false;
        for (i = pos; i >= (start || 0); i--) {
            if (s.charAt(i) !== '\\') break;
            result = !result;
        }
        return result;
    };

    /** Replaces raw CR, TAB and LF characters with their backslash escape sequences. */
    var escapeControlChars = function (s) {
        return s.split('\r').join('\\r').split('\t').join('\\t').split('\n').join('\\n');
    };

    /**
     * Appends to output a single-quoted JavaScript string literal equivalent to the
     * raw (still backslash-escaped) body of a python string literal.
     *
     * @param {string} s raw body of the python string, without the delimiting quotes
     * @param {Array<string>} output the array of source fragments to append to
     */
    var escapePyString = function (s, output) {
        var parts = s.split("'"),
            n = parts.length,
            i, previousPart = parts[0];

        output.push("'", escapeControlChars(previousPart)); // starting quote and the first chunk
        for (i = 1; i < n; i++) {
            // escape the single quote, unless the source already escaped it
            if (!isEscape(previousPart, previousPart.length - 1)) output.push('\\');
            output.push("'"); // the quote itself, which split() removed
            previousPart = parts[i];
            output.push(escapeControlChars(previousPart));
        }
        output.push("'"); // ending quote
    };

    /**
     * Creates a tokenizer over the given text.
     *
     * @param {string} input the definition text
     * @returns {function(): ?Object} a function returning the next token on each call,
     *   or null at the end of input. Each token has the properties type, line_start
     *   (whether it is the first token in its line), whitespace_offset (width of the
     *   white-space immediately preceding it, a tab counting as 4), line and column
     *   (both 1-based); string, name, number, comment and unknown tokens also have value.
     * @throws {Error} on an unterminated string, or on a CR that is not followed by LF
     */
    var tokenize = function (input) {
        var pos = 0, row = 1, col = 1,
            n = input.length,
            is_line_start = true,

            // reads the body of a triple-quoted string; pos must point just past the opening quotes
            getMultilineString = function (type) {
                var p = pos, i, l, parts, part, result;
                while (p < n) {
                    i = input.indexOf(type, p);
                    if (i < 0) break; // no terminator
                    if (isEscape(input, i - 1, pos)) {
                        p = i + 1; // an escaped quote, keep looking
                        continue;
                    }
                    parts = input.slice(pos, i).split('\n');
                    l = parts.length - 1; // the number of newlines inside the string
                    if (l === 0) col += i + 3 - pos; // the string stays in the same line
                    else {
                        row += l;
                        col = 1 + parts[l].length + 3; // the last line of the string and the closing quotes
                    }
                    pos = i + 3; // position the next character pointer
                    result = [];
                    for (i = 0; i < l; i++) {
                        part = parts[i];
                        // a trailing unescaped backslash is a line continuation: drop it together with the newline
                        if (isEscape(part, part.length - 1)) result.push(part.slice(0, part.length - 1));
                        else result.push(part, '\n');
                    }
                    result.push(parts[l]); // the last line is not followed by a newline
                    return result.join('');
                }
                throw new Error('Unterminated long string at position #' + pos + ', line ' + row + ', character ' + col);
            },

            // reads the body of a single-line string; pos must point just past the opening quote
            getString = function (type) {
                var p = pos, i, j, s;
                while (p < n) {
                    i = input.indexOf(type, p);
                    j = input.indexOf('\n', p);
                    if (i < 0) break; // no terminator
                    if ((j >= 0) && (j < i)) break; // the line ends before the terminator
                    if (isEscape(input, i - 1, pos)) p = i + 1; // an escaped quote, keep looking
                    else {
                        s = input.slice(pos, i);
                        col += i + 1 - pos; // position the next column pointer
                        pos = i + 1; // position the next character pointer
                        return s;
                    }
                }
                throw new Error('Unterminated string at position #' + pos + ', line ' + row + ', character ' + col);
            };

        // the next() function
        return function () {
            var s, count = 0, token = null, i, start, c, token_row, token_col;
            if (pos >= n) return null;
            c = input.charAt(pos++);

            // skip white-space
            for (;;) {
                if (c === ' ') {
                    count++;
                    col++;
                }
                else if (c === '\t') {
                    // tab is worth 4 spaces
                    count += 4;
                    col++;
                }
                else if (c === '\n') {
                    count = 0;
                    is_line_start = true;
                    row++;
                    col = 1;
                }
                else if (c === '\r') {
                    if (pos >= n) return null; // EOF
                    // pos already points at the character following the CR
                    if (input.charAt(pos) !== '\n') throw new Error("CR character without a trailing LF at character #" + (pos-1));
                    pos++;
                    count = 0;
                    is_line_start = true;
                    row++;
                    col = 1;
                }
                else break;
                if (pos >= n) return null; // EOF
                c = input.charAt(pos++);
            }

            // the token starts here, after any leading white-space
            token_row = row;
            token_col = col;

            if ((c === '"') || (c === "'")) {
                // a quote starts a string, now see whether it's a multi-line string
                if ((pos+1 < n) && (input.charAt(pos) === c) && (input.charAt(pos+1) === c)) {
                    // it's a long string
                    pos += 2;
                    col += 3;
                    s = getMultilineString(c + c + c);
                }
                else {
                    col++;
                    s = getString(c);
                }
                token = {'type': 'string', 'value': s};
            }

            else if (regexpNameStart.test(c)) {
                start = pos - 1;
                while ((pos < n) && regexpName.test(input.charAt(pos))) pos++;
                s = input.slice(start, pos);
                col += s.length;
                token = {'type': 'name', 'value': s};
            }

            else if (regexpDigit.test(c) || ((c === '.') && (pos < n) && regexpDigit.test(input.charAt(pos)))) {
                // digits, optionally a single dot, digits; a leading dot must be followed by a digit
                start = pos - 1;
                if (c !== '.') {
                    while ((pos < n) && regexpDigit.test(input.charAt(pos))) pos++;
                    if ((pos < n) && (input.charAt(pos) === '.')) pos++;
                }
                while ((pos < n) && regexpDigit.test(input.charAt(pos))) pos++;
                s = input.slice(start, pos);
                col += s.length;
                token = {'type': 'number', 'value': +s};
            }

            else if (c === '=') {
                col++;
                if ((pos < n) && (input.charAt(pos) === '=')) {
                    pos++;
                    col++;
                    token = {'type': 'eq'};
                }
                else {
                    token = {'type': 'assign'};
                }
            }

            else if (punctuation.hasOwnProperty(c)) {
                token = {'type': punctuation[c]};
                col++;
            }

            else if (c === '#') {
                // a comment till the end of line, or till the end of input on the last line
                i = input.indexOf('\n', pos);
                if (i < 0) i = n;
                token = {'type': 'comment', 'value': input.slice(pos, i)};
                col += i - pos + 1;
                pos = i; // we want the next invocation to parse the trailing newline, so it correctly sets line_start, etc.
            }

            else {
                token = {'type': 'unknown', 'value': c};
                col++;
            }

            token['line_start'] = is_line_start;
            token['whitespace_offset'] = count;
            token['line'] = token_row;
            token['column'] = token_col;
            is_line_start = false; // whatever follows in this line is not the first token anymore
            return token;
        };
    };

    /**
     * Translates one expression (a string, a number, a dict or a list literal) to
     * JavaScript source. A trailing comma before the closing brace or bracket is accepted.
     *
     * @param {Object} token the first token of the expression
     * @param {function(): ?Object} next returns the tokens following it
     * @param {Array<string>} output the array of source fragments to append to
     * @throws {Error} if the input ends prematurely or an unexpected token is met
     */
    var parseExpression = function (token, next, output) {
        var nextToken,
            // fetches the next token, which must exist; errors point at the start of this expression
            advance = function () {
                var t = next();
                if (!t) throw new Error("Unfinished line, at line " + token.line + ", column " + token.column);
                return t;
            };

        if (token.type === 'string') {
            // string literal
            escapePyString(token.value, output);
        }
        else if (token.type === 'number') {
            // number literal
            output.push('' + token.value);
        }
        else if (token.type === 'lbrace') {
            // object literal
            output.push('{');
            nextToken = advance();
            while (nextToken.type !== 'rbrace') {
                if (nextToken.type === 'string') escapePyString(nextToken.value, output);
                else if (nextToken.type === 'number') output.push('' + nextToken.value);
                else throw new Error("Object key not a string or a number, at line " + nextToken.line + ", column " + nextToken.column);

                nextToken = advance();
                if (nextToken.type !== 'colon') throw new Error("Expected :, at line " + nextToken.line + ", column " + nextToken.column);
                output.push(':');

                parseExpression(advance(), next, output);

                nextToken = advance();
                if (nextToken.type === 'rbrace') break; // end of object literal
                if (nextToken.type !== 'comma') throw new Error("Expected , or }, at line " + nextToken.line + ", column " + nextToken.column);
                nextToken = advance();
                if (nextToken.type !== 'rbrace') output.push(','); // not a trailing comma: another entry follows
            }
            output.push('}');
        }
        else if (token.type === 'lbracket') {
            // array literal
            output.push('[');
            nextToken = advance();
            while (nextToken.type !== 'rbracket') {
                parseExpression(nextToken, next, output);

                nextToken = advance();
                if (nextToken.type === 'rbracket') break; // end of array literal
                if (nextToken.type !== 'comma') throw new Error("Expected , or ], at line " + nextToken.line + ", column " + nextToken.column);
                nextToken = advance();
                if (nextToken.type !== 'rbracket') output.push(','); // not a trailing comma: another element follows
            }
            output.push(']');
        }
        else throw new Error("Unexpected token: expected a string, number, object, or array, at line " + token.line + ", column " + token.column);
    };

    /**
     * Parses a pythonic definition consisting of assignment statements, one per
     * (logical) line, each of the form name = expression or name[index] = expression.
     * Comments are ignored wherever they appear.
     *
     * @param {string} definition the definition text
     * @returns {Object} an object with the properties "description" and "hint" holding
     *   the final values of those variables (undefined if never assigned); an empty
     *   object if the definition contains no statements
     * @throws {Error} on a syntax error in the definition
     */
    codeq.parseDefinition = function (definition) {
        var nextRaw = tokenize(definition),
            // the token source used by the parser: comments carry no meaning, skip them
            next = function () {
                var t;
                do {
                    t = nextRaw();
                } while (t && (t.type === 'comment'));
                return t;
            },
            vars = { 'description': true, 'hint': true },
            parts = [ ], // the first element pushed is just a placeholder, to be replaced with the full "var" declaration at the end
            token, first_token, varname, v, fn, obj;

        while ((token = next())) {
            // parse line by line
            if (!token.line_start) throw new Error("The token does not start in a new line, at line " + token.line + ", column " + token.column);
            if (token.whitespace_offset > 0) throw new Error("Cannot parse indented lines, at line " + token.line);

            // parse lvalue
            if (token.type !== 'name') throw new Error("Expected a lvalue, at line " + token.line + ", column " + token.column);
            varname = token.value;
            vars[varname] = true; // remember the variable name, so we will declare it at the end
            parts.push(';\n'); // close the previous line -- the first time this is wrong because there is no previous line yet, but we will replace it with a variable declaration
            parts.push(varname); // start the new line with the assignment statement -- the only statement we support
            first_token = token;
            if (!(token = next())) throw new Error("Unfinished line, at line " + first_token.line);

            // optional index
            if (token.type === 'lbracket') {
                // index operator
                parts.push('[');
                if (!(token = next())) throw new Error("Unfinished line, at line " + first_token.line);
                if (token.type === 'string') escapePyString(token.value, parts);
                else if (token.type === 'number') parts.push('' + token.value);
                else throw new Error("Unsupported index expression, at line " + token.line + ", column " + token.column);
                if (!(token = next())) throw new Error("Unfinished line, at line " + first_token.line);
                if (token.type !== 'rbracket') throw new Error("Expected ], at line " + token.line + ", column " + token.column + ", token " + token.type);
                parts.push(']');
                if (!(token = next())) throw new Error("Unfinished line, at line " + first_token.line);
            }

            // the assignment operator
            if (token.type !== 'assign') throw new Error("Expected =, at line " + token.line + ", column " + token.column);
            parts.push('=');
            if (!(token = next())) throw new Error("Unfinished line, at line " + first_token.line);

            // parse rvalue
            parseExpression(token, next, parts);
        }

        if (parts.length === 0) return {}; // empty definition

        v = [];
        for (varname in vars) {
            if (vars.hasOwnProperty(varname)) v.push(varname);
        }

        parts[0] = 'var ' + v.join(', ') + ';\n';
        parts.push(';\n__params__.description = description;\n__params__.hint = hint;');
        v = parts.join('');
        codeq.log.debug("Creating a new parseInfo function having the body:\n" + v);
        // NOTE(review): the generated source is executed. Only literals produced by the
        // functions above and identifier-shaped variable names end up in it, but a
        // variable named like a JavaScript reserved word makes this throw a SyntaxError.
        // Definitions should come from a trusted source.
        fn = new Function("__params__", v);
        obj = {};
        fn(obj);
        return obj; // obj now contains "description" and "hint"
    }; // parseDefinition

})();