summary | refs | log | tree | commit | diff
path: root/js
diff options
context:
space:
mode:
author Aleš Smodiš <aless@guru.si> 2015-07-14 17:57:06 +0200
committer Aleš Smodiš <aless@guru.si> 2015-07-14 17:57:06 +0200
commit 13399da7f5eea223db86ff4d30403bc714c5816c (patch)
tree f49dd400b20c3ff84931994d9d99e5f58cc2be0c /js
parent 9a9c1113c90cf787a82312a548f2bf3776927d8e (diff)
Implemented a parser of simple pythonic definitions.
Diffstat (limited to 'js')
-rw-r--r-- js/codeq.js 6
-rw-r--r-- js/def_parser.js 420
2 files changed, 423 insertions, 3 deletions
diff --git a/js/codeq.js b/js/codeq.js
index 3a30823..770153e 100644
--- a/js/codeq.js
+++ b/js/codeq.js
@@ -614,7 +614,7 @@ window.siteDefinition = { logLevel: 'debug' }; // for debug purposes
// --------------------------------------------------------------------------------
// Task info parser: converts simplified pythonic syntax to a JavaScript function
// --------------------------------------------------------------------------------
-
+ // deprecated: use codeq.parseDefinition() instead
parseInfo: function (infoText) {
var parts = [],
n, lines, line, i, j, len, walker, fn, obj;
@@ -700,7 +700,7 @@ window.siteDefinition = { logLevel: 'debug' }; // for debug purposes
// if (s.length == 0) return; // empty hash
// if (s.charAt(0) == '#') s = s.substring(1);
// if (s.length == 0) return; // empty hash
- var editor = CodeMirror.fromTextArea(document.getElementById('program'), { cursorHeight: 0.85, lineNumbers: true, matchBrackets: true });
+ var editor = CodeMirror(document.getElementById('code_editor'), { cursorHeight: 0.85, lineNumbers: true, matchBrackets: true });
editor.setValue('sister(X, Y) :-\n female(X),\n parent(Z, X),\n parent(Z, Y),\n X \\== Y.');
/* $('#console').terminal(function (command, term) {
@@ -734,7 +734,7 @@ window.siteDefinition = { logLevel: 'debug' }; // for debug purposes
url: 'sister.py',
callback: function (data, status, url) {
if (!data) return;
- var info = codeq.system.parseInfo(data);
+ var info = codeq.parseDefinition(data);
$('#description').html(info.description);
}
});
diff --git a/js/def_parser.js b/js/def_parser.js
new file mode 100644
index 0000000..91ca35d
--- /dev/null
+++ b/js/def_parser.js
@@ -0,0 +1,420 @@
+/**
+ * A parser/compiler for the pythonic definitions of assignments.
+ * Supports basic assignment statements, no expressions using operators (yet).
+ */
+
+(function () {
// Single-character classifiers used by the tokenizer. Each one is matched
// against exactly one character at a time, so no anchors or quantifiers are needed.
// (The former regexpWhitespace was never referenced and has been dropped;
// the tokenizer tests for ' ' and '\t' directly.)
var regexpNameStart = /[a-zA-Z_]/,      // first character of a name
    regexpName = /[a-zA-Z0-9_]/,        // subsequent characters of a name
    regexpNumber = /[0-9]/,             // a decimal digit
    regexpNumberAndDot = /[0-9.]/;      // first character of a number literal
+
/**
 * Tells whether the character that would follow index `pos` in `s` is escaped,
 * i.e. whether `s` contains an odd number of consecutive backslashes ending at `pos`.
 *
 * @param {string} s   the text to inspect
 * @param {number} pos index of the last character to inspect; a negative index yields false
 * @returns {boolean} true when the run of backslashes ending at `pos` has odd length
 */
var isEscape = function (s, pos) {
    var i, result = false;
    // walk backwards over the whole run of backslashes: every pair cancels out,
    // so only an odd-length run leaves an active escape
    for (i = pos; i >= 0; i--) {
        if (s.charAt(i) !== '\\') break;
        result = !result;
    }
    return result;
};
+
/**
 * Appends a single-quoted JavaScript string literal for the raw body of a python
 * string to `output`.
 *
 * Backslash escapes already present in `s` are passed through untouched. Single quotes
 * are escaped unless the source already escaped them, and literal CR, TAB and LF
 * characters are turned into their escape sequences so the literal stays on one line.
 *
 * @param {string} s        raw string contents, without the enclosing quotes
 * @param {string[]} output array of code fragments to append to
 */
var escapePyString = function (s, output) {
    var parts = s.split("'"),
        n = parts.length,
        i, part, previousPart,
        escapeControl = function (text) {
            return text.split('\r').join('\\r').split('\t').join('\\t').split('\n').join('\\n');
        };
    output.push("'"); // starting quote

    previousPart = parts[0];
    output.push(escapeControl(previousPart));
    for (i = 1; i < n; i++) {
        part = parts[i];
        // split() removed the quote, so it must be written back: as-is when the
        // preceding text already ends with an active backslash, escaped otherwise
        output.push(isEscape(previousPart, previousPart.length - 1) ? "'" : "\\'");
        output.push(escapeControl(part));
        previousPart = part;
    }

    output.push("'"); // ending quote
};
+
/**
 * Creates a tokenizer over the text of a pythonic definition.
 *
 * @param {string} input the text to tokenize
 * @returns {function(): ?Object} a next() function; each call returns the next token,
 *     or null at the end of input. A token has the properties:
 *       type              - 'string', 'name', 'number', 'eq', 'assign', 'lbrace', 'rbrace',
 *                           'lbracket', 'rbracket', 'lparen', 'rparen', 'colon', 'semicolon',
 *                           'comma', 'comment' or 'unknown'
 *       value             - present for string, name, number, comment and unknown tokens
 *       line, column      - 1-based position of the first character of the token
 *       whitespace_offset - width of the white-space preceding the token on its line
 *                           (a tab counts as 4)
 *       line_start        - see the NOTE at the end of next()
 * @throws {Error} on an unterminated string or a CR that is not followed by LF
 */
var tokenize = function (input) {
    var pos = 0, row = 1, col = 1,
        n = input.length,
        is_line_start = true,
        // single-character tokens that carry no value
        punctuation = {
            '{': 'lbrace', '}': 'rbrace',
            '[': 'lbracket', ']': 'rbracket',
            '(': 'lparen', ')': 'rparen',
            ':': 'colon', ';': 'semicolon', ',': 'comma'
        },
        // true when the character at index is preceded by an odd number of backslashes,
        // looking no further back than the start of the current string body (pos)
        isEscapedAt = function (index) {
            var j, escaped = false;
            for (j = index - 1; (j >= pos) && (input.charAt(j) === '\\'); j--) escaped = !escaped;
            return escaped;
        },
        // true when text ends with an odd number of backslashes
        endsWithEscape = function (text) {
            var j, escaped = false;
            for (j = text.length - 1; (j >= 0) && (text.charAt(j) === '\\'); j--) escaped = !escaped;
            return escaped;
        },
        // reads the body of a triple-quoted string; pos must point just past the opening quotes
        getMultilineString = function (type) {
            var p = pos, i, k, l, parts, part, result;
            while (p < n) {
                i = input.indexOf(type, p);
                if (i < 0) throw new Error('Unterminated long string at position #' + pos + ', line ' + row + ', character ' + col);
                if (isEscapedAt(i)) {
                    p = i + 1; // an escaped quote, keep searching
                    continue;
                }
                parts = input.slice(pos, i).split('\n');
                col += i + 3 - pos; // body plus the closing quotes
                pos = i + 3; // position the next character pointer
                l = parts.length - 1;
                if (l === 0) return parts[0]; // no newline character in the string
                row += l; // add the lines of the string
                col = 1 + parts[l].length + 3; // last line of the body plus the closing quotes
                // process line continuations
                result = [];
                for (k = 0; k < l; k++) {
                    part = parts[k];
                    if (endsWithEscape(part)) {
                        // a trailing backslash joins the lines: drop it and the newline
                        result.push(part.slice(0, part.length - 1));
                    }
                    else {
                        result.push(part, '\n');
                    }
                }
                result.push(parts[l]); // the last line, which is not checked for an escape character
                return result.join('');
            }
            throw new Error('Unterminated long string at position #' + pos + ', line ' + row + ', character ' + col);
        },
        // reads the body of a single-line string; pos must point just past the opening quote
        getString = function (type) {
            var p = pos, i, j, s;
            while (p < n) {
                i = input.indexOf(type, p);
                j = input.indexOf('\n', p);
                if (i < 0) throw new Error('Unterminated string at position #' + pos + ', line ' + row + ', character ' + col);
                if ((j >= 0) && (j < i)) throw new Error('Unterminated string at position #' + pos + ', line ' + row + ', character ' + col);
                if (isEscapedAt(i)) p = i + 1; // an escaped quote
                else {
                    s = input.slice(pos, i);
                    col += i + 1 - pos; // body plus the closing quote
                    pos = i + 1; // position the next character pointer
                    return s;
                }
            }
            throw new Error('Unterminated string at position #' + pos + ', line ' + row + ', character ' + col);
        };

    // the next() function
    return function () {
        var s, count = 0, token = null, parts, i, c, token_row, token_col;
        if (pos >= n) return null;
        c = input.charAt(pos++);

        // skip white-space
        for (;;) {
            if (c === ' ') {
                count++;
                col++;
            }
            else if (c === '\t') {
                // tab is worth 4 spaces
                count += 4;
                col++;
            }
            else if (c === '\n') {
                count = 0;
                is_line_start = true;
                row++;
                col = 1;
            }
            else if (c === '\r') {
                if (pos >= n) return null; // EOF
                // pos already points at the character following the CR
                if (input.charAt(pos) !== '\n') throw new Error("CR character without a trailing LF at character #" + (pos-1));
                pos++; // consume the LF
                count = 0;
                is_line_start = true;
                row++;
                col = 1;
            }
            else break;
            if (pos >= n) return null; // EOF
            c = input.charAt(pos++);
        }

        // the token starts here, after any skipped white-space and newlines
        token_row = row;
        token_col = col;

        if ((c === '"') || (c === "'")) {
            // a quote starts a string, now see whether it's a multi-line string
            if ((pos+1 < n) && (input.charAt(pos) === c) && (input.charAt(pos+1) === c)) {
                // it's a long string
                pos += 2;
                col += 3; // the opening quotes
                s = getMultilineString(c === '"' ? '"""' : "'''");
            }
            else {
                col++; // the opening quote
                s = getString(c);
            }
            token = {'type': 'string', 'value': s};
        }

        else if (c.match(regexpNameStart)) {
            parts = [ c ]; // the name builder
            col++;
            while (pos < n) {
                c = input.charAt(pos);
                if (!c.match(regexpName)) break;
                parts.push(c);
                pos++;
                col++;
            }
            token = {'type': 'name', 'value': parts.join('')};
        }

        else if (c.match(regexpNumberAndDot)) {
            parts = [ c ]; // the number builder
            col++;
            if (c !== '.') {
                // integer part, optionally followed by a single decimal point
                while (pos < n) {
                    c = input.charAt(pos);
                    if (!c.match(regexpNumber)) break;
                    parts.push(c);
                    pos++;
                    col++;
                }
                if ((pos < n) && (input.charAt(pos) === '.')) {
                    parts.push('.');
                    pos++;
                    col++;
                }
            }
            // fractional digits
            while (pos < n) {
                c = input.charAt(pos);
                if (!c.match(regexpNumber)) break;
                parts.push(c);
                pos++;
                col++;
            }
            s = parts.join('');
            if (s === '.') token = {'type': 'unknown', 'value': s}; // a lone dot is not a number
            else token = {'type': 'number', 'value': +s};
        }

        else if (c === '=') {
            col++;
            if ((pos < n) && (input.charAt(pos) === '=')) {
                pos++;
                col++;
                token = {'type': 'eq'};
            }
            else {
                token = {'type': 'assign'};
            }
        }

        else if (punctuation.hasOwnProperty(c)) {
            token = {'type': punctuation[c]};
            col++;
        }

        else if (c === '#') {
            // a comment till the end of line
            i = input.indexOf('\n', pos);
            if (i < 0) i = n; // this is the last line: the comment runs to the end of input
            s = input.slice(pos, i);
            if (s.charAt(s.length - 1) === '\r') s = s.slice(0, s.length - 1); // CRLF line ending
            token = {'type': 'comment', 'value': s};
            col += 1 + i - pos;
            pos = i; // we want the next invocation to parse the trailing newline, so it correctly sets line_start, etc.
        }

        else {
            token = {'type': 'unknown', 'value': c};
            col++;
        }

        // NOTE(review): is_line_start is only ever assigned true, so line_start is true
        // on every token. Clearing it after each token would make line_start accurate, but
        // callers that check it before skipping comments would then reject a comment
        // trailing a statement -- change both sides together.
        token['line_start'] = is_line_start;
        token['whitespace_offset'] = count;
        token['line'] = token_row;
        token['column'] = token_col;
        return token;
    };
};
+
/**
 * Compiles one pythonic expression into JavaScript source fragments.
 * Supported are string and number literals, and object ({...}) and array ([...])
 * literals nested to any depth. A trailing comma before the closing brace or bracket
 * is accepted, and comment tokens inside a literal are skipped.
 *
 * @param {Object} token      the first token of the expression
 * @param {function(): ?Object} next returns the following token, or null at end of input
 * @param {string[]} output   array of code fragments to append to
 * @throws {Error} when the input ends inside the expression or an unexpected token is met
 */
var parseExpression = function (token, next, output) {
    var nextToken,
        // fetches the next token that is not a comment, failing at end of input;
        // the error position is that of the token that opened this expression
        advance = function () {
            var t;
            do {
                t = next();
            } while (t && (t.type === 'comment'));
            if (!t) throw new Error("Unfinished line, at line " + token.line + ", column " + token.column);
            return t;
        };

    if (token.type === 'string') {
        // string literal
        escapePyString(token.value, output);
    }
    else if (token.type === 'number') {
        // number literal
        output.push('' + token.value);
    }
    else if (token.type === 'lbrace') {
        // object literal
        output.push('{');
        nextToken = advance();
        while (nextToken.type !== 'rbrace') {
            // the key
            if (nextToken.type === 'string') {
                escapePyString(nextToken.value, output);
            }
            else if (nextToken.type === 'number') {
                output.push('' + nextToken.value);
            }
            else throw new Error("Object key not a string or a number, at line " + nextToken.line + ", column " + nextToken.column);

            nextToken = advance();
            if (nextToken.type !== 'colon') throw new Error("Expected :, at line " + nextToken.line + ", column " + nextToken.column);
            output.push(':');

            // the value
            parseExpression(advance(), next, output);

            nextToken = advance();
            if (nextToken.type === 'comma') {
                output.push(',');
                nextToken = advance(); // the next key, or the closing brace after a trailing comma
            }
            else if (nextToken.type !== 'rbrace') {
                throw new Error("Expected , or }, at line " + nextToken.line + ", column " + nextToken.column);
            }
        }
        output.push('}'); // end of object literal
    }
    else if (token.type === 'lbracket') {
        // array literal
        output.push('[');
        nextToken = advance();
        while (nextToken.type !== 'rbracket') {
            parseExpression(nextToken, next, output);

            nextToken = advance();
            if (nextToken.type === 'comma') {
                output.push(',');
                nextToken = advance(); // the next element, or the closing bracket after a trailing comma
            }
            else if (nextToken.type !== 'rbracket') {
                throw new Error("Expected , or ], at line " + nextToken.line + ", column " + nextToken.column);
            }
        }
        output.push(']'); // end of array literal
    }
    else throw new Error("Unexpected token: expected a string, number, object, or array, at line " + token.line + ", column " + token.column);
};
+
/**
 * Parses a pythonic definition consisting of assignment statements, compiles it to a
 * JavaScript function body, runs it, and returns the resulting values.
 *
 * Each statement has the form `name = expression` or `name[index] = expression`,
 * where index is a string or number literal. Statements must not be indented.
 *
 * @param {string} definition the text of the definition
 * @returns {Object} an object carrying the `description` and `hint` values assigned by
 *     the definition; an empty object when the definition contains no statements
 * @throws {Error} on any syntax error in the definition
 */
codeq.parseDefinition = function (definition) {
    var next = tokenize(definition),
        vars = { 'description': true, 'hint': true },
        parts = [ ], // first element is just a placeholder, to be replaced with the full "var" declaration at the end
        token, first_token, varname, v, fn, obj;

    while (token = next()) {
        // parse line by line
        // comments are skipped before the line-start check, so a comment trailing a
        // statement is accepted no matter what line_start reports for it
        if (token.type === 'comment') continue;
        if (!token.line_start) throw new Error("The token does not start in a new line, at line " + token.line + ", column " + token.column);
        if (token.whitespace_offset > 0) throw new Error("Cannot parse indented lines, at line " + token.line);

        // parse lvalue
        if (token.type !== 'name') throw new Error("Expected a lvalue, at line " + token.line + ", column " + token.column);
        varname = token.value;
        vars[varname] = true; // remember the variable name, so we will declare it at the end
        parts.push(';\n'); // close the previous line -- the first time this is wrong because there is no previous line yet, but we will replace it with a variable declaration
        parts.push(varname); // start the new line with the assignment statement -- the only statement we support
        first_token = token;
        if (!(token = next())) throw new Error("Unfinished line, at line " + first_token.line);

        // optional index
        if (token.type === 'lbracket') {
            // index operator
            parts.push('[');
            if (!(token = next())) throw new Error("Unfinished line, at line " + first_token.line);
            if (token.type === 'string') escapePyString(token.value, parts);
            else if (token.type === 'number') parts.push('' + token.value);
            else throw new Error("Unsupported index expression, at line " + token.line + ", column " + token.column);
            if (!(token = next())) throw new Error("Unfinished line, at line " + first_token.line);
            if (token.type !== 'rbracket') throw new Error("Expected ], at line " + token.line + ", column " + token.column + ", token " + token.type);
            parts.push(']');
            if (!(token = next())) throw new Error("Unfinished line, at line " + first_token.line);
        }

        // the assignment operator
        if (token.type !== 'assign') throw new Error("Expected =, at line " + token.line + ", column " + token.column);
        parts.push('=');
        if (!(token = next())) throw new Error("Unfinished line, at line " + first_token.line);

        // parse rvalue
        parseExpression(token, next, parts);
    }

    if (parts.length === 0) return {}; // empty definition

    // Object.keys() rather than vars.hasOwnProperty(): a definition may legally assign a
    // variable called "hasOwnProperty", which would shadow the inherited method
    v = Object.keys(vars);

    parts[0] = 'var ' + v.join(', ') + ';\n';
    parts.push(';\n__params__.description = description;\n__params__.hint = hint;');
    v = parts.join('');
    codeq.log.debug("Creating a new parseInfo function having the body:\n" + v);
    // SECURITY NOTE(review): the body is built from the definition text and executed.
    // Only names, numbers and re-escaped string literals are emitted, but the definition
    // must still come from a trusted source -- confirm before feeding it user input.
    fn = new Function("__params__", v);
    obj = {};
    fn(obj);
    return obj; // obj now contains "description" and "hint"
}; // parseDefinition
+
+})(); \ No newline at end of file