From 1a99d21e12dad4c01d4c892ed4e8a0506bcf36aa Mon Sep 17 00:00:00 2001 From: Timotej Lazar Date: Wed, 29 Jan 2014 13:23:04 +0100 Subject: Initial commit for pymonkey --- prolog/core.py | 1018 +++++++++++++++++++++++++++++++++++++++++++++++++++++ prolog/engine.py | 182 ++++++++++ prolog/lexer.py | 92 +++++ prolog/sandbox.pl | 455 ++++++++++++++++++++++++ prolog/util.py | 114 ++++++ 5 files changed, 1861 insertions(+) create mode 100644 prolog/core.py create mode 100755 prolog/engine.py create mode 100755 prolog/lexer.py create mode 100644 prolog/sandbox.pl create mode 100644 prolog/util.py diff --git a/prolog/core.py b/prolog/core.py new file mode 100644 index 0000000..b14e58f --- /dev/null +++ b/prolog/core.py @@ -0,0 +1,1018 @@ +# -*- coding: utf-8 -*- + + +# pyswip -- Python SWI-Prolog bridge +# Copyright (c) 2007-2012 YĆ¼ce Tekol +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +import os +import re +import sys +import locale +import glob +import warnings +from subprocess import Popen, PIPE +from ctypes import * +from ctypes.util import find_library + + +# To initialize the SWI-Prolog environment, two things need to be done: the +# first is to find where the SO/DLL is located and the second is to find the +# SWI-Prolog home, to get the saved state. +# +# The goal of the (entangled) process below is to make the library installation +# independent. + + +def _findSwiplPathFromFindLib(): + """ + This function resorts to ctype's find_library to find the path to the + DLL. The biggest problem is that find_library does not give the path to the + resource file. + + :returns: + A path to the swipl SO/DLL or None if it is not found. + + :returns type: + {str, None} + """ + + path = (find_library('swipl') or + find_library('pl') or + find_library('libswipl')) # This last one is for Windows + return path + + +def _findSwiplFromExec(): + """ + This function tries to use an executable on the path to find SWI-Prolog + SO/DLL and the resource file. + + :returns: + A tuple of (path to the swipl DLL, path to the resource file) + + :returns type: + ({str, None}, {str, None}) + """ + + platform = sys.platform[:3] + + fullName = None + swiHome = None + + try: # try to get library path from swipl executable. + # We may have pl or swipl as the executable + try: + cmd = Popen(['swipl', '-dump-runtime-variables'], + stdout=PIPE, universal_newlines=True) + except OSError: + cmd = Popen(['pl', '-dump-runtime-variables'], + stdout=PIPE, universal_newlines=True) + ret = cmd.communicate() + + # Parse the output into a dictionary + ret = ret[0].replace(';', '').splitlines() + ret = [line.split('=', 1) for line in ret] + rtvars = dict((name, value[1:-1]) for name, value in ret) # [1:-1] gets + # rid of the + # quotes + + if rtvars['PLSHARED'] == 'no': + raise ImportError('SWI-Prolog is not installed as a shared ' + 'library.') + else: # PLSHARED == 'yes' + swiHome = rtvars['PLBASE'] # The environment is in PLBASE + if not os.path.exists(swiHome): + swiHome = None + + # determine platform specific path + if platform == "win": + dllName = rtvars['PLLIB'][:-4] + '.' + rtvars['PLSOEXT'] + path = os.path.join(rtvars['PLBASE'], 'bin') + fullName = os.path.join(path, dllName) + + if not os.path.exists(fullName): + fullName = None + + elif platform == "cyg": + # e.g. /usr/lib/pl-5.6.36/bin/i686-cygwin/cygpl.dll + + dllName = 'cygpl.dll' + path = os.path.join(rtvars['PLBASE'], 'bin', rtvars['PLARCH']) + fullName = os.path.join(path, dllName) + + if not os.path.exists(fullName): + fullName = None + + elif platform == "dar": + dllName = 'lib' + rtvars['PLLIB'][2:] + '.' + rtvars['PLSOEXT'] + path = os.path.join(rtvars['PLBASE'], 'lib', rtvars['PLARCH']) + baseName = os.path.join(path, dllName) + + if os.path.exists(baseName): + fullName = baseName + else: # We will search for versions + fullName = None + + else: # assume UNIX-like + # The SO name in some linuxes is of the form libswipl.so.5.10.2, + # so we have to use glob to find the correct one + dllName = 'lib' + rtvars['PLLIB'][2:] + '.' + rtvars['PLSOEXT'] + path = os.path.join(rtvars['PLBASE'], 'lib', rtvars['PLARCH']) + baseName = os.path.join(path, dllName) + + if os.path.exists(baseName): + fullName = baseName + else: # We will search for versions + pattern = baseName + '.*' + files = glob.glob(pattern) + if len(files) == 0: + fullName = None + elif len(files) == 1: + fullName = files[0] + else: # Will this ever happen? + fullName = None + + except (OSError, KeyError): # KeyError from accessing rtvars + pass + + return (fullName, swiHome) + + +def _findSwiplWin(): + """ + This function uses several heuristics to gues where SWI-Prolog is installed + in Windows. It always returns None as the path of the resource file because, + in Windows, the way to find it is more robust so the SWI-Prolog DLL is + always able to find it. + + :returns: + A tuple of (path to the swipl DLL, path to the resource file) + + :returns type: + ({str, None}, {str, None}) + """ + + dllNames = ('swipl.dll', 'libswipl.dll') + + # First try: check the usual installation path (this is faster but + # hardcoded) + programFiles = os.getenv('ProgramFiles') + paths = [os.path.join(programFiles, r'pl\bin', dllName) + for dllName in dllNames] + for path in paths: + if os.path.exists(path): + return (path, None) + + # Second try: use the find_library + path = _findSwiplPathFromFindLib() + if path is not None and os.path.exists(path): + return (path, None) + + # Third try: use reg.exe to find the installation path in the registry + # (reg should be installed in all Windows XPs) + try: + cmd = Popen(['reg', 'query', + r'HKEY_LOCAL_MACHINE\Software\SWI\Prolog', + '/v', 'home'], stdout=PIPE) + ret = cmd.communicate() + + # Result is like: + # ! REG.EXE VERSION 3.0 + # + # HKEY_LOCAL_MACHINE\Software\SWI\Prolog + # home REG_SZ C:\Program Files\pl + # (Note: spaces may be \t or spaces in the output) + ret = ret[0].splitlines() + ret = [line for line in ret if len(line) > 0] + pattern = re.compile('[^h]*home[^R]*REG_SZ( |\t)*(.*)$') + match = pattern.match(ret[-1]) + if match is not None: + path = match.group(2) + + paths = [os.path.join(path, 'bin', dllName) + for dllName in dllNames] + for path in paths: + if os.path.exists(path): + return (path, None) + + except OSError: + # reg.exe not found? Weird... + pass + + # May the exec is on path? + (path, swiHome) = _findSwiplFromExec() + if path is not None: + return (path, swiHome) + + # Last try: maybe it is in the current dir + for dllName in dllNames: + if os.path.exists(dllName): + return (dllName, None) + + return (None, None) + +def _findSwiplLin(): + """ + This function uses several heuristics to guess where SWI-Prolog is + installed in Linuxes. + + :returns: + A tuple of (path to the swipl so, path to the resource file) + + :returns type: + ({str, None}, {str, None}) + """ + + # Maybe the exec is on path? + (path, swiHome) = _findSwiplFromExec() + if path is not None: + return (path, swiHome) + + # If it is not, use find_library + path = _findSwiplPathFromFindLib() + if path is not None: + return (path, swiHome) + + # Our last try: some hardcoded paths. + paths = ['/lib', '/usr/lib', '/usr/local/lib', '.', './lib'] + names = ['libswipl.so', 'libpl.so'] + + path = None + for name in names: + for try_ in paths: + try_ = os.path.join(try_, name) + if os.path.exists(try_): + path = try_ + break + + if path is not None: + return (path, swiHome) + + return (None, None) + + +def _findSwiplDar(): + """ + This function uses several heuristics to guess where SWI-Prolog is + installed in MacOS. + + :returns: + A tuple of (path to the swipl so, path to the resource file) + + :returns type: + ({str, None}, {str, None}) + """ + + # If the exec is in path + (path, swiHome) = _findSwiplFromExec() + if path is not None: + return (path, swiHome) + + # If it is not, use find_library + path = _findSwiplPathFromFindLib() + if path is not None: + return (path, swiHome) + + # Last guess, searching for the file + paths = ['.', './lib', '/usr/lib/', '/usr/local/lib', '/opt/local/lib'] + names = ['libswipl.dylib', 'libpl.dylib'] + + for name in names: + for path in paths: + path = os.path.join(path, name) + if os.path.exists(path): + return (path, None) + + return (None, None) + + +def _findSwipl(): + """ + This function makes a big effort to find the path to the SWI-Prolog shared + library. Since this is both OS dependent and installation dependent, we may + not aways succeed. If we do, we return a name/path that can be used by + CDLL(). Otherwise we raise an exception. + + :return: Tuple. Fist element is the name or path to the library that can be + used by CDLL. Second element is the path were SWI-Prolog resource + file may be found (this is needed in some Linuxes) + :rtype: Tuple of strings + :raises ImportError: If we cannot guess the name of the library + """ + + # Now begins the guesswork + platform = sys.platform[:3] + if platform == "win": # In Windows, we have the default installer + # path and the registry to look + (path, swiHome) = _findSwiplWin() + + elif platform in ("lin", "cyg"): + (path, swiHome) = _findSwiplLin() + + elif platform == "dar": # Help with MacOS is welcome!! + (path, swiHome) = _findSwiplDar() + + else: + raise EnvironmentError('The platform %s is not supported by this ' + 'library. If you want it to be supported, ' + 'please open an issue.' % platform) + + # This is a catch all raise + if path is None: + raise ImportError('Could not find the SWI-Prolog library in this ' + 'platform. If you are sure it is installed, please ' + 'open an issue.') + else: + return (path, swiHome) + + +def _fixWindowsPath(dll): + """ + When the path to the DLL is not in Windows search path, Windows will not be + able to find other DLLs on the same directory, so we have to add it to the + path. This function takes care of it. + + :parameters: + - `dll` (str) - File name of the DLL + """ + + if sys.platform[:3] != 'win': + return # Nothing to do here + + pathToDll = os.path.dirname(dll) + currentWindowsPath = os.getenv('PATH') + + if pathToDll not in currentWindowsPath: + # We will prepend the path, to avoid conflicts between DLLs + newPath = pathToDll + ';' + currentWindowsPath + os.putenv('PATH', newPath) + + +# Find the path and resource file. SWI_HOME_DIR shall be treated as a constant +# by users of this module +(_path, SWI_HOME_DIR) = _findSwipl() +_fixWindowsPath(_path) + + +# Load the library +_lib = CDLL(_path) + +# PySWIP constants +PYSWIP_MAXSTR = 1024 +c_int_p = c_void_p +c_long_p = c_void_p +c_double_p = c_void_p +c_uint_p = c_void_p + +# constants (from SWI-Prolog.h) +# PL_unify_term() arguments +PL_VARIABLE = 1 # nothing +PL_ATOM = 2 # const char +PL_INTEGER = 3 # int +PL_FLOAT = 4 # double +PL_STRING = 5 # const char * +PL_TERM = 6 # +# PL_unify_term() +PL_FUNCTOR = 10 # functor_t, arg ... +PL_LIST = 11 # length, arg ... +PL_CHARS = 12 # const char * +PL_POINTER = 13 # void * +# /* PlArg::PlArg(text, type) */ +#define PL_CODE_LIST (14) /* [ascii...] */ +#define PL_CHAR_LIST (15) /* [h,e,l,l,o] */ +#define PL_BOOL (16) /* PL_set_feature() */ +#define PL_FUNCTOR_CHARS (17) /* PL_unify_term() */ +#define _PL_PREDICATE_INDICATOR (18) /* predicate_t (Procedure) */ +#define PL_SHORT (19) /* short */ +#define PL_INT (20) /* int */ +#define PL_LONG (21) /* long */ +#define PL_DOUBLE (22) /* double */ +#define PL_NCHARS (23) /* unsigned, const char * */ +#define PL_UTF8_CHARS (24) /* const char * */ +#define PL_UTF8_STRING (25) /* const char * */ +#define PL_INT64 (26) /* int64_t */ +#define PL_NUTF8_CHARS (27) /* unsigned, const char * */ +#define PL_NUTF8_CODES (29) /* unsigned, const char * */ +#define PL_NUTF8_STRING (30) /* unsigned, const char * */ +#define PL_NWCHARS (31) /* unsigned, const wchar_t * */ +#define PL_NWCODES (32) /* unsigned, const wchar_t * */ +#define PL_NWSTRING (33) /* unsigned, const wchar_t * */ +#define PL_MBCHARS (34) /* const char * */ +#define PL_MBCODES (35) /* const char * */ +#define PL_MBSTRING (36) /* const char * */ + +# /******************************** +# * NON-DETERMINISTIC CALL/RETURN * +# *********************************/ +# +# Note 1: Non-deterministic foreign functions may also use the deterministic +# return methods PL_succeed and PL_fail. +# +# Note 2: The argument to PL_retry is a 30 bits signed integer (long). + +PL_FIRST_CALL = 0 +PL_CUTTED = 1 +PL_REDO = 2 + +PL_FA_NOTRACE = 0x01 # foreign cannot be traced +PL_FA_TRANSPARENT = 0x02 # foreign is module transparent +PL_FA_NONDETERMINISTIC = 0x04 # foreign is non-deterministic +PL_FA_VARARGS = 0x08 # call using t0, ac, ctx +PL_FA_CREF = 0x10 # Internal: has clause-reference */ + +# /******************************* +# * CALL-BACK * +# *******************************/ + +PL_Q_DEBUG = 0x01 # = TRUE for backward compatibility +PL_Q_NORMAL = 0x02 # normal usage +PL_Q_NODEBUG = 0x04 # use this one +PL_Q_CATCH_EXCEPTION = 0x08 # handle exceptions in C +PL_Q_PASS_EXCEPTION = 0x10 # pass to parent environment +PL_Q_DETERMINISTIC = 0x20 # call was deterministic + +# /******************************* +# * BLOBS * +# *******************************/ + +#define PL_BLOB_MAGIC_B 0x75293a00 /* Magic to validate a blob-type */ +#define PL_BLOB_VERSION 1 /* Current version */ +#define PL_BLOB_MAGIC (PL_BLOB_MAGIC_B|PL_BLOB_VERSION) + +#define PL_BLOB_UNIQUE 0x01 /* Blob content is unique */ +#define PL_BLOB_TEXT 0x02 /* blob contains text */ +#define PL_BLOB_NOCOPY 0x04 /* do not copy the data */ +#define PL_BLOB_WCHAR 0x08 /* wide character string */ + +# /******************************* +# * CHAR BUFFERS * +# *******************************/ + +CVT_ATOM = 0x0001 +CVT_STRING = 0x0002 +CVT_LIST = 0x0004 +CVT_INTEGER = 0x0008 +CVT_FLOAT = 0x0010 +CVT_VARIABLE = 0x0020 +CVT_NUMBER = CVT_INTEGER | CVT_FLOAT +CVT_ATOMIC = CVT_NUMBER | CVT_ATOM | CVT_STRING +CVT_WRITE = 0x0040 # as of version 3.2.10 +CVT_ALL = CVT_ATOMIC | CVT_LIST +CVT_MASK = 0x00ff + +BUF_DISCARDABLE = 0x0000 +BUF_RING = 0x0100 +BUF_MALLOC = 0x0200 + +CVT_EXCEPTION = 0x10000 # throw exception on error + +# used to convert python strings to bytes (and back) when calling C functions +encoding = locale.getpreferredencoding() + +argv = (c_char_p*(len(sys.argv) + 1))() +for i, arg in enumerate(sys.argv): + argv[i] = bytes(arg, encoding=encoding) + +argv[-1] = None +argc = len(sys.argv) + +# /******************************* +# * TYPES * +# *******************************/ +# +# typedef uintptr_t atom_t; /* Prolog atom */ +# typedef uintptr_t functor_t; /* Name/arity pair */ +# typedef void * module_t; /* Prolog module */ +# typedef void * predicate_t; /* Prolog procedure */ +# typedef void * record_t; /* Prolog recorded term */ +# typedef uintptr_t term_t; /* opaque term handle */ +# typedef uintptr_t qid_t; /* opaque query handle */ +# typedef uintptr_t PL_fid_t; /* opaque foreign context handle */ +# typedef void * control_t; /* non-deterministic control arg */ +# typedef void * PL_engine_t; /* opaque engine handle */ +# typedef uintptr_t PL_atomic_t; /* same a word */ +# typedef uintptr_t foreign_t; /* return type of foreign functions */ +# typedef wchar_t pl_wchar_t; /* Prolog wide character */ +# typedef foreign_t (*pl_function_t)(); /* foreign language functions */ + +atom_t = c_uint_p +functor_t = c_uint_p +module_t = c_void_p +predicate_t = c_void_p +record_t = c_void_p +term_t = c_uint_p +qid_t = c_uint_p +PL_fid_t = c_uint_p +fid_t = c_uint_p +control_t = c_void_p +PL_engine_t = c_void_p +PL_atomic_t = c_uint_p +foreign_t = c_uint_p +pl_wchar_t = c_wchar + +PL_initialise = _lib.PL_initialise +#PL_initialise.argtypes = [c_int, c_c?? + +PL_open_foreign_frame = _lib.PL_open_foreign_frame +PL_open_foreign_frame.restype = fid_t + +PL_new_term_ref = _lib.PL_new_term_ref +PL_new_term_ref.restype = term_t + +PL_new_term_refs = _lib.PL_new_term_refs +PL_new_term_refs.argtypes = [c_int] +PL_new_term_refs.restype = term_t + +PL_chars_to_term = _lib.PL_chars_to_term +PL_chars_to_term.argtypes = [c_char_p, term_t] +PL_chars_to_term.restype = c_int + +PL_call = _lib.PL_call +PL_call.argtypes = [term_t, module_t] +PL_call.restype = c_int + +PL_call_predicate = _lib.PL_call_predicate +PL_call_predicate.argtypes = [module_t, c_int, predicate_t, term_t] +PL_call_predicate.restype = fid_t + +PL_close_foreign_frame = _lib.PL_close_foreign_frame +PL_close_foreign_frame.argtypes = [fid_t] +PL_close_foreign_frame.restype = None + +PL_discard_foreign_frame = _lib.PL_discard_foreign_frame +PL_discard_foreign_frame.argtypes = [fid_t] +PL_discard_foreign_frame.restype = None + +PL_rewind_foreign_frame = _lib.PL_rewind_foreign_frame +PL_rewind_foreign_frame.argtypes = [fid_t] +PL_rewind_foreign_frame.restype = None + +PL_put_list_chars = _lib.PL_put_list_chars +PL_put_list_chars.argtypes = [term_t, c_char_p] +PL_put_list_chars.restype = c_int + +#PL_EXPORT(void) PL_register_atom(atom_t a); +PL_register_atom = _lib.PL_register_atom +PL_register_atom.argtypes = [atom_t] +PL_register_atom.restype = None + +#PL_EXPORT(void) PL_unregister_atom(atom_t a); +PL_unregister_atom = _lib.PL_unregister_atom +PL_unregister_atom.argtypes = [atom_t] +PL_unregister_atom.restype = None + +#PL_EXPORT(atom_t) PL_functor_name(functor_t f); +PL_functor_name = _lib.PL_functor_name +PL_functor_name.argtypes = [functor_t] +PL_functor_name.restype = atom_t + +#PL_EXPORT(int) PL_functor_arity(functor_t f); +PL_functor_arity = _lib.PL_functor_arity +PL_functor_arity.argtypes = [functor_t] +PL_functor_arity.restype = c_int + +# /* Get C-values from Prolog terms */ +#PL_EXPORT(int) PL_get_atom(term_t t, atom_t *a); +PL_get_atom = _lib.PL_get_atom +PL_get_atom.argtypes = [term_t, POINTER(atom_t)] +PL_get_atom.restype = c_int + +#PL_EXPORT(int) PL_get_bool(term_t t, int *value); +PL_get_bool = _lib.PL_get_bool +PL_get_bool.argtypes = [term_t, POINTER(c_int)] +PL_get_bool.restype = c_int + +#PL_EXPORT(int) PL_get_atom_chars(term_t t, char **a); +PL_get_atom_chars = _lib.PL_get_atom_chars # FIXME +PL_get_atom_chars.argtypes = [term_t, POINTER(c_char_p)] +PL_get_atom_chars.restype = c_int + +##define PL_get_string_chars(t, s, l) PL_get_string(t,s,l) +# /* PL_get_string() is depricated */ +#PL_EXPORT(int) PL_get_string(term_t t, char **s, size_t *len); +PL_get_string = _lib.PL_get_string +PL_get_string_chars = PL_get_string +#PL_get_string_chars.argtypes = [term_t, POINTER(c_char_p), c_int_p] + +#PL_EXPORT(int) PL_get_chars(term_t t, char **s, unsigned int flags); +PL_get_chars = _lib.PL_get_chars # FIXME: + +#PL_EXPORT(int) PL_get_list_chars(term_t l, char **s, +# unsigned int flags); +#PL_EXPORT(int) PL_get_atom_nchars(term_t t, size_t *len, char **a); +#PL_EXPORT(int) PL_get_list_nchars(term_t l, +# size_t *len, char **s, +# unsigned int flags); +#PL_EXPORT(int) PL_get_nchars(term_t t, +# size_t *len, char **s, +# unsigned int flags); +#PL_EXPORT(int) PL_get_integer(term_t t, int *i); +PL_get_integer = _lib.PL_get_integer +PL_get_integer.argtypes = [term_t, POINTER(c_int)] +PL_get_integer.restype = c_int + +#PL_EXPORT(int) PL_get_long(term_t t, long *i); +PL_get_long = _lib.PL_get_long +PL_get_long.argtypes = [term_t, POINTER(c_long)] +PL_get_long.restype = c_int + +#PL_EXPORT(int) PL_get_pointer(term_t t, void **ptr); +#PL_EXPORT(int) PL_get_float(term_t t, double *f); +PL_get_float = _lib.PL_get_float +PL_get_float.argtypes = [term_t, c_double_p] +PL_get_float.restype = c_int + +#PL_EXPORT(int) PL_get_functor(term_t t, functor_t *f); +PL_get_functor = _lib.PL_get_functor +PL_get_functor.argtypes = [term_t, POINTER(functor_t)] +PL_get_functor.restype = c_int + +#PL_EXPORT(int) PL_get_name_arity(term_t t, atom_t *name, int *arity); +PL_get_name_arity = _lib.PL_get_name_arity +PL_get_name_arity.argtypes = [term_t, POINTER(atom_t), POINTER(c_int)] +PL_get_name_arity.restype = c_int + +#PL_EXPORT(int) PL_get_module(term_t t, module_t *module); +#PL_EXPORT(int) PL_get_arg(int index, term_t t, term_t a); +PL_get_arg = _lib.PL_get_arg +PL_get_arg.argtypes = [c_int, term_t, term_t] +PL_get_arg.restype = c_int + +#PL_EXPORT(int) PL_get_list(term_t l, term_t h, term_t t); +#PL_EXPORT(int) PL_get_head(term_t l, term_t h); +PL_get_head = _lib.PL_get_head +PL_get_head.argtypes = [term_t, term_t] +PL_get_head.restype = c_int + +#PL_EXPORT(int) PL_get_tail(term_t l, term_t t); +PL_get_tail = _lib.PL_get_tail +PL_get_tail.argtypes = [term_t, term_t] +PL_get_tail.restype = c_int + +#PL_EXPORT(int) PL_get_nil(term_t l); +PL_get_nil = _lib.PL_get_nil +PL_get_nil.argtypes = [term_t] +PL_get_nil.restype = c_int + +#PL_EXPORT(int) PL_get_term_value(term_t t, term_value_t *v); +#PL_EXPORT(char *) PL_quote(int chr, const char *data); + +PL_put_atom_chars = _lib.PL_put_atom_chars +PL_put_atom_chars.argtypes = [term_t, c_char_p] +PL_put_atom_chars.restype = c_int + +PL_atom_chars = _lib.PL_atom_chars +PL_atom_chars.argtypes = [atom_t] +PL_atom_chars.restype = c_char_p + +PL_predicate = _lib.PL_predicate +PL_predicate.argtypes = [c_char_p, c_int, c_char_p] +PL_predicate.restype = predicate_t + +PL_pred = _lib.PL_pred +PL_pred.argtypes = [functor_t, module_t] +PL_pred.restype = predicate_t + +PL_open_query = _lib.PL_open_query +PL_open_query.argtypes = [module_t, c_int, predicate_t, term_t] +PL_open_query.restype = qid_t + +PL_next_solution = _lib.PL_next_solution +PL_next_solution.argtypes = [qid_t] +PL_next_solution.restype = c_int + +PL_copy_term_ref = _lib.PL_copy_term_ref +PL_copy_term_ref.argtypes = [term_t] +PL_copy_term_ref.restype = term_t + +PL_get_list = _lib.PL_get_list +PL_get_list.argtypes = [term_t, term_t, term_t] +PL_get_list.restype = c_int + +PL_get_chars = _lib.PL_get_chars # FIXME + +PL_close_query = _lib.PL_close_query +PL_close_query.argtypes = [qid_t] +PL_close_query.restype = None + +#void PL_cut_query(qid) +PL_cut_query = _lib.PL_cut_query +PL_cut_query.argtypes = [qid_t] +PL_cut_query.restype = None + +PL_halt = _lib.PL_halt +PL_halt.argtypes = [c_int] +PL_halt.restype = None + +# PL_EXPORT(int) PL_cleanup(int status); +PL_cleanup = _lib.PL_cleanup +PL_cleanup.restype = c_int + +PL_unify_integer = _lib.PL_unify_integer +PL_unify = _lib.PL_unify +PL_unify.restype = c_int + +#PL_EXPORT(int) PL_unify_arg(int index, term_t t, term_t a) WUNUSED; +PL_unify_arg = _lib.PL_unify_arg +PL_unify_arg.argtypes = [c_int, term_t, term_t] +PL_unify_arg.restype = c_int + +# Verify types + +PL_term_type = _lib.PL_term_type +PL_term_type.argtypes = [term_t] +PL_term_type.restype = c_int + +PL_is_variable = _lib.PL_is_variable +PL_is_variable.argtypes = [term_t] +PL_is_variable.restype = c_int + +PL_is_ground = _lib.PL_is_ground +PL_is_ground.argtypes = [term_t] +PL_is_ground.restype = c_int + +PL_is_atom = _lib.PL_is_atom +PL_is_atom.argtypes = [term_t] +PL_is_atom.restype = c_int + +PL_is_integer = _lib.PL_is_integer +PL_is_integer.argtypes = [term_t] +PL_is_integer.restype = c_int + +PL_is_string = _lib.PL_is_string +PL_is_string.argtypes = [term_t] +PL_is_string.restype = c_int + +PL_is_float = _lib.PL_is_float +PL_is_float.argtypes = [term_t] +PL_is_float.restype = c_int + +#PL_is_rational = _lib.PL_is_rational +#PL_is_rational.argtypes = [term_t] +#PL_is_rational.restype = c_int + +PL_is_compound = _lib.PL_is_compound +PL_is_compound.argtypes = [term_t] +PL_is_compound.restype = c_int + +PL_is_functor = _lib.PL_is_functor +PL_is_functor.argtypes = [term_t, functor_t] +PL_is_functor.restype = c_int + +PL_is_list = _lib.PL_is_list +PL_is_list.argtypes = [term_t] +PL_is_list.restype = c_int + +PL_is_atomic = _lib.PL_is_atomic +PL_is_atomic.argtypes = [term_t] +PL_is_atomic.restype = c_int + +PL_is_number = _lib.PL_is_number +PL_is_number.argtypes = [term_t] +PL_is_number.restype = c_int + +# /* Assign to term-references */ +#PL_EXPORT(void) PL_put_variable(term_t t); +PL_put_variable = _lib.PL_put_variable +PL_put_variable.argtypes = [term_t] +PL_put_variable.restype = None + +#PL_EXPORT(void) PL_put_atom(term_t t, atom_t a); +PL_put_atom = _lib.PL_put_atom +PL_put_atom.argtypes = [term_t, atom_t] +PL_put_atom.restype = None + +#PL_EXPORT(void) PL_put_atom_chars(term_t t, const char *chars); +#PL_EXPORT(void) PL_put_string_chars(term_t t, const char *chars); +#PL_EXPORT(void) PL_put_list_chars(term_t t, const char *chars); +#PL_EXPORT(void) PL_put_list_codes(term_t t, const char *chars); +#PL_EXPORT(void) PL_put_atom_nchars(term_t t, size_t l, const char *chars); +#PL_EXPORT(void) PL_put_string_nchars(term_t t, size_t len, const char *chars); +#PL_EXPORT(void) PL_put_list_nchars(term_t t, size_t l, const char *chars); +#PL_EXPORT(void) PL_put_list_ncodes(term_t t, size_t l, const char *chars); +#PL_EXPORT(void) PL_put_integer(term_t t, long i); +PL_put_integer = _lib.PL_put_integer +PL_put_integer.argtypes = [term_t, c_long] +PL_put_integer.restype = None + +#PL_EXPORT(void) PL_put_pointer(term_t t, void *ptr); +#PL_EXPORT(void) PL_put_float(term_t t, double f); +PL_put_float = _lib.PL_put_float +PL_put_float.argtypes = [term_t, c_double] +PL_put_float.restype = None + +#PL_EXPORT(void) PL_put_functor(term_t t, functor_t functor); +PL_put_functor = _lib.PL_put_functor +PL_put_functor.argtypes = [term_t, functor_t] +PL_put_functor.restype = None + +#PL_EXPORT(void) PL_put_list(term_t l); +PL_put_list = _lib.PL_put_list +PL_put_list.argtypes = [term_t] +PL_put_list.restype = None + +#PL_EXPORT(void) PL_put_nil(term_t l); +PL_put_nil = _lib.PL_put_nil +PL_put_nil.argtypes = [term_t] +PL_put_nil.restype = None + +#PL_EXPORT(void) PL_put_term(term_t t1, term_t t2); +PL_put_term = _lib.PL_put_term +PL_put_term.argtypes = [term_t, term_t] +PL_put_term.restype = None + +# /* construct a functor or list-cell */ +#PL_EXPORT(void) PL_cons_functor(term_t h, functor_t f, ...); +#class _PL_cons_functor(object): +PL_cons_functor = _lib.PL_cons_functor # FIXME: + +#PL_EXPORT(void) PL_cons_functor_v(term_t h, functor_t fd, term_t a0); +PL_cons_functor_v = _lib.PL_cons_functor_v +PL_cons_functor_v.argtypes = [term_t, functor_t, term_t] +PL_cons_functor_v.restype = None + +#PL_EXPORT(void) PL_cons_list(term_t l, term_t h, term_t t); +PL_cons_list = _lib.PL_cons_list +PL_cons_list.argtypes = [term_t, term_t, term_t] +PL_cons_list.restype = None + +# +# term_t PL_exception(qid_t qid) +PL_exception = _lib.PL_exception +PL_exception.argtypes = [qid_t] +PL_exception.restype = term_t +# +PL_register_foreign = _lib.PL_register_foreign + +# +#PL_EXPORT(atom_t) PL_new_atom(const char *s); +PL_new_atom = _lib.PL_new_atom +PL_new_atom.argtypes = [c_char_p] +PL_new_atom.restype = atom_t + +#PL_EXPORT(functor_t) PL_new_functor(atom_t f, int a); +PL_new_functor = _lib.PL_new_functor +PL_new_functor.argtypes = [atom_t, c_int] +PL_new_functor.restype = functor_t + + +# /******************************* +# * COMPARE * +# *******************************/ +# +#PL_EXPORT(int) PL_compare(term_t t1, term_t t2); +#PL_EXPORT(int) PL_same_compound(term_t t1, term_t t2); +PL_compare = _lib.PL_compare +PL_compare.argtypes = [term_t, term_t] +PL_compare.restype = c_int + +PL_same_compound = _lib.PL_same_compound +PL_same_compound.argtypes = [term_t, term_t] +PL_same_compound.restype = c_int + + +# /******************************* +# * RECORDED DATABASE * +# *******************************/ +# +#PL_EXPORT(record_t) PL_record(term_t term); +PL_record = _lib.PL_record +PL_record.argtypes = [term_t] +PL_record.restype = record_t + +#PL_EXPORT(void) PL_recorded(record_t record, term_t term); +PL_recorded = _lib.PL_recorded +PL_recorded.argtypes = [record_t, term_t] +PL_recorded.restype = None + +#PL_EXPORT(void) PL_erase(record_t record); +PL_erase = _lib.PL_erase +PL_erase.argtypes = [record_t] +PL_erase.restype = None + +# +#PL_EXPORT(char *) PL_record_external(term_t t, size_t *size); +#PL_EXPORT(int) PL_recorded_external(const char *rec, term_t term); +#PL_EXPORT(int) PL_erase_external(char *rec); + +PL_new_module = _lib.PL_new_module +PL_new_module.argtypes = [atom_t] +PL_new_module.restype = module_t + +intptr_t = c_long +ssize_t = intptr_t +wint_t = c_uint + +#typedef struct +#{ +# int __count; +# union +# { +# wint_t __wch; +# char __wchb[4]; +# } __value; /* Value so far. */ +#} __mbstate_t; + +class _mbstate_t_value(Union): + _fields_ = [("__wch",wint_t), + ("__wchb",c_char*4)] + +class mbstate_t(Structure): + _fields_ = [("__count",c_int), + ("__value",_mbstate_t_value)] + +# stream related funcs +Sread_function = CFUNCTYPE(ssize_t, c_void_p, c_char_p, c_size_t) +Swrite_function = CFUNCTYPE(ssize_t, c_void_p, c_char_p, c_size_t) +Sseek_function = CFUNCTYPE(c_long, c_void_p, c_long, c_int) +Sseek64_function = CFUNCTYPE(c_int64, c_void_p, c_int64, c_int) +Sclose_function = CFUNCTYPE(c_int, c_void_p) +Scontrol_function = CFUNCTYPE(c_int, c_void_p, c_int, c_void_p) + +# IOLOCK +IOLOCK = c_void_p + +# IOFUNCTIONS +class IOFUNCTIONS(Structure): + _fields_ = [("read",Sread_function), + ("write",Swrite_function), + ("seek",Sseek_function), + ("close",Sclose_function), + ("seek64",Sseek64_function), + ("reserved",intptr_t*2)] + +# IOENC +ENC_UNKNOWN,ENC_OCTET,ENC_ASCII,ENC_ISO_LATIN_1,ENC_ANSI,ENC_UTF8,ENC_UNICODE_BE,ENC_UNICODE_LE,ENC_WCHAR = list(range(9)) +IOENC = c_int + +# IOPOS +class IOPOS(Structure): + _fields_ = [("byteno",c_int64), + ("charno",c_int64), + ("lineno",c_int), + ("linepos",c_int), + ("reserved", intptr_t*2)] + +# IOSTREAM +class IOSTREAM(Structure): + _fields_ = [("bufp",c_char_p), + ("limitp",c_char_p), + ("buffer",c_char_p), + ("unbuffer",c_char_p), + ("lastc",c_int), + ("magic",c_int), + ("bufsize",c_int), + ("flags",c_int), + ("posbuf",IOPOS), + ("position",POINTER(IOPOS)), + ("handle",c_void_p), + ("functions",IOFUNCTIONS), + ("locks",c_int), + ("mutex",IOLOCK), + ("closure_hook",CFUNCTYPE(None, c_void_p)), + ("closure",c_void_p), + ("timeout",c_int), + ("message",c_char_p), + ("encoding",IOENC)] +IOSTREAM._fields_.extend([("tee",IOSTREAM), + ("mbstate",POINTER(mbstate_t)), + ("reserved",intptr_t*6)]) + + + +#PL_EXPORT(IOSTREAM *) Sopen_string(IOSTREAM *s, char *buf, size_t sz, const char *m); +Sopen_string = _lib.Sopen_string +Sopen_string.argtypes = [POINTER(IOSTREAM), c_char_p, c_size_t, c_char_p] +Sopen_string.restype = POINTER(IOSTREAM) + +#PL_EXPORT(int) Sclose(IOSTREAM *s); +Sclose = _lib.Sclose +Sclose.argtypes = [POINTER(IOSTREAM)] + + +#PL_EXPORT(int) PL_unify_stream(term_t t, IOSTREAM *s); +PL_unify_stream = _lib.PL_unify_stream +PL_unify_stream.argtypes = [term_t, POINTER(IOSTREAM)] + +PL_unify_atom_chars = _lib.PL_unify_atom_chars +PL_unify_atom_chars.argtypes = [term_t, c_char_p] +PL_unify_atom_chars.restype = c_int diff --git a/prolog/engine.py b/prolog/engine.py new file mode 100755 index 0000000..9e7340f --- /dev/null +++ b/prolog/engine.py @@ -0,0 +1,182 @@ +#!/usr/bin/python3 + +import re + +from prolog.core import * + +class Atom(object): + __slots__ = 'ref' + + def __init__(self, src=None, ref=None): + if ref != None: + self.ref = ref + else: + self.ref = PL_new_atom(bytes(src, encoding=encoding)) + +class Term(object): + __slots__ = 'ref' + + def __init__(self, src=None, ref=None): + if ref != None: + self.ref = ref + else: + self.ref = PL_new_term_ref() + if isinstance(src, str): + if not PL_chars_to_term(bytes(src, encoding=encoding), self.ref): + raise ValueError("invalid compound term") + elif isinstance(src, int): + PL_put_integer(self.ref, src) + elif isinstance(src, float): + PL_put_float(self.ref, src) + elif isinstance(src, list): + PL_put_nil(self.ref) + for t in src: + PL_cons_list(self.ref, t.ref, self.ref) + elif isinstance(src, tuple) and len(src) == 2: + name = PL_new_atom(bytes(src[0], encoding=encoding)) + args = src[1] + PL_cons_functor_v(self.ref, PL_new_functor(name, len(args)), args.ref) + elif isinstance(src, Atom): + PL_put_atom(self.ref, src.ref) + + def __str__(self): + ptr = c_char_p() + if PL_get_chars(self.ref, byref(ptr), CVT_ALL|CVT_WRITE|BUF_RING): + return str(ptr.value, encoding=encoding) + +class Termv(object): + __slots__ = 'ref', 'size' + + def __init__(self, terms): + self.size = len(terms) + self.ref = PL_new_term_refs(self.size) + for i in range(len(terms)): + PL_put_term(self.ref+i, terms[i].ref) + + def __len__(self): + return self.size + + def __getitem__(self, i): + if not isinstance(i, int) or len(self) == 0 or i < 0 or i >= len(self): + raise TypeError + return Term(ref=self.ref+i) + + def __iter__(self): + for i in range(len(self)): + yield Term(ref=self.ref+i) + return + + def __str__(self): + return '[' + ','.join([str(term) for term in self]) + ']' + +class PrologEngine(object): + def __init__(self): + # initialize Prolog library + args = ['./', '-q', '--nosignals'] + if SWI_HOME_DIR is not None: + args.append('--home={0}'.format(SWI_HOME_DIR)) + s_plargs = len(args) + plargs = (c_char_p*s_plargs)() + for i in range(s_plargs): + plargs[i] = bytes(args[i], encoding) + if not PL_initialise(s_plargs, plargs): + raise EnvironmentError('Could not initialize Prolog environment.' + 'PL_initialise returned {0}'.format(result)) + + # construct needed predicates + self.p = { + 'assertz': PL_predicate(b'assertz', 2, None), + 'erase': PL_predicate(b'erase', 1, None), + 'call_with_depth_limit': PL_predicate(b'call_with_depth_limit', 3, None), + 'call_with_time_limit': PL_predicate(b'call_with_time_limit', 2, None), + 'consult': PL_predicate(b'consult', 1, None), + 'read_term_from_atom': PL_predicate(b'read_term_from_atom', 3, None), + 'safe_goal': PL_predicate(b'safe_goal', 1, None), + 'set_prolog_stack': PL_predicate(b'set_prolog_stack', 2, None) + } + self.err_flags = PL_Q_NODEBUG|PL_Q_CATCH_EXCEPTION + + # database of current asserts; each load gets a new index + self.refs = {} + self.max_index = 0 + + PL_call_predicate(None, self.err_flags, self.p['set_prolog_stack'], + Termv([Term('global'), Term('limit(2*10**9)')]).ref) + PL_call_predicate(None, self.err_flags, self.p['set_prolog_stack'], + Termv([Term('local'), Term('limit(2*10**9)')]).ref) + + def consult(self, filename): + args = Termv([Term(Atom(filename))]) + out = PL_call_predicate(None, self.err_flags, self.p['consult'], args.ref) + return out == 1 + + def load(self, program, module=None): + refs = [] + try: + for rule in re.split('\.(?:\s+|\Z)', program): + if rule == '': + continue + if module != None: + rule = module + ':(' + rule + ')' + args = Termv([Term(rule), Term('Ref')]) + out = PL_call_predicate(None, self.err_flags, self.p['assertz'], args.ref) + refs.append(args[1]) + except: + # unload already loaded rules (hacky, mustfix, todo) + for ref in refs: + args = Termv([ref]) + out = PL_call_predicate(None, self.err_flags, self.p['erase'], args.ref) + return None + + self.max_index += 1 + self.refs[self.max_index] = refs + return self.max_index + + def unload(self, index): + for ref in self.refs[index]: + args = Termv([ref]) + out = PL_call_predicate(None, self.err_flags, self.p['erase'], args.ref) + del self.refs[index] + + def query(self, q, module=None): + if module != None: + q = module + ':(' + q + ')' + + fid = PL_open_foreign_frame() + goal = Term() + var_list = Term() + options = Term([Term(('variable_names', Termv([var_list])))]) + + # parse term + if not PL_call_predicate(None, self.err_flags, self.p['read_term_from_atom'], + Termv([Term(Atom(q)), goal, options]).ref): + return None + # check if goal is safe with currently loaded rules + if not PL_call_predicate(None, self.err_flags, self.p['safe_goal'], + Termv([goal]).ref): + return None + + solutions = [] + #depth_limit = Term(('call_with_depth_limit', Termv([goal, Term(50), Term('_')]))) + qid = PL_open_query(None, self.err_flags, self.p['call_with_time_limit'], + Termv([Term(0.01), goal]).ref) + while len(solutions) < 10: + if PL_next_solution(qid): + solutions += [str(var_list)] + else: + ex = PL_exception(qid) + if ex != None: + ex = Term(ref=PL_exception(qid)) + if ex != None: + message_to_string = PL_predicate(b'message_to_string', 2, None) + args = Termv([ex, Term()]) + + if PL_call_predicate(None, self.err_flags, message_to_string, args.ref): + sys.stderr.write('Error: ' + str(args[1]) + '\n') + else: + sys.stderr.write('Unknown error\n') + break + PL_close_query(qid) + PL_discard_foreign_frame(fid) + + return solutions diff --git a/prolog/lexer.py b/prolog/lexer.py new file mode 100755 index 0000000..008051c --- /dev/null +++ b/prolog/lexer.py @@ -0,0 +1,92 @@ +#!/usr/bin/python3 + +import ply.lex as lex + +# LEXER + +#states = ( +# ('comment', 'exclusive'), +#) + +# tokens; treat operators as names if followed by ( +operators = { + r':-': 'FROM', + r'->': 'IMPLIES', + r'\+': 'NOT', + r'not': 'NOT', + r'=': 'EQU', + r'\=': 'NEQU', + r'==': 'EQ', + r'\==': 'NEQ', + r'=..': 'UNIV', + r'is': 'IS', + r'=:=': 'EQA', + r'=\=': 'NEQA', + r'<': 'LT', + r'=<': 'LE', + r'>': 'GT', + r'>=': 'GE', + r'@<': 'LTL', + r'@=<': 'LEL', + r'@>': 'GTL', + r'@>=': 'GEL', + r'+': 'PLUS', + r'-': 'MINUS', + r'*': 'STAR', + r'/': 'DIV', + r'//': 'IDIV', + r'mod': 'MOD', + r'**': 'POW', + r'.': 'PERIOD', + r',': 'COMMA', + r';': 'SEMI' +} +tokens = list(operators.values()) + [ + 'UINTEGER', 'UREAL', + 'NAME', 'VARIABLE', 'STRING', + 'LBRACKET', 'RBRACKET', 'LPAREN', 'RPAREN', 'PIPE', 'LBRACE', 'RBRACE', + 'INVALID' +] + +# punctuation +t_LBRACKET = r'\[' +t_RBRACKET = r'\]' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_PIPE = r'\|' +t_LBRACE = r'{' +t_RBRACE = r'}' + +t_UINTEGER = r'[0-9]+' +t_UREAL = r'[0-9]+\.[0-9]+([eE][-+]?[0-9]+)?|inf|nan' +t_VARIABLE = r'(_|[A-Z])[a-zA-Z0-9_]*' +t_STRING = r'"(""|\\.|[^\"])*"' + +def t_NAME(t): + r"'(''|\\.|[^\\'])*'|[a-z][a-zA-Z0-9_]*|[-+*/\\^<>=~:.?@#$&]+|!|;|," + if t.lexer.lexpos >= len(t.lexer.lexdata) or t.lexer.lexdata[t.lexer.lexpos] != '(': + t.type = operators.get(t.value, 'NAME') + return t + +t_ignore = ' \t' + +# no support for nested comments yet +def t_comment(t): + r'(/\*(.|\n)*?\*/)|(%.*)' + pass + +def t_newline(t): + r'\n+' + t.lexer.lineno += len(t.value) + +def t_error(t): + # TODO send this to stderr + #print("Illegal character '" + t.value[0] + "'") + t.type = 'INVALID' + t.value = t.value[0] + t.lexer.skip(1) + return t + +lexer = lex.lex() + + diff --git a/prolog/sandbox.pl b/prolog/sandbox.pl new file mode 100644 index 0000000..2ba5af0 --- /dev/null +++ b/prolog/sandbox.pl @@ -0,0 +1,455 @@ +/* Part of SWI-Prolog + + Author: Jan Wielemaker + E-mail: J.Wielemaker@cs.vu.nl + WWW: http://www.swi-prolog.org + Copyright (C): 2009-2013, VU University Amsterdam + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + As a special exception, if you link this library with other files, + compiled with a Free Software compiler, to produce an executable, this + library does not by itself cause the resulting executable to be covered + by the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + +:- module(sandbox, + [ safe_goal/1 % :Goal + ]). +:- use_module(library(assoc)). +:- use_module(library(lists)). +:- use_module(library(debug)). +:- use_module(library(apply_macros), [expand_phrase/2]). + +:- multifile + safe_primitive/1, % Goal + safe_meta/2. % Goal, Calls + +% :- debug(sandbox). + +/** Sandboxed Prolog code + +Prolog is a full-featured Turing complete programming language in which +it is easy to write programs that can harm your computer. On the other +hand, Prolog is a logic based _query language_ which can be exploited to +query data interactively from, e.g., the web. This library provides +safe_goal/1, which determines whether it is safe to call its argument. + +@tbd Handling of ^ and // meta predicates +*/ + + +:- meta_predicate + safe_goal(0). + +%% safe_goal(:Goal) is det. +% +% True if calling Goal provides no security risc. This implies +% that: +% +% - The call-graph can be fully expanded. Full expansion *stops* +% if a meta-goal is found for which we cannot determine enough +% details to know which predicate will be called. +% +% - All predicates referenced from the fully expanded are +% whitelisted by the predicate safe_primitive/1 and safe_meta/2. +% +% @error instantiation_error if the analysis encounters a term in +% a callable position that is insufficiently instantiated +% to determine the predicate called. +% @error permission_error(call, sandboxed, Goal) if Goal is in +% the call-tree and not white-listed. + +safe_goal(M:Goal) :- + empty_assoc(Safe0), + safe(Goal, M, [], Safe0, _). + + +%% safe(+Goal, +Module, +Parents, +Safe0, -Safe) is semidet. +% +% Is true if Goal can only call safe code. + +safe(V, _, Parents, _, _) :- + var(V), !, + throw(error(instantiation_error, sandbox(V, Parents))). +safe(M:G, _, Parent, Safe0, Safe) :- !, + safe(G, M, Parent, Safe0, Safe). +safe(G, _, Parents, _, _) :- + debugging(sandbox(show)), + length(Parents, Level), + debug(sandbox(show), '[~D] SAFE ~q?', [Level, G]), + fail. +safe(G, _, _, Safe, Safe) :- + safe_primitive(G), + predicate_property(G, iso), !. +safe(G, M, _, Safe, Safe) :- + ( predicate_property(M:G, imported_from(M2)) + -> true + ; M2 = M + ), + safe_primitive(M2:G), !. +safe(G, M, Parents, Safe0, Safe) :- + safe_meta(G, Called), !, + safe_list(Called, M, Parents, Safe0, Safe). +safe(G, M, Parents, Safe0, Safe) :- + goal_id(M:G, Id, Gen), + ( get_assoc(Id, Safe0, _) + -> Safe = Safe0 + ; put_assoc(Id, Safe0, true, Safe1), + safe_clauses(Gen, M, [Id|Parents], Safe1, Safe) + ). + +safe_clauses(G, M, Parents, Safe0, Safe) :- + predicate_property(M:G, interpreted), !, +% \+ predicate_property(M:G, meta_predicate(_)), !, + def_module(M:G, MD:QG), + findall(Body, clause(MD:QG, Body), Bodies), + safe_list(Bodies, MD, Parents, Safe0, Safe). +safe_clauses(_, _M, [G|Parents], _, _) :- + throw(error(permission_error(call, sandboxed, G), + sandbox(G, Parents))). + +safe_list([], _, _, Safe, Safe). +safe_list([H|T], M, Parents, Safe0, Safe) :- + copy_term(H, H1), + safe(H1, M, Parents, Safe0, Safe1), + safe_list(T, M, Parents, Safe1, Safe). + + +def_module(M:G, MD:QG) :- + predicate_property(M:G, imported_from(MD)), !, + meta_qualify(MD:G, M, QG). +def_module(M:G, M:QG) :- + meta_qualify(M:G, M, QG). + +%% meta_qualify(:G, +M, -QG) is det. +% +% Perform meta-qualification of the goal-argument + +meta_qualify(MD:G, M, QG) :- + predicate_property(MD:G, meta_predicate(Head)), !, + G =.. [Name|Args], + Head =.. [_|Q], + qualify_args(Q, M, Args, QArgs), + QG =.. [Name|QArgs]. +meta_qualify(_:G, _, G). + +qualify_args([], _, [], []). +qualify_args([H|T], M, [A|AT], [Q|QT]) :- + qualify_arg(H, M, A, Q), + qualify_args(T, M, AT, QT). + +qualify_arg(S, M, A, Q) :- + q_arg(S), !, + qualify(A, M, Q). +qualify_arg(_, _, A, A). + +q_arg(I) :- integer(I), !. +q_arg(:). + +qualify(A, M, MZ:Q) :- + strip_module(M:A, MZ, Q). + +%% goal_id(:Goal, -Id, -Gen) is nondet. +% +% Generate an identifier for the goal proven to be safe. We +% first try to prove the most general form of the goal. If +% this fails, we try to prove more specific versions. +% +% @tbd Do step-by-step generalisation instead of the current +% two levels (most general and most specific). +% + +goal_id(M:Goal, M:Id, Gen) :- !, + goal_id(Goal, Id, Gen). +goal_id(Term, _, _) :- + \+ callable(Term), !, fail. +goal_id(Term, Name/Arity, Gen) :- % most general form + functor(Term, Name, Arity), + functor(Gen, Name, Arity). +goal_id(Term, Skolem, Term) :- % most specific form + copy_term(Term, Skolem), + numbervars(Skolem, 0, _). + +%% safe_primitive(?Goal) is nondet. +% +% True if Goal is safe to call (i.e., cannot access dangerous +% system-resources and cannot upset other parts of the Prolog +% process). There are two types of facts. ISO built-ins are +% declared without a module prefix. This is safe because it is not +% allowed to (re-)define these primitives (i.e., give them an +% unsafe implementation) and the way around +% (redefine_system_predicate/1) is unsafe. The other group are +% module-qualified and only match if the system infers that the +% predicate is (or will be) imported from the given module. + +% First, all ISO system predicates that are considered safe + +safe_primitive(true). +safe_primitive(fail). +safe_primitive(repeat). +safe_primitive(!). + % types +safe_primitive(var(_)). +safe_primitive(nonvar(_)). +safe_primitive(integer(_)). +safe_primitive(float(_)). +safe_primitive(atom(_)). +safe_primitive(compound(_)). +safe_primitive(ground(_)). + % ordering +safe_primitive(@>(_,_)). +safe_primitive(@>=(_,_)). +safe_primitive(==(_,_)). +safe_primitive(@<(_,_)). +safe_primitive(@=<(_,_)). +safe_primitive(compare(_,_,_)). +safe_primitive(sort(_,_)). +safe_primitive(keysort(_,_)). + % unification and equivalence +safe_primitive(=(_,_)). +safe_primitive(\=(_,_)). +safe_primitive(\==(_,_)). + % arithmetic +safe_primitive(is(_,_)). +safe_primitive(>(_,_)). +safe_primitive(>=(_,_)). +safe_primitive(=:=(_,_)). +safe_primitive(=\=(_,_)). +safe_primitive(=<(_,_)). +safe_primitive(<(_,_)). + % term-handling +safe_primitive(arg(_,_,_)). +safe_primitive(system:setarg(_,_,_)). +safe_primitive(functor(_,_,_)). +safe_primitive(_ =.. _). +safe_primitive(copy_term(_,_)). +safe_primitive(numbervars(_,_,_)). + % atoms +safe_primitive(atom_concat(_,_,_)). +safe_primitive(atom_chars(_, _)). + % Lists +safe_primitive(length(_,_)). + % exceptions +safe_primitive(throw(_)). + % misc +safe_primitive(current_prolog_flag(_,_)). + +safe_primitive(clause(_,_)). +safe_primitive(asserta(X)) :- safe_assert(X). +safe_primitive(assertz(X)) :- safe_assert(X). +safe_primitive(retract(X)) :- safe_assert(X). +safe_primitive(retractall(X)) :- safe_assert(X). + +% The non-ISO system predicates. These can be redefined, so we must +% be careful to ensure the system ones are used. + +safe_primitive(system:false). +safe_primitive(system:cyclic_term(_)). +safe_primitive(system:msort(_,_)). +safe_primitive(system:between(_,_,_)). +safe_primitive(system:succ(_,_)). +safe_primitive(system:plus(_,_,_)). +safe_primitive(system:term_variables(_,_)). +safe_primitive(system:atom_to_term(_,_,_)). +safe_primitive(system:term_to_atom(_,_)). +safe_primitive(system:atomic_list_concat(_,_,_)). +safe_primitive(system:atomic_list_concat(_,_)). +safe_primitive(system:downcase_atom(_,_)). +safe_primitive(system:upcase_atom(_,_)). +safe_primitive(system:is_list(_)). +safe_primitive(system:memberchk(_,_)). +safe_primitive(system:'$skip_list'(_,_,_)). + % attributes +safe_primitive(system:get_attr(_,_,_)). +safe_primitive(system:del_attr(_,_)). + % globals +safe_primitive(system:b_getval(_,_)). +safe_primitive(system:b_setval(_,_)). +safe_primitive(system:nb_current(_,_)). +safe_primitive(system:assert(X)) :- + safe_assert(X). + +% use_module/1. We only allow for .pl files that are loaded from +% relative paths that do not contain /../ + +safe_primitive(system:use_module(Spec)) :- + ground(Spec), + ( atom(Spec) + -> Path = Spec + ; Spec =.. [_Alias, Segments], + phrase(segments_to_path(Segments), List), + atomic_list_concat(List, Path) + ), + \+ is_absolute_file_name(Path), + \+ sub_atom(Path, _, _, _, '/../'), + absolute_file_name(Spec, AbsFile, + [ access(read), + file_type(prolog), + file_errors(fail) + ]), + file_name_extension(_, Ext, AbsFile), + save_extension(Ext). + +% Other library predicates. + + % rdf +safe_primitive(rdf_db:rdf(_,_,_)). +safe_primitive(rdf_db:rdf(_,_,_,_)). + % http +safe_primitive(http_session:http_session_data(_)). +safe_primitive(http_session:http_session_id(_)). + % random +safe_primitive(random:random(_)). + % porter +safe_primitive(porter_stem:porter_stem(_,_)). +safe_primitive(porter_stem:unaccent_atom(_,_)). +safe_primitive(porter_stem:tokenize_atom(_,_)). +safe_primitive(porter_stem:atom_to_stem_list(_,_)). + +% support predicates for safe_primitive, validating the safety of +% arguments to certain goals. + +segments_to_path(A/B) --> !, + segments_to_path(A), + [/], + segments_to_path(B). +segments_to_path(X) --> + [X]. + +save_extension(pl). + +%% safe_assert(+Term) is semidet. +% +% True if assert(Term) is safe, which means it asserts in the +% current module. Cross-module asserts are considered unsafe. We +% only allow for adding facts. In theory, we could also allow for +% rules if we prove the safety of the body. + +safe_assert(C) :- cyclic_term(C), !, fail. +safe_assert(X) :- var(X), !, fail. +safe_assert(_Head:-_Body) :- !, fail. +safe_assert(_:_) :- !, fail. +safe_assert(_). + +%% safe_meta(+Goal, -Called) is semidet. +% +% True if Goal is a meta-predicate that is considered safe iff all +% elements in Called are safe. + +safe_meta(put_attr(_,M,A), [M:attr_unify_hook(A, _)]) :- + atom(M), !. +safe_meta(Phrase, [Goal]) :- + expand_phrase(Phrase, Goal), !. +safe_meta(Goal, Called) :- + generic_goal(Goal, Gen), + safe_meta(Gen), + findall(C, called(Gen, Goal, C), Called). + +called(Gen, Goal, Called) :- + arg(I, Gen, Spec), + integer(Spec), + arg(I, Goal, Called0), + extend(Spec, Called0, Called). + +generic_goal(G, Gen) :- + functor(G, Name, Arity), + functor(Gen, Name, Arity). + +extend(0, G, G) :- !. +extend(I, G0, G) :- + G0 =.. List, + length(Extra, I), + append(List, Extra, All), + G =.. All. + +safe_meta((0,0)). +safe_meta((0;0)). +safe_meta((0->0)). +safe_meta(forall(0,0)). +safe_meta(catch(0,_,0)). +safe_meta(findall(_,0,_)). +safe_meta(findall(_,0,_,_)). +safe_meta(setof(_,0,_)). % TBD +safe_meta(bagof(_,0,_)). +safe_meta(^(_,0)). +safe_meta(\+(0)). +safe_meta(maplist(1, _)). +safe_meta(maplist(2, _, _)). +safe_meta(maplist(3, _, _, _)). +safe_meta(call(0)). +safe_meta(call(1, _)). +safe_meta(call(2, _, _)). +safe_meta(call(3, _, _, _)). +safe_meta(call(4, _, _, _, _)). +safe_meta(call(5, _, _, _, _, _)). + + + /******************************* + * SAFE COMPILATION HOOKS * + *******************************/ + +:- multifile + prolog:sandbox_allowed_directive/1, + prolog:sandbox_allowed_expansion/1. + +%% prolog:sandbox_allowed_directive(:G) is det. +% +% Throws an exception if G is not considered a safe directive. + +prolog:sandbox_allowed_directive(M:PredAttr) :- + safe_directive(PredAttr), + ( prolog_load_context(module, M) + -> PredAttr =.. [Attr, Preds], + safe_pattr(Preds, Attr) + ; permission_error(directive, sandboxed, (:- M:PredAttr)) + ). +prolog:sandbox_allowed_directive(G) :- + safe_goal(G). + +safe_directive(dynamic(_)). +safe_directive(thread_local(_)). +safe_directive(discontiguous(_)). +safe_directive(public(_)). + +safe_pattr(Var, _) :- + var(Var), !, + instantiation_error(Var). +safe_pattr((A,B), Attr) :- !, + safe_pattr(A, Attr), + safe_pattr(B, Attr). +safe_pattr(M:G, Attr) :- !, + ( atom(M), + prolog_load_context(module, M) + -> true + ; Goal =.. [Attr,M:G], + permission_error(directive, sandboxed, (:- Goal)) + ). +safe_pattr(_, _). + + +%% prolog:sandbox_allowed_expansion(:G) is det. +% +% Throws an exception if G is not considered a safe expansion +% goal. This deals with call-backs from the compiler for +% +% - goal_expansion/2 +% - term_expansion/2 +% - Quasi quotations. + +prolog:sandbox_allowed_expansion(G) :- + safe_goal(G). diff --git a/prolog/util.py b/prolog/util.py new file mode 100644 index 0000000..0aa0b09 --- /dev/null +++ b/prolog/util.py @@ -0,0 +1,114 @@ +#!/usr/bin/python3 + +import math +import re + +from .lexer import lexer + +# new lexer stuff +def tokenize(text): + # feed the troll + lexer.input(text) + # we are not interested in line numbers and absolute positions + return [(t.type, t.value) for t in lexer] + +operators = set([ + 'FROM', 'IMPLIES', 'NOT', + 'EQU', 'NEQU', 'EQ', 'NEQ', 'UNIV', 'IS', 'EQA', 'NEQA', + 'LT', 'LE', 'GT', 'GE', 'LTL', 'LEL', 'GTL', 'GEL', + 'PLUS', 'MINUS', 'STAR', 'DIV', 'IDIV', 'MOD', + 'POW', 'SEMI' +]) +def stringify(tokens, indent=''): + s = indent + for t in tokens: + if t[0] in operators: + s += ' ' + + if t[0] == 'FROM': + s += ':-\n ' + indent + elif t[0] == 'PERIOD': + s += '.\n' + indent + elif t[0] == 'COMMA': + s += ', ' + elif t[0] in operators: + s += t[1] + ' ' + else: + s += t[1] + + return s + +# return a list of lines in 'code', and a list of rule indexes +def decompose(code): + lines = [] + rules = [] + tokens = tokenize(code) + + line = [] + parens = [] + rule_start = 0 + for t in tokens: + if t[0] == 'SEMI': + lines.append(line[:]) + lines.append([t]) + line = [] + continue + if not parens: + if t[0] in ('PERIOD', 'FROM', 'COMMA'): + if line != []: + lines.append(line[:]) + line = [] + if t[0] == 'PERIOD': + rules.append((rule_start, len(lines))) + rule_start = len(lines) + continue + if t[0] in ('LPAREN', 'LBRACKET', 'LBRACE'): + parens.append(t[0]) + elif parens: + if t[0] == 'RPAREN' and parens[-1] == 'LPAREN': + parens.pop() + elif t[0] == 'RBRACKET' and parens[-1] == 'LBRACKET': + parens.pop() + elif t[0] == 'RBRACE' and parens[-1] == 'LBRACE': + parens.pop() + line.append(t) + return lines, rules + +# pretty-print a list of rules (no support for ; yet) +def compose(lines, rules): + code = '' + for start, end in rules: + for i in range(start, end): + line = lines[i] + if i > start: + code += ' ' + code += stringify(line).replace('\n', ' ') + if i == end-1: + code += '.\n' + elif i == start: + code += ' :-\n' + else: + if line and line[-1][0] != 'SEMI' and i < end-1 and lines[i+1][-1][0] != 'SEMI': + code += ',' + code += '\n' + return code + +# rename variables in order of appearance +def rename_vars(tokens, names={}): + # copy names so we don't fuck it up + names = {k: v for k, v in names.items()} + next_id = len(names) + for i in range(len(tokens)): + if tokens[i][0] == 'PERIOD': + names.clear() + next_id = 0 + elif tokens[i] == ('VARIABLE', '_'): + tokens[i] = ('VARIABLE', 'A' + str(next_id)) + next_id += 1 + elif tokens[i][0] == 'VARIABLE': + cur_name = tokens[i][1] + if cur_name not in names: + names[cur_name] = next_id + next_id += 1 + tokens[i] = ('VARIABLE', 'A' + str(names[cur_name])) + return names -- cgit v1.2.1