Author: Armin Rigo <[email protected]>
Branch: sandbox-lib
Changeset: r83230:1adffa44a320
Date: 2016-03-21 20:21 +0100
http://bitbucket.org/pypy/pypy/changeset/1adffa44a320/
Log: in-progress: write the API that I think would make sense, with lots
of comments
diff --git a/rpython/translator/rsandbox/rsandbox.py
b/rpython/translator/rsandbox/rsandbox.py
--- a/rpython/translator/rsandbox/rsandbox.py
+++ b/rpython/translator/rsandbox/rsandbox.py
@@ -24,50 +24,21 @@
assert lltype.typeOf(ll_func) == FUNC, (
"seen two sandboxed functions called %r with different "
"signatures:\n %r\n %r" % (fnname, FUNC, lltype.typeOf(ll_func)))
- return 'rsandbox_' + fnname
+ return 'rsandbox_fnptr_' + fnname
def add_sandbox_files(database, eci, targetdir):
- c_header = ['''
-#ifndef _RSANDBOX_H_
-#define _RSANDBOX_H_
+ c_part_header = py.path.local(__file__).join('..', 'src', 'part.h').read()
+ c_part_source = py.path.local(__file__).join('..', 'src', 'part.c').read()
+ c_header = [c_part_header]
+ c_source = [c_part_source]
-#ifndef RPY_SANDBOX_EXPORTED
-/* Common definitions when including this file from an external C project */
-
-#include <stdlib.h>
-#include <sys/utsname.h>
-
-#define RPY_SANDBOX_EXPORTED extern
-
-typedef long Signed;
-typedef unsigned long Unsigned;
-
-#endif
-
-/* The list of 'rsandbox_*' function pointers is automatically
- generated. Most of these function pointers are initialized to
- point to a function that aborts the sandboxed execution. The
- sandboxed program cannot, by default, use any of them. A few
- exceptions are provided, where the default implementation returns a
- safe default; for example rsandbox_getenv().
-*/
-''']
- c_source = ['''
-#include "common_header.h"
-#include "rsandbox.h"
-#include <stdlib.h>
-
-''']
-
- default_h = py.path.local(__file__).join('..', 'default.h').read()
- c_source.append(default_h)
- present = set(re.findall(r'\brsand_def_([a-zA-Z0-9_]+)[(]', default_h))
+ present = set(re.findall(r'\brsand_def_([a-zA-Z0-9_]+)[(]', c_part_source))
fnnames = database._sandboxlib_fnnames
for fnname in sorted(fnnames):
FUNC = fnnames[fnname]
- rsandboxname = 'rsandbox_' + fnname
+ rsandboxname = 'rsandbox_fnptr_' + fnname
vardecl = cdecl(database.gettype(lltype.Ptr(FUNC)), rsandboxname)
c_header.append('RPY_SANDBOX_EXPORTED %s;\n' % (vardecl,))
diff --git a/rpython/translator/rsandbox/default.h
b/rpython/translator/rsandbox/src/part.c
rename from rpython/translator/rsandbox/default.h
rename to rpython/translator/rsandbox/src/part.c
--- a/rpython/translator/rsandbox/default.h
+++ b/rpython/translator/rsandbox/src/part.c
@@ -1,4 +1,8 @@
-/*** translator/rsandbox/default.h ***/
+/*** rpython/translator/rsandbox/src/part.c ***/
+
+#include "common_header.h"
+#include "rsandbox.h"
+#include <stdlib.h>
/* This is called by most default implementations of 'rsandbox_*' */
diff --git a/rpython/translator/rsandbox/src/part.h
b/rpython/translator/rsandbox/src/part.h
new file mode 100644
--- /dev/null
+++ b/rpython/translator/rsandbox/src/part.h
@@ -0,0 +1,172 @@
+/*** rpython/translator/rsandbox/src/part.h ***/
+
+#ifndef _RSANDBOX_H_
+#define _RSANDBOX_H_
+
+#ifndef RPY_SANDBOX_EXPORTED
+/* Common definitions when including this file from an external C project */
+
+#include <stdlib.h>
+#include <sys/utsname.h>
+
+#define RPY_SANDBOX_EXPORTED extern
+
+typedef long Signed;
+typedef unsigned long Unsigned;
+
+#endif
+
+
+/* ***********************************************************
+
+ WARNING: Python is not meant to be a safe language. For example,
+ think about making a custom code object with a random string and
+ trying to interpret that. A sandboxed PyPy contains extra safety
+ checks that can detect such invalid operations before they cause
+ problems. When such a case is detected, THE WHOLE PROCESS IS
+ ABORTED right now. In the future, there should be a setjmp/longjmp
+ alternative to this, but the details need a bit of care (e.g. it
+ would still create memory leaks).
+
+ For now, you have to accept that the process can be aborted if
+ given malicious code. Also, running Python code from several
+ different sources in the same process is not recommended---there is
+ only one global state: malicious code can easily mangle the state
+ of the Python interpreter, influencing subsequent runs. Unless you
+ are fine with both issues, you MUST run Python from subprocesses,
+ not from your main program.
+
+ Multi-threading issues: DO NOT USE FROM SEVERAL THREADS AT THE SAME
+ TIME! You need a lock. If you use subprocesses, they will likely
+ be single-threaded anyway. (This issue might be fixed in the
+ future. Note that the sandboxed Python itself doesn't have the
+ possibility of starting threads.)
+*/
+
+
+/* This configures the maximum heap size allowed to the Python
+ interpreter. It only accounts for GC-tracked objects, so actual
+ memory usage can be larger. (It should hopefully never be more
+ than about twice the value, but if you are paranoid, do not rely
+ on this: use setrlimit() to bound the total RAM usage
+ of the subprocess. Similarly, you have no direct way to bound the
+ amount of time spent inside Python, but it is easy to set up an
+ alarm signal with alarm().)
+*/
+void rsandbox_set_heap_size(size_t maximum);
+
+
+/* Entry point: rsandbox_open() loads the given source code inside a
+ new Python module. The source code should define the interesting
+ Python functions, but cannot actually compute stuff yet: you cannot
+ pass here arguments or return values. rsandbox_open() returns a
+ module pointer if execution succeeds, or NULL if it gets an
+ exception. The pointer can be used in rsandbox_call(). It can
+ optionally be freed with rsandbox_close().
+
+ You can use this function with source code that is assembled from
+ untrusted sources, but it is recommended to pass a constant string
+ here. You can pass extra arguments with 'rsandbox_push_*()',
+ declared below; they are visible as 'args[0]', 'args[1]', and so
+ on. This allows you to do things like this:
+
+ rsandbox_module_t *compile_expression(const char *expression)
+ {
+ rsandbox_push_string(expression); // 'expression' is untrusted
+ return rsandbox_open(
+ "code = compile(args[0], '<untrusted>', 'eval')\n"
+ "def evaluate(n):\n"
+ " return eval(code, {'n': n})\n");
+ }
+
+ long safely_evaluate(rsandbox_module_t *mod, long n_value)
+ {
+ rsandbox_push_long(n_value);
+ rsandbox_call(mod, "evaluate"); // ignore exceptions
+ return rsandbox_result_long(); // result; if any problem, will be 0
+ }
+*/
+typedef struct rsandbox_module_s rsandbox_module_t;
+RPY_SANDBOX_EXPORTED rsandbox_module_t *rsandbox_open(const char *src);
+RPY_SANDBOX_EXPORTED void rsandbox_close(rsandbox_module_t *mod);
+
+/* To call one of the Python functions defined in the module, first
+ push the arguments one by one with rsandbox_push_*(), then call
+ rsandbox_call(). If an exception occurs, -1 is returned.
+
+ rsandbox_push_rw_buffer() is a way to pass read-write data. From
+ the Python side, this will pass a read-write buffer object. After
+ rsandbox_call() returns, the buffer becomes invalid.
+ (rsandbox_push_rw_buffer() is not very useful for rsandbox_open():
+ the buffer becomes invalid as soon as rsandbox_open() returns.)
+*/
+RPY_SANDBOX_EXPORTED void rsandbox_push_long(long);
+RPY_SANDBOX_EXPORTED void rsandbox_push_double(double);
+RPY_SANDBOX_EXPORTED void rsandbox_push_string(const char *);
+RPY_SANDBOX_EXPORTED void rsandbox_push_string_and_size(const char *, size_t);
+RPY_SANDBOX_EXPORTED void rsandbox_push_none(void);
+RPY_SANDBOX_EXPORTED void rsandbox_push_rw_buffer(char *, size_t);
+
+RPY_SANDBOX_EXPORTED int rsandbox_call(rsandbox_module_t *mod,
+ const char *func_name);
+
+/* Returns the result of the previous rsandbox_call() if the Python
+ function returned an 'int' object. Otherwise, returns 0. (You
+ MUST NOT assume anything about the 'long': careful with malicious
+ code returning results like sys.maxint or -sys.maxint-1.) */
+RPY_SANDBOX_EXPORTED long rsandbox_result_long(void);
+
+/* Returns the result of the previous rsandbox_call() if the Python
+ function returned an 'int' or 'float' object. Otherwise, 0.0.
+ (You MUST NOT assume anything about the 'double': careful with
+ malicious code returning results like inf, nan, or 1e-323.) */
+RPY_SANDBOX_EXPORTED double rsandbox_result_double(void);
+
+/* Returns the length of the string returned in the previous
+ rsandbox_call(). If it was not a string, returns 0. */
+RPY_SANDBOX_EXPORTED size_t rsandbox_result_string_length(void);
+
+/* Returns the data in the string. This function always writes an
+ additional '\0'. If the string is longer than 'bufsize-1', it is
+ truncated to 'bufsize-1' characters.
+
+ For small human-readable strings you can call
+ rsandbox_result_string() with some fixed maximum size. You get a
+ regular null-terminated 'char *' string. (If it contains embedded
+ '\0', it will appear truncated; if the Python function did not
+ return a string at all, it will be completely empty; but anyway
+ you MUST be ready to handle any malformed string at all.)
+
+ For strings of larger sizes or strings that can meaningfully
+ contain embedded '\0', you should allocate a 'buf' of size
+ 'rsandbox_result_string_length() + 1'.
+
+ To repeat: Be careful when reading strings from Python! They can
+ contain any character, so be sure to escape them correctly (or
+ reject them outright) if, for example, you are passing them
+ further. Malicious code can return any string. Your code must be
+ ready for anything. Err on the side of caution.
+*/
+RPY_SANDBOX_EXPORTED void rsandbox_result_string(char *buf, size_t bufsize);
+
+/* When an exception occurred in rsandbox_open() or rsandbox_call(),
+ return more information as a string. Same rules as
+ rsandbox_result_string(). (Careful, you MUST NOT assume that the
+ string is well-formed: malicious code can make it contain anything.
+ If you are copying it to a web page, for example, then a good idea
+ is to replace any character not in a whitelist with '?'.)
+*/
+RPY_SANDBOX_EXPORTED void rsandbox_last_exception(char *buf, size_t bufsize,
+ int include_traceback);
+
+
+/************************************************************/
+
+
+/* The list of 'rsandbox_fnptr_*' function pointers is automatically
+ generated. Most of these function pointers are initialized to
+ point to a function that aborts the sandboxed execution. The
+ sandboxed program cannot, by default, use any of them. A few
+ exceptions are provided, where the default implementation returns a
+ safe default (for example rsandbox_fnptr_getenv()).
+*/
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit