Hello all!
Many users want to have resource limits for jails, for example CPU and
memory limits.
I have rewritten the original cdjones patch for jail CPU limits so that
it works under the ULE scheduler.
It works in a simple way.
We count CPU usage for every jail, and if a jail uses more CPU than its
share allows, we move its threads to the IDLE queue; in the opposite case
we return them to TIMESHARE.
A jailed thread can use all available CPU time if the system has CPU to
spare.
If the system is under heavy load, a jailed thread cannot use the CPU as
long as the ratio (CPU shares of the jail / total CPU shares) <
(estimated CPU usage of the jail / total CPU usage).
Unjailed threads and interactive threads are not subject to this regime.
The patch adds two sysctls:
kern.sched.total_sched_shares - total number of CPU shares in the system;
increase it if you have more CPUs.
kern.sched.flush_estcpu_interval - interval, in ticks, after which estcpu
is flushed; the default is 2560 = 2 * 128 * 10 (NCPU * stathz * seconds);
increase it if you have more CPUs.
To use the CPU limit, pass the flag -S NSharedCPU to the /usr/sbin/jail
program.
My example: jail -S100 /usr/jails/root/ root.kostjn.pht 192.168.0.245
/bin/csh
I tested this with 10 simultaneous processes in the jail and in the main
system. The test program is an infinite loop, run on an 8-core Xeon using
RELENG_7.
The processes were started first in the jail, and afterwards in the main
system.
Below is the CPU usage over time of one tracked process from each.
Jail
root 1052 0.0 0.0 3692 784 p1 RJ 7:38PM 0:00.39 /test.o
root 1052 21.2 0.0 3692 784 p1 RJ 7:38PM 0:02.40 /test.o
root 1052 35.6 0.0 3692 784 p1 RJ 7:38PM 0:04.40 /test.o
root 1052 47.5 0.0 3692 784 p1 RJ 7:38PM 0:06.41 /test.o
root 1052 39.9 0.0 3692 784 p1 RJ 7:38PM 0:06.62 /test.o
root 1052 33.2 0.0 3692 784 p1 RJ 7:38PM 0:06.62 /test.o
root 1052 27.6 0.0 3692 784 p1 RJ 7:38PM 0:06.62 /test.o
root 1052 22.9 0.0 3692 784 p1 RJ 7:38PM 0:06.62 /test.o
root 1052 19.0 0.0 3692 784 p1 RJ 7:38PM 0:06.62 /test.o
root 1052 15.8 0.0 3692 784 p1 RJ 7:38PM 0:06.62 /test.o
root 1052 13.0 0.0 3692 784 p1 RJ 7:38PM 0:06.62 /test.o
root 1052 10.8 0.0 3692 784 p1 RJ 7:38PM 0:06.62 /test.o
root 1052 8.9 0.0 3692 784 p1 RJ 7:38PM 0:06.62 /tes
Main system
root 1088 14.9 0.0 3692 780 p0 R 7:38PM 0:01.57 /root/test.o
root 1088 30.8 0.0 3692 780 p0 R 7:38PM 0:03.60 /root/test.o
root 1088 43.8 0.0 3692 780 p0 R 7:38PM 0:05.60 /root/test.o
root 1088 51.0 0.0 3692 780 p0 R 7:38PM 0:07.25 /root/test.o
root 1088 50.8 0.0 3692 780 p0 R 7:38PM 0:08.28 /root/test.o
root 1088 49.1 0.0 3692 780 p0 R 7:38PM 0:09.21 /root/test.o
root 1088 48.1 0.0 3692 780 p0 R 7:38PM 0:10.24 /root/test.o
root 1088 46.2 0.0 3692 780 p0 R 7:38PM 0:11.17 /root/test.o
root 1088 42.9 0.0 3692 780 p0 R 7:38PM 0:11.95 /root/test.o
So we see that once the processes are started in the main system, the
jailed process can no longer use the CPU.
Please tell me about any problems you find in this patch.
This is an initial version; jail parameters cannot yet be tuned at
runtime.
So, this works, but I am not sure that it is the best way.
Trying to increase the (numeric) priority of jailed threads does not
work, because non-interactive threads (those that use a lot of CPU)
already have a low priority (numerically high).
Trying to decrease the number of ticks in the CPU time slice is also not
a good idea, because it increases the number of context switches under
high load.
Maybe you see another way to do this?
Please share your ideas.
Thanks.
The original cdjones CPU and memory limit patch:
http://wiki.freebsd.org/JailResourceLimits
diff -U3 -r --show-c-function --ignore-all-space --ignore-tab-expansion
--ignore-blank-lines sys/kern/kern_jail.c sys.new/kern/kern_jail.c
--- sys/kern/kern_jail.c 2009-03-10 22:33:50.000000000 +0300
+++ sys.new/kern/kern_jail.c 2009-04-17 18:51:34.000000000 +0400
@@ -531,6 +532,7 @@ kern_jail(struct thread *td, struct jail
}
#endif
pr->pr_linux = NULL;
+ pr->pr_sched_shares = j->sched_shares;
pr->pr_securelevel = securelevel;
if (prison_service_slots == 0)
pr->pr_slots = NULL;
diff -U3 -r --show-c-function --ignore-all-space --ignore-tab-expansion
--ignore-blank-lines sys/kern/sched_ule.c sys.new/kern/sched_ule.c
--- sys/kern/sched_ule.c 2009-03-30 23:20:56.000000000 +0400
+++ sys.new/kern/sched_ule.c 2009-04-17 19:10:07.000000000 +0400
@@ -61,6 +61,7 @@ __FBSDID("$FreeBSD: src/sys/kern/sched_u
#include <sys/umtx.h>
#include <sys/vmmeter.h>
#include <sys/cpuset.h>
+#include <sys/jail.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
@@ -186,6 +187,22 @@ static int sched_interact = SCHED_INTERA
static int realstathz;
static int tickincr;
static int sched_slice;
+
+#define ESTCPU_SHIFT 10
+/*
+ * estcpu: Global counter ticks from stat
timer
+ * flush_estcpu_interval: Number ticks, after that we to zero estcpu,
+ * flush_estcpu_interval = mp_ncpus*stathz*10,
+ * default 2*128*10 = 2560
+ * total_sched_shares: Total count shares cpu, 1000 per core,
+ * default 2*1000 = 2000
+*/
+
+
+static int estcpu;
+static int flush_estcpu_interval = 2560;
+static int total_sched_shares = 2000;
+
#ifdef PREEMPTION
#ifdef FULL_PREEMPTION
static int preempt_thresh = PRI_MAX_IDLE;
@@ -2200,6 +2219,7 @@ sched_clock(struct thread *td)
{
struct tdq *tdq;
struct td_sched *ts;
+ struct prison *pr = td->td_proc->p_ucred->cr_prison;
THREAD_LOCK_ASSERT(td, MA_OWNED);
tdq = TDQ_SELF();
@@ -2234,6 +2254,20 @@ sched_clock(struct thread *td)
td->td_sched->ts_runtime += tickincr;
sched_interact_update(td);
}
+
+ /* Increase counter and flush if need */
+ estcpu++;
+ if (pr != NULL)
+ pr->pr_estcpu++;
+
+ if (estcpu > flush_estcpu_interval){
+ estcpu = 0;
+ LIST_FOREACH(pr, &allprison, pr_list) {
+ pr->pr_estcpu = 0;
+ }
+ CTR0(KTR_SCHED,"Flush estcpu and pr_estcpu for all jails");
+ }
+
/*
* We used up one time slice.
*/
@@ -2375,6 +2409,8 @@ tdq_add(struct tdq *tdq, struct thread *
int cpumask;
#endif
+ struct prison *pr = td->td_proc->p_ucred->cr_prison;
+
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
KASSERT((td->td_inhibitors == 0),
("sched_add: trying to run inhibited thread"));
@@ -2383,6 +2419,32 @@ tdq_add(struct tdq *tdq, struct thread *
KASSERT(td->td_flags & TDF_INMEM,
("sched_add: thread swapped out"));
+ /* We move thread in IDLE queue if prison estimate cpu more than shares
+ * cpu and thread is not interactive. Use ESTCPU_SHIFT to avoid
+ * rounding away results */
+ if(pr != NULL)
+ CTR6(KTR_SCHED,"pid %i, prison %i, pr_estcpu %i,\
+ estcpu %i shares %i interact
%i",
+ td->td_proc->p_pid,pr->pr_id,pr->pr_estcpu,
+ estcpu, pr->pr_sched_shares,
sched_interact_score(td));
+ if (pr != NULL && pr->pr_sched_shares != 0 &&
+ sched_interact_score(td) > sched_interact &&
+ estcpu != 0 && total_sched_shares != 0){
+
+ if ((pr->pr_estcpu << ESTCPU_SHIFT) / (estcpu) >
+ (pr->pr_sched_shares << ESTCPU_SHIFT) /
(total_sched_shares))
+ {
+ td->td_priority = PRI_MIN_IDLE;
+ td->td_pri_class = PRI_IDLE;
+ CTR2(KTR_SCHED,"prison %i excess cpu limit!!! new pri = %i
",pr->pr_id,td->td_priority);
+
+ } else {
+ CTR1(KTR_SCHED,"prison %i use cpu less limit",pr->pr_id);
+ sched_priority(td);
+ td->td_pri_class = PRI_TIMESHARE;
+ }
+ }
+
ts = td->td_sched;
class = PRI_BASE(td->td_pri_class);
TD_SET_RUNQ(td);
@@ -2746,6 +2808,10 @@ SYSCTL_INT(_kern_sched, OID_AUTO, intera
"Interactivity score threshold");
SYSCTL_INT(_kern_sched, OID_AUTO, preempt_thresh, CTLFLAG_RW, &preempt_thresh,
0,"Min priority for preemption, lower priorities have greater
precedence");
+SYSCTL_INT(_kern_sched, OID_AUTO, flush_estcpu_interval, CTLFLAG_RW,
&flush_estcpu_interval,
+ 0,"Number ticks stat timer after thar we zero estcpu counter");
+SYSCTL_INT(_kern_sched, OID_AUTO, total_sched_shares, CTLFLAG_RW,
&total_sched_shares,
+ 0,"Total number shared cpu for system");
#ifdef SMP
SYSCTL_INT(_kern_sched, OID_AUTO, pick_pri, CTLFLAG_RW, &pick_pri, 0,
"Pick the target cpu based on priority rather than load.");
diff -U3 -r --show-c-function --ignore-all-space --ignore-tab-expansion
--ignore-blank-lines sys/sys/jail.h sys.new/sys/jail.h
--- sys/sys/jail.h 2009-02-18 23:12:08.000000000 +0300
+++ sys.new/sys/jail.h 2009-04-17 18:53:43.000000000 +0400
@@ -31,6 +31,7 @@ struct jail {
uint32_t ip6s;
struct in_addr *ip4;
struct in6_addr *ip6;
+ uint32_t sched_shares;
};
#define JAIL_API_VERSION 2
@@ -132,6 +133,9 @@ struct prison {
struct task pr_task; /* (d) destroy task */
struct mtx pr_mtx;
void **pr_slots; /* (p) additional data
*/
+ uint32_t pr_estcpu; /* (p) cpu usage */
+ uint32_t pr_sched_shares; /* (c) number virtual cpu */
+
int pr_ip4s; /* (c) number of v4 IPs
*/
struct in_addr *pr_ip4; /* (c) v4 IPs of jail */
int pr_ip6s; /* (c) number of v6 IPs
*/
diff -U3 -r --show-c-function --ignore-all-space --ignore-tab-expansion
--ignore-blank-lines usr.sbin/jail/jail.c usr.sbin.new/jail/jail.c
--- usr.sbin/jail/jail.c 2009-02-07 16:19:08.000000000 +0300
+++ usr.sbin.new/jail/jail.c 2009-04-17 18:57:15.000000000 +0400
@@ -83,6 +83,7 @@ main(int argc, char **argv)
int ch, error, i, ngroups, securelevel;
int hflag, iflag, Jflag, lflag, uflag, Uflag;
char path[PATH_MAX], *jailname, *ep, *username, *JidFile, *ip;
+ uint32_t sched_shares = 0;
static char *cleanenv;
const char *shell, *p = NULL;
long ltmp;
@@ -94,7 +95,7 @@ main(int argc, char **argv)
jailname = username = JidFile = cleanenv = NULL;
fp = NULL;
- while ((ch = getopt(argc, argv, "hiln:s:u:U:J:")) != -1) {
+ while ((ch = getopt(argc, argv, "hilS:n:s:u:U:J:")) != -1) {
switch (ch) {
case 'h':
hflag = 1;
@@ -115,6 +116,9 @@ main(int argc, char **argv)
errx(1, "invalid securelevel: `%s'", optarg);
securelevel = ltmp;
break;
+ case 'S':
+ sched_shares = (uint32_t)strtol(optarg,NULL,10);
+ break;
case 'u':
username = optarg;
uflag = 1;
@@ -152,6 +156,8 @@ main(int argc, char **argv)
if (jailname != NULL)
j.jailname = jailname;
+ j.sched_shares = sched_shares;
+
/* Handle IP addresses. If requested resolve hostname too. */
bzero(&hints, sizeof(struct addrinfo));
hints.ai_protocol = IPPROTO_TCP;
@@ -264,9 +270,10 @@ static void
usage(void)
{
- (void)fprintf(stderr, "%s%s%s\n",
+ (void)fprintf(stderr, "%s%s%s%s\n",
"usage: jail [-hi] [-n jailname] [-J jid_file] ",
"[-s securelevel] [-l -u username | -U username] ",
+ "[-S number shared cpu] ",
"path hostname [ip[,..]] command ...");
exit(1);
}
diff -U3 -r --show-c-function --ignore-all-space --ignore-tab-expansion
--ignore-blank-lines sys/kern/kern_jail.c sys.new/kern/kern_jail.c
--- sys/kern/kern_jail.c 2008-11-25 05:59:29.000000000 +0300
+++ sys.new/kern/kern_jail.c 2009-04-17 20:23:40.000000000 +0400
@@ -156,6 +156,7 @@ jail(struct thread *td, struct jail_args
goto e_dropvnref;
pr->pr_ip = j.ip_number;
pr->pr_linux = NULL;
+ pr->pr_sched_shares = j->sched_shares;
pr->pr_securelevel = securelevel;
if (prison_service_slots == 0)
pr->pr_slots = NULL;
diff -U3 -r --show-c-function --ignore-all-space --ignore-tab-expansion
--ignore-blank-lines sys/kern/sched_ule.c sys.new/kern/sched_ule.c
--- sys/kern/sched_ule.c 2008-11-25 05:59:29.000000000 +0300
+++ sys.new/kern/sched_ule.c 2009-04-17 20:23:40.000000000 +0400
@@ -61,6 +61,7 @@ __FBSDID("$FreeBSD: src/sys/kern/sched_u
#include <sys/umtx.h>
#include <sys/vmmeter.h>
#include <sys/cpuset.h>
+#include <sys/jail.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
@@ -186,6 +187,22 @@ static int sched_interact = SCHED_INTERA
static int realstathz;
static int tickincr;
static int sched_slice;
+
+#define ESTCPU_SHIFT 10
+/*
+ * estcpu: Global counter ticks from stat
timer
+ * flush_estcpu_interval: Number ticks, after that we to zero estcpu,
+ * flush_estcpu_interval = mp_ncpus*stathz*10,
+ * default 2*128*10 = 2560
+ * total_sched_shares: Total count shares cpu, 1000 per core,
+ * default 2*1000 = 2000
+*/
+
+
+static int estcpu;
+static int flush_estcpu_interval = 2560;
+static int total_sched_shares = 2000;
+
#ifdef PREEMPTION
#ifdef FULL_PREEMPTION
static int preempt_thresh = PRI_MAX_IDLE;
@@ -2200,6 +2217,7 @@ sched_clock(struct thread *td)
{
struct tdq *tdq;
struct td_sched *ts;
+ struct prison *pr = td->td_proc->p_ucred->cr_prison;
THREAD_LOCK_ASSERT(td, MA_OWNED);
tdq = TDQ_SELF();
@@ -2234,6 +2252,20 @@ sched_clock(struct thread *td)
td->td_sched->ts_runtime += tickincr;
sched_interact_update(td);
}
+
+ /* Increase counter and flush if need */
+ estcpu++;
+ if (pr != NULL)
+ pr->pr_estcpu++;
+
+ if (estcpu > flush_estcpu_interval){
+ estcpu = 0;
+ LIST_FOREACH(pr, &allprison, pr_list) {
+ pr->pr_estcpu = 0;
+ }
+ CTR0(KTR_SCHED,"Flush estcpu and pr_estcpu for all jails");
+ }
+
/*
* We used up one time slice.
*/
@@ -2375,6 +2407,8 @@ tdq_add(struct tdq *tdq, struct thread *
int cpumask;
#endif
+ struct prison *pr = td->td_proc->p_ucred->cr_prison;
+
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
KASSERT((td->td_inhibitors == 0),
("sched_add: trying to run inhibited thread"));
@@ -2383,6 +2417,32 @@ tdq_add(struct tdq *tdq, struct thread *
KASSERT(td->td_flags & TDF_INMEM,
("sched_add: thread swapped out"));
+ /* We move thread in IDLE queue if prison estimate cpu more than shares
+ * cpu and thread is not interactive. Use ESTCPU_SHIFT to avoid
+ * rounding away results */
+ if(pr != NULL)
+ CTR6(KTR_SCHED,"pid %i, prison %i, pr_estcpu %i,\
+ estcpu %i shares %i interact
%i",
+ td->td_proc->p_pid,pr->pr_id,pr->pr_estcpu,
+ estcpu, pr->pr_sched_shares,
sched_interact_score(td));
+ if (pr != NULL && pr->pr_sched_shares != 0 &&
+ sched_interact_score(td) > sched_interact &&
+ estcpu != 0 && total_sched_shares != 0){
+
+ if ((pr->pr_estcpu << ESTCPU_SHIFT) / (estcpu) >
+ (pr->pr_sched_shares << ESTCPU_SHIFT) /
(total_sched_shares))
+ {
+ td->td_priority = PRI_MIN_IDLE;
+ td->td_pri_class = PRI_IDLE;
+ CTR2(KTR_SCHED,"prison %i excess cpu limit!!! new pri = %i
",pr->pr_id,td->td_priority);
+
+ } else {
+ CTR1(KTR_SCHED,"prison %i use cpu less limit",pr->pr_id);
+ sched_priority(td);
+ td->td_pri_class = PRI_TIMESHARE;
+ }
+ }
+
ts = td->td_sched;
class = PRI_BASE(td->td_pri_class);
TD_SET_RUNQ(td);
@@ -2741,6 +2801,10 @@ SYSCTL_INT(_kern_sched, OID_AUTO, intera
"Interactivity score threshold");
SYSCTL_INT(_kern_sched, OID_AUTO, preempt_thresh, CTLFLAG_RW, &preempt_thresh,
0,"Min priority for preemption, lower priorities have greater
precedence");
+SYSCTL_INT(_kern_sched, OID_AUTO, flush_estcpu_interval, CTLFLAG_RW,
&flush_estcpu_interval,
+ 0,"Number ticks stat timer after thar we zero estcpu counter");
+SYSCTL_INT(_kern_sched, OID_AUTO, total_sched_shares, CTLFLAG_RW,
&total_sched_shares,
+ 0,"Total number shared cpu for system");
#ifdef SMP
SYSCTL_INT(_kern_sched, OID_AUTO, pick_pri, CTLFLAG_RW, &pick_pri, 0,
"Pick the target cpu based on priority rather than load.");
diff -U3 -r --show-c-function --ignore-all-space --ignore-tab-expansion
--ignore-blank-lines sys/sys/jail.h sys.new/sys/jail.h
--- sys/sys/jail.h 2008-11-25 05:59:29.000000000 +0300
+++ sys.new/sys/jail.h 2009-04-17 20:26:54.000000000 +0400
@@ -18,6 +18,7 @@ struct jail {
char *path;
char *hostname;
u_int32_t ip_number;
+ uint32_t sched_shares;
};
struct xprison {
@@ -74,6 +75,8 @@ struct prison {
struct task pr_task; /* (d) destroy task */
struct mtx pr_mtx;
void **pr_slots; /* (p) additional data
*/
+ uint32_t pr_estcpu; /* (p) cpu usage */
+ uint32_t pr_sched_shares; /* (c) number virtual cpu */
};
#endif /* _KERNEL || _WANT_PRISON */
diff -U3 -r --show-c-function --ignore-all-space --ignore-tab-expansion
--ignore-blank-lines usr.sbin/jail/jail.c usr.sbin.new/jail/jail.c
--- usr.sbin/jail/jail.c 2008-11-25 05:59:29.000000000 +0300
+++ usr.sbin.new/jail/jail.c 2009-04-17 20:31:17.000000000 +0400
@@ -57,6 +57,7 @@ main(int argc, char **argv)
gid_t groups[NGROUPS];
int ch, i, iflag, Jflag, lflag, ngroups, securelevel, uflag, Uflag;
char path[PATH_MAX], *ep, *username, *JidFile;
+ uint32_t sched_shares = 0;
static char *cleanenv;
const char *shell, *p = NULL;
long ltmp;
@@ -67,7 +68,7 @@ main(int argc, char **argv)
username = JidFile = cleanenv = NULL;
fp = NULL;
- while ((ch = getopt(argc, argv, "ils:u:U:J:")) != -1) {
+ while ((ch = getopt(argc, argv, "ilS:s:u:U:J:")) != -1) {
switch (ch) {
case 'i':
iflag = 1;
@@ -82,6 +83,9 @@ main(int argc, char **argv)
errx(1, "invalid securelevel: `%s'", optarg);
securelevel = ltmp;
break;
+ case 'S':
+ sched_shares = (uint32_t)strtol(optarg,NULL,10);
+ break;
case 'u':
username = optarg;
uflag = 1;
@@ -115,6 +119,7 @@ main(int argc, char **argv)
j.version = 0;
j.path = path;
j.hostname = argv[1];
+ j.sched_shares = sched_shares;
if (inet_aton(argv[2], &in) == 0)
errx(1, "Could not make sense of ip-number: %s", argv[2]);
j.ip_number = ntohl(in.s_addr);
@@ -182,9 +187,10 @@ static void
usage(void)
{
- (void)fprintf(stderr, "%s%s%s\n",
+ (void)fprintf(stderr, "%s%s%s%s\n",
"usage: jail [-i] [-J jid_file] [-s securelevel] [-l -u ",
"username | -U username]",
+ "[-S number shared cpu] ",
" path hostname ip-number command ...");
exit(1);
}
_______________________________________________
freebsd-jail@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-jail
To unsubscribe, send any mail to "freebsd-jail-unsubscr...@freebsd.org"