This patch (linux-2.4.2-csa_module.patch) applies against the 2.4.2 kernel with the PAGG linux-2.4.2-pagg.patch, linux-2.4.2-pagg-job.patch, and linux-2.4.2-csa.patch patches applied first. This CSA patch supports i386 and ia64 platforms.

This patch provides job accounting code which can be compiled directly into the kernel or compiled as a module. The following record types are written:

 - start-of-job record (when a new job is initiated)
 - end-of-job record (when the last process within a job exits)
 - end-of-process record (when a process within a job exits)
 - configuration record (when the CSA job accounting configuration changes)
 - daemon record (daemon specific events)

These records are processed and grouped by job id outside of the kernel. CSA can be configured to limit the amount and types of records written, including defining thresholds for cpu and memory usage (don't write a record if process usage is below these values).

For further information about CSA job accounting, please read the overview and kernel changes documents available at oss.sgi.com/projects/csa. There is a download link at that site to get the CSA kernel patches, commands packages in rpm and tarball format, and an Admin Guide.

For further information about PAGG and jobs, please see the oss.sgi.com/projects/pagg web site.

----
Marlys Kohnke        Silicon Graphics Inc.
[EMAIL PROTECTED]    655F Lone Oak Drive
(651)683-5324        Eagan, MN 55121

linux-2.4.2-csa_module.patch follows:
-------------------------------------------------------------------------------------
diff -urN linux-2.4.2.csa+csa-patch/Documentation/Configure.help linux-2.4.2.csa/Documentation/Configure.help
--- linux-2.4.2.csa+csa-patch/Documentation/Configure.help Mon Mar 5 11:22:24 2001
+++ linux-2.4.2.csa/Documentation/Configure.help Mon Mar 5 12:02:31 2001
@@ -2758,13 +2758,15 @@ within system boot uptime periods. These accounting records are then used to produce reports and charge fees to users. 
- Say Y here if you want job level accounting to be done by the - kernel. The CSA module needs to be loaded to write the - accounting records to a file. The CSA commands and scripts - package needs to be installed to process the CSA accounting - records. See http://oss.sgi.com/projects/csa for further - information about CSA and download instructions for the CSA - module and commands package. + Say Y here if you want job level accounting to be compiled into + the kernel. Say M here if you want the writing of accounting + records portion of this feature to be a loadable module. Say + N here if you do not want job level accounting (the default). + + The CSA commands and scripts package needs to be installed to + process the CSA accounting records. See http://oss.sgi.com/projects/csa + for further information about CSA and download instructions for the CSA + commands package and documentation. Sysctl support CONFIG_SYSCTL diff -urN linux-2.4.2.csa+csa-patch/arch/i386/config.in linux-2.4.2.csa/arch/i386/config.in --- linux-2.4.2.csa+csa-patch/arch/i386/config.in Mon Mar 5 11:22:24 2001 +++ linux-2.4.2.csa/arch/i386/config.in Mon Mar 5 12:02:31 2001 @@ -217,12 +217,12 @@ bool 'System V IPC' CONFIG_SYSVIPC bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT -bool 'CSA Job Accounting' CONFIG_CSA_JOB_ACCT bool 'Sysctl support' CONFIG_SYSCTL bool 'Support for process aggregates (PAGGs)' CONFIG_PAGG if [ "$CONFIG_PAGG" = "y" ] ; then tristate ' Process aggregate based jobs' CONFIG_PAGG_JOB fi +dep_tristate ' CSA Job Accounting' CONFIG_CSA_JOB_ACCT $CONFIG_PAGG_JOB if [ "$CONFIG_PROC_FS" = "y" ]; then choice 'Kernel core (/proc/kcore) format' \ "ELF CONFIG_KCORE_ELF \ diff -urN linux-2.4.2.csa+csa-patch/arch/ia64/config.in linux-2.4.2.csa/arch/ia64/config.in --- linux-2.4.2.csa+csa-patch/arch/ia64/config.in Mon Mar 5 11:22:24 2001 +++ linux-2.4.2.csa/arch/ia64/config.in Mon Mar 5 12:02:31 2001 @@ -92,12 +92,12 @@ bool 'Networking support' CONFIG_NET bool 'System V IPC' 
CONFIG_SYSVIPC bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT -bool 'CSA Job Accounting' CONFIG_CSA_JOB_ACCT bool 'Sysctl support' CONFIG_SYSCTL bool 'Support for process aggregates (PAGGs)' CONFIG_PAGG if [ "$CONFIG_PAGG" = "y" ] ; then tristate ' Process aggregate based jobs' CONFIG_PAGG_JOB fi +dep_tristate ' CSA Job Accounting' CONFIG_CSA_JOB_ACCT $CONFIG_PAGG_JOB tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC diff -urN linux-2.4.2.csa+csa-patch/drivers/misc/Makefile linux-2.4.2.csa/drivers/misc/Makefile --- linux-2.4.2.csa+csa-patch/drivers/misc/Makefile Mon Mar 5 11:19:16 2001 +++ linux-2.4.2.csa/drivers/misc/Makefile Mon Mar 5 12:02:31 2001 @@ -14,6 +14,7 @@ export-objs := job.o obj-$(CONFIG_PAGG_JOB) += job.o +obj-$(CONFIG_CSA_JOB_ACCT) += csa_job_acct.o include $(TOPDIR)/Rules.make diff -urN linux-2.4.2.csa+csa-patch/drivers/misc/csa_job_acct.c linux-2.4.2.csa/drivers/misc/csa_job_acct.c --- linux-2.4.2.csa+csa-patch/drivers/misc/csa_job_acct.c Wed Dec 31 18:00:00 1969 +++ linux-2.4.2.csa/drivers/misc/csa_job_acct.c Mon Mar 5 12:02:31 2001 @@ -0,0 +1,1479 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc and LANL All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + */ + +/* + * Description: + * This file, drivers/misc/csa_job_acct.c, contains the procedures that + * do the real work of configuring CSA, writing CSA accounting + * records, and processing the acctctl syscall. This code can + * either be compiled directly into the kernel or compiled as + * a loadable module. + * + * During initialization, this code registers procedure callbacks + * with the PAGG job code and the kernel/csa.c code. + * + * Author: + * Marlys Kohnke ([EMAIL PROTECTED]) + * + * Contributors: + * + * Changes: + * January 31, 2001 (kohnke) Changed to use semaphores rather than + * spinlocks. Was seeing a spinlock deadlock sometimes when an accounting + * record was being written to disk with 2.4.0 (didn't happen with + * 2.4.0-test7). + * + * February 2, 2001 (kohnke) Changed to handle being compiled directly + * into the kernel, not just compiled as a loadable module. Renamed + * init_module() as init_csa() and cleanup_module() as cleanup_csa(). + * Added calls to module_init() and module_exit(). 
+ */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/file.h> +#include <linux/utsname.h> +#include <asm/uaccess.h> +#include <asm/semaphore.h> + +#include <linux/csa_internal.h> +#include <linux/csa.h> +#include <linux/job.h> + +int csa_jstart(int, void *); +int csa_jexit(int, void *); +int do_acctctl(int, void *); +void do_csa_acct(int, struct task_struct *); +static int csa_modify_buf(char *, struct acctcsa *, struct acctmem *, + struct acctio *, int, int); +static int csa_write(char *, int, int, uint64_t, int, job_csa_t *); +static void csa_config_make(ac_eventtype, struct acctcfg *); +static int csa_config_write(ac_eventtype,struct file *); +static void csa_header(struct achead *, int, int, int); +static long int sc_CLK(long int); + +#if defined __ia64__ +#define JID_ERR1 "do_csa_acct: No job table entry for jid 0x%lx.\n" +#define JID_ERR2 "csa user job accounting write error %d, jid 0x%lx\n" +#define JID_ERR3 "Can't disable csa user job accounting jid 0x%lx\n" +#define JID_ERR4 "csa user job accounting disabled, jid 0x%lx\n" +#else +#define JID_ERR1 "do_csa_acct: No job table entry for jid 0x%llx.\n" +#define JID_ERR2 "csa user job accounting write error %d, jid 0x%llx\n" +#define JID_ERR3 "Can't disable csa user job accounting jid 0x%llx\n" +#define JID_ERR4 "csa user job accounting disabled, jid 0x%llx\n" +#endif + +/* these defines can be removed once they're available in kernel header files */ +#define USEC_PER_SEC 1000000L /* number of usecs for 1 second */ +#define USEC_PER_TICK (USEC_PER_SEC/HZ) +#define NBPC PAGE_SIZE /* Number of bytes per click */ +#define ctob(x) ((uint64_t)(x)*NBPC) + + +static struct file *csa_acctvp = (struct file *)NULL; +static time_t boottime = 0; + +struct timeval acct_now; /* present time (sec, usec) */ + +static DECLARE_MUTEX(csa_sem); +static DECLARE_MUTEX(csa_write_sem); + 
+static int csa_flag = 0; /* accounting start state flag */ +char csa_path[ACCT_PATH] = ""; /* current accounting file path name */ +char new_path[ACCT_PATH] = ""; /* new accounting file path name */ + +static job_acctmod_t csa_job_callbacks = { + JOB_ACCT_CSA, + csa_jstart, + csa_jexit, + THIS_MODULE +}; +static struct csa_module_s csa_callbacks = { + do_acctctl, + do_csa_acct, + THIS_MODULE +}; + +/* modify this when changes are made to ac_kdrcd in csa.h */ +char *acct_dmd_name[ACCT_MAXKDS] = + {"CSA", + "JOB", + "ASH", + "NQS", + "WORKLOAD MGMT", + "TAPE", + "DATA MIGRATION", + "SOCKET", + "SITE1", + "SITE2" }; + +typedef enum { + A_SYS, /* system accounting action (0) */ + A_CJA, /* Job accounting action (1) */ + A_DMD, /* daemon accounting action (2) */ + A_MAX} a_fnc; + +struct actstat acct_dmd[ACCT_MAXKDS][A_MAX]; +struct actstat acct_rcd[ACCT_MAXRCDS-ACCT_RCDS][A_MAX]; + +/* Initialize the CSA accounting state information. */ +#define INIT_DMD(t, i, s, p) acct_dmd[i][t].ac_ind = i; \ + acct_dmd[i][t].ac_state = s; \ + acct_dmd[i][t].ac_param = p; +#define INIT_RCD(t, i, s, p) acct_rcd[i-ACCT_RCDS][t].ac_ind = i; \ + acct_rcd[i-ACCT_RCDS][t].ac_state = s; \ + acct_rcd[i-ACCT_RCDS][t].ac_param = p; + +/* + * register procedure callbacks with the kernel/csa.c CSA + * code and with the PAGG job code + */ +static int __init +init_csa(void) +{ + int retval = 0; + + /* + * register callbacks with the CSA kernel/csa.c code to + * process the acctctl syscall and write end-of-process accounting + * records. + */ + retval = register_csa(&csa_callbacks); + if (retval != 0) { + /* no point in continuing */ + return retval; + } + /* + * register callbacks with the PAGG job code to process + * start-of-job and end-of-job accounting records. If this is a + * module, this registration will also increment the job module + * use count so the job module won't be unloaded out from under + * the CSA module. 
+ */ + retval = job_register_acct(&csa_job_callbacks); + return retval; +} + +/* + * Do module cleanup before the module is removed; unregister + * procedure callbacks with the kernel non-module CSA code and + * with the PAGG job module (which decrements the job module use count). + */ +static void __exit +cleanup_csa(void) +{ + int retval = 0; + + unregister_csa(); + + retval = job_unregister_acct(&csa_job_callbacks); + if (retval < 0) { + printk(KERN_ERR "CSA module can't unregister with job module." + "Continuing with CSA module cleanup.\n"); + } + return; +} + +/* + * Initialize the CSA accounting state table. + * Modify this when changes are made to ac_kdrcd in csa.h + * + */ +static void +csa_init_acct(int flag) +{ + csa_flag = flag; + + boottime = xtime.tv_sec - (jiffies / HZ); + + /* Initialize system accounting states. */ + INIT_DMD(A_SYS, ACCT_KERN_CSA, ACS_OFF, 0); + INIT_DMD(A_SYS, ACCT_KERN_JOB_PROC, ACS_OFF, 0); + INIT_DMD(A_SYS, ACCT_KERN_ASH, ACS_OFF, 0); + INIT_DMD(A_SYS, ACCT_DMD_NQS, ACS_OFF, 0); + INIT_DMD(A_SYS, ACCT_DMD_WKMG, ACS_OFF, 0); + INIT_DMD(A_SYS, ACCT_DMD_TAPE, ACS_OFF, 0); + INIT_DMD(A_SYS, ACCT_DMD_SOCKET, ACS_OFF, 0); + INIT_DMD(A_SYS, ACCT_DMD_DMIG, ACS_OFF, 0); + INIT_DMD(A_SYS, ACCT_DMD_SITE1, ACS_OFF, 0); + INIT_DMD(A_SYS, ACCT_DMD_SITE2, ACS_OFF, 0); + + INIT_RCD(A_SYS, ACCT_RCD_MPPDET, ACS_OFF, 0); + INIT_RCD(A_SYS, ACCT_RCD_MEM, ACS_OFF, 0); + INIT_RCD(A_SYS, ACCT_RCD_IO, ACS_OFF, 0); + INIT_RCD(A_SYS, ACCT_RCD_MT, ACS_OFF, 0); + INIT_RCD(A_SYS, ACCT_RCD_MPP, ACS_OFF, 0); + INIT_RCD(A_SYS, ACCT_THD_MEM, ACS_OFF, 0); + INIT_RCD(A_SYS, ACCT_THD_TIME, ACS_OFF, 0); + INIT_RCD(A_SYS, ACCT_RCD_INCACCT, ACS_OFF, 0); + INIT_RCD(A_SYS, ACCT_RCD_APPACCT, ACS_OFF, 0); + INIT_RCD(A_SYS, ACCT_RCD_SITE1, ACS_OFF, 0); + INIT_RCD(A_SYS, ACCT_RCD_SITE2, ACS_OFF, 0); + + return; +} + +/* + * convert ticks into microseconds; necessary kernel math ops not + * available on 32-bit systems, so can't use uint64_t + */ +static long int +sc_CLK(long 
int clock) +{ + long int sec, split; + + sec = clock / HZ; + split = (clock % HZ) * 1000000 / HZ; + + return ((sec * 1000000) + split); +} + +/* Initialize CSA accounting header. */ +static void +csa_header(struct achead *head, int revision, int type, int size) +{ + head->ah_magic = ACCT_MAGIC; + head->ah_revision = revision; + head->ah_type = type; + head->ah_flag = 0; + head->ah_size = size; + + return; +} + +/* + * Create a CSA end-of-process accounting record and write it to + * appropriate file(s) + */ +void +do_csa_acct(int exitcode, struct task_struct *p) +{ + char acctent[sizeof(struct acctcsa) + + sizeof(struct acctmem) + + sizeof(struct acctio) ]; + char modacctent[sizeof(struct acctcsa) + + sizeof(struct acctmem) + + sizeof(struct acctio) ]; + struct acctcsa *csa = NULL; + struct acctmem *mem = NULL; + struct acctio *io = NULL; + struct achead *hdr1, *hdr2; + char *cb = acctent; + job_csa_t job_acctbuf; + uint64_t jid = 0; + int len = 0; + int csa_enabled = 0; + int ja_enabled = 0; + int io_enabled = 0; + int mem_enabled = 0; + int retval = 0; + uint64_t memtime; + + if (p == NULL) { + printk(KERN_ERR "do_csa_acct: CSA null task pointer\n"); + return; + } + jid = job_getjid(p); + if (jid <= 0) { + /* no job table entry; not all processes are part of a job */ + return; + } + memset(&job_acctbuf, 0, sizeof(job_acctbuf)); + retval = job_getacct(jid, JOB_ACCT_CSA, &job_acctbuf); + if (retval != 0) { + /* couldn't get accounting info stored in the job table entry */ + printk(KERN_WARNING JID_ERR1, jid); + return; + } + + down(&csa_sem); + /* + * figure out what's turned on, which determines which record types + * need to be written. All records are written to a user job + * accounting file. 
Only those record types configured on are + * written to the system pacct file + */ + if (job_acctbuf.job_acctfile != (struct file *)NULL) { + ja_enabled = 1; + } + if (acct_dmd[ACCT_KERN_CSA][A_SYS].ac_state == ACS_ON) { + csa_enabled = 1; + } + if (acct_rcd[ACCT_RCD_IO-ACCT_RCDS][A_SYS].ac_state == ACS_ON) { + io_enabled = 1; + } + if (acct_rcd[ACCT_RCD_MEM-ACCT_RCDS][A_SYS].ac_state == ACS_ON) { + mem_enabled = 1; + } + + if (!ja_enabled && !csa_enabled) { + /* nothing to do */ + up(&csa_sem); + return; + } + up(&csa_sem); + + csa = (struct acctcsa *)acctent; + memset(csa, 0, sizeof(struct acctcsa)); + hdr1 = &csa->ac_hdr1; + csa_header(hdr1, REV_CSA, ACCT_KERNEL_CSA, sizeof(struct acctcsa) ); + hdr2 = &csa->ac_hdr2; + csa_header(hdr2, REV_CSA, ACCT_KERNEL_CSA, 0 ); + hdr2->ah_magic = ~ACCT_MAGIC; + + csa->ac_stat = exitcode; + csa->ac_uid = p->uid; + csa->ac_gid = p->gid; + + /* XXX change this when array session handle info available */ + csa->ac_ash = 0; + csa->ac_jid = job_acctbuf.job_id; + /* XXX change this when project ids are available */ + csa->ac_prid = 0; + csa->ac_nice = p->nice; + csa->ac_sched = p->policy; + + csa->ac_pid = p->pid; + csa->ac_ppid = (p->p_pptr) ? p->p_pptr->pid : 0; + if (p->flags & PF_FORKNOEXEC) { + csa->ac_hdr1.ah_flag |= AFORK; + } + if (p->flags & PF_SUPERPRIV) { + csa->ac_hdr1.ah_flag |= ASU; + } + if (p->flags & PF_DUMPCORE) { + csa->ac_hdr1.ah_flag |= ACORE; + } + if (p->flags & PF_SIGNALED) { + csa->ac_hdr1.ah_flag |= AXSIG; + } + csa->ac_hdr1.ah_flag &= ~ACKPT; + + strncpy(csa->ac_comm, p->comm, sizeof(csa->ac_comm)); + csa->ac_btime = CT_TO_SECS(p->start_time) + (xtime.tv_sec - + (jiffies / HZ)); + /* + * cpu usage is accumulated by the kernel in ticks. + * convert from clock ticks to microseconds; each process gets + * a minimum of a tick for elapsed time. If the granularity + * changes to something finer than a tick in the future, + * then these zero cpu and elapsed time modifications should be + * looked at again. 
+ */ + csa->ac_etime = (jiffies - p->start_time == 0) ? (USEC_PER_TICK) : + ((uint64_t)(jiffies - p->start_time) * USEC_PER_TICK); + + cb += sizeof(struct acctcsa); + len += sizeof(struct acctcsa); + + /* convert from ticks to microseconds */ + csa->ac_utime = p->times.tms_utime * USEC_PER_TICK; + csa->ac_stime = p->times.tms_stime * USEC_PER_TICK; + /* Each process gets a minimum of a half tick cpu time */ + if ((csa->ac_utime == 0) && (csa->ac_stime == 0)) { + csa->ac_stime = USEC_PER_TICK/2; + } + + /* Create the memory record if needed */ + if (ja_enabled || mem_enabled) { + mem = (struct acctmem *)cb; + memset(mem, 0, sizeof(struct acctmem)); + hdr1->ah_flag |= AMORE; + hdr2->ah_type |= ACCT_MEM; + hdr1 = &mem->ac_hdr; + csa_header(hdr1, REV_MEM, ACCT_KERNEL_MEM, + sizeof(struct acctmem) ); + + /* adjust from pages/ticks to Mb/usec */ + memtime = sc_CLK((long int)p->csa_rss_mem1); + mem->ac_core.mem1 = ctob(memtime) / (1024 * 1024); + memtime = sc_CLK((long int)p->csa_vm_mem1); + mem->ac_virt.mem1 = ctob(memtime) / (1024 * 1024); + + /* adjust page size to 1K units */ + if (p->mm) { + mem->ac_virt.himem = p->mm->hiwater_vm * (PAGE_SIZE / 1024); + mem->ac_core.himem = p->mm->hiwater_rss * (PAGE_SIZE/1024); + /* + * For processes with zero systime, set the integral + * to the highwater mark rather than leave at zero + */ + if (mem->ac_core.mem1 == 0) { + mem->ac_core.mem1 = mem->ac_core.himem / 1024; + } + if (mem->ac_virt.mem1 == 0) { + mem->ac_virt.mem1 = mem->ac_virt.himem / 1024; + } + } + + mem->ac_pgswap = p->nswap; + mem->ac_minflt = p->min_flt; + mem->ac_majflt = p->maj_flt; + + cb += sizeof(struct acctmem); + hdr2->ah_size += sizeof(struct acctmem); + len += sizeof(struct acctmem); + } + /* Create the I/O record */ + if (ja_enabled || io_enabled) { + io = (struct acctio *)cb; + memset(io, 0, sizeof(struct acctio)); + hdr1->ah_flag |= AMORE; + hdr2->ah_type |= ACCT_IO; + hdr1 = &io->ac_hdr; + csa_header(hdr1, REV_IO, ACCT_KERNEL_IO, + sizeof(struct 
acctio) ); + + /* convert from ticks to microseconds */ + /* XXX when able to do kernel 64 bit divide, change type */ + io->ac_bwtime = CT_TO_USECS((unsigned long int)p->bwtime); + + io->ac_bkr = p->rblk; + io->ac_bkw = p->wblk; + + /* raw wait time; currently not used */ + io->ac_rwtime = 0; + + io->ac_chr = p->rchar; + io->ac_chw = p->wchar; + io->ac_scr = p->syscr; + io->ac_scw = p->syscw; + + cb += sizeof(struct acctio); + hdr2->ah_size += sizeof(struct acctio); + len += sizeof(struct acctio); + } + + /* record always written to a user job accounting file */ + if ((len > 0) && (job_acctbuf.job_acctfile != (struct file *)NULL) ) { + csa_write((caddr_t)&acctent, ACCT_KERN_CSA, + len, jid, A_CJA, &job_acctbuf); + } + /* + * check the cpu time and virtual memory thresholds before writing + * this record to the system pacct file + */ + if ((acct_rcd[ACCT_THD_MEM-ACCT_RCDS][A_SYS].ac_state == ACS_ON) && + (ja_enabled || mem_enabled)) { + if (mem->ac_virt.himem < + acct_rcd[ACCT_THD_MEM-ACCT_RCDS][A_SYS].ac_param) { + /* don't write record to pacct */ + return; + } + } + if ((acct_rcd[ACCT_THD_TIME-ACCT_RCDS][A_SYS].ac_state == ACS_ON)) { + if ((csa->ac_utime + csa->ac_stime) < + acct_rcd[ACCT_THD_TIME-ACCT_RCDS][A_SYS].ac_param) { + /* don't write record to pacct */ + return; + } + } + + if ((len > 0) && (csa_acctvp != (struct file *)NULL) && csa_enabled ) { + if (io_enabled && mem_enabled) { + /* write out buffer as is to system pacct file */ + csa_write((caddr_t)&acctent, ACCT_KERN_CSA, + len, jid, A_SYS, &job_acctbuf); + } else { + /* only write out record types turned on */ + len = csa_modify_buf(modacctent, csa, mem, io, + io_enabled, mem_enabled); + csa_write((caddr_t)&modacctent, ACCT_KERN_CSA, + len, jid, A_SYS, &job_acctbuf); + } + } + return; +} + +/* + * Copy needed accounting records into buffer, skipping record + * types which are not enabled. 
May need to adjust downward + * the second header size if not both memory and io continuation + * records are written, plus adjust the second header types and + * first header flags. + */ +static int +csa_modify_buf(char *modacctent, struct acctcsa *csa, struct acctmem *mem, + struct acctio *io, int io_enabled, int mem_enabled) +{ + int size = 0; + int len = 0; + char *bufptr; + struct achead *hdr1, *hdr2; + + size = sizeof(struct acctcsa) + sizeof(struct acctmem) + + sizeof(struct acctio); + memset(modacctent, 0, size); + bufptr = modacctent; + /* + * adjust values that might not be correct anymore if all of + * the continuation records aren't written out to the pacct file + */ + hdr1 = &csa->ac_hdr1; + hdr2 = &csa->ac_hdr2; + hdr1->ah_flag &= ~AMORE; + hdr2->ah_type = ACCT_KERNEL_CSA; + hdr2->ah_size = 0; + if (mem_enabled) { + hdr1->ah_flag |= AMORE; + hdr2->ah_type |= ACCT_MEM; + hdr2->ah_size += sizeof(struct acctmem); + hdr1 = &mem->ac_hdr; + hdr1->ah_flag &= ~AMORE; + } + if (io_enabled) { + hdr1->ah_flag |= AMORE; + hdr2->ah_type |= ACCT_IO; + hdr2->ah_size += sizeof(struct acctio); + hdr1 = &io->ac_hdr; + hdr1->ah_flag &= ~AMORE; + } + memcpy(bufptr, csa, sizeof(struct acctcsa)); + bufptr += sizeof(struct acctcsa); + len += sizeof(struct acctcsa); + + if (mem_enabled) { + memcpy(bufptr, mem, sizeof(struct acctmem)); + len += sizeof(struct acctmem); + bufptr += sizeof(struct acctmem); + } + if(io_enabled) { + memcpy(bufptr, io, sizeof(struct acctio)); + len += sizeof(struct acctio); + } + + return len; +} + +/* + * Process acctctl syscall from a user request. 
+ */ +int +do_acctctl(int req, void *act) +{ + struct actctl actctl; + struct actstat actstat; + + int daemon = 0; + int error = 0; + int err = 0; + static int flag = 010000; + int ind; + int id; + int len; + int num; + + down(&csa_sem); + if (!csa_flag) { + csa_init_acct(flag++); + } + up(&csa_sem); + + if ((req < 0) || (req >= AC_MREQ) ) { + return -EINVAL; + } + + memset(&actctl, 0, sizeof(struct actctl)); + memset(&actstat, 0, sizeof(struct actstat)); + + switch (req) { + /* + * Start specified types of accounting. + */ + case AC_START: + { + int id, ind; + struct file *newvp; + + if (!capable(CAP_SYS_PACCT) ) { + error = -EPERM; + break; + } + + if (copy_from_user(&actctl, act, sizeof(int)) ) { + error = -EFAULT; + break; + } + + num = (actctl.ac_sttnum == 0) ? 1 : actctl.ac_sttnum; + if ((num < 0) || (num > NUM_KDRCDS) ) { + error = -EINVAL; + break; + + } + + len = sizeof(struct actctl) - + sizeof(struct actstat) * NUM_KDRCDS + + sizeof(struct actstat) * num; + if (copy_from_user(&actctl, act, len)) { + error = -EFAULT; + break; + } + /* + * Verify all indexes in actstat structures specified. + */ + for(ind = 0; ind < num; ind++) { + id = actctl.ac_stat[ind].ac_ind; + if ((id < 0) || (id >= ACCT_MAXRCDS) ) { + error = -EINVAL; + break; + } + + if (id == ACCT_MAXKDS) { + error = -EINVAL; + break; + } + } + down(&csa_sem); + /* + * If an accounting file was specified, make sure + * that we can access it. 
+ */ + if (strlen(actctl.ac_path) ) { + strncpy(new_path, actctl.ac_path, ACCT_PATH); + newvp = filp_open(new_path,O_WRONLY|O_APPEND, 0); + if (IS_ERR(newvp)) { + error = PTR_ERR(newvp); + up(&csa_sem); + break; + } else if (!S_ISREG(newvp->f_dentry->d_inode->i_mode)) { + error = -EACCES; + filp_close(newvp, NULL); + up(&csa_sem); + break; + } else if (!newvp->f_op->write) { + error = -EIO; + filp_close(newvp, NULL); + up(&csa_sem); + break; + } + if ((csa_acctvp != (struct file *)NULL) && + csa_acctvp == newvp) { + /* + * this file already being used, so ignore + * request to use this file; just continue on + */ + filp_close(newvp, NULL); + newvp = (struct file *)NULL; + } + + } else { + newvp = (struct file *)NULL; + } + /* + * If a new accounting file was specified and there's + * an old accounting file, stop writing to it. + */ + if (newvp != (struct file *)NULL) { + if (csa_acctvp != (struct file *)NULL) { + error = csa_config_write(AC_CONFCHG_FILE,NULL); + filp_close(csa_acctvp, NULL); + } else if (!csa_flag) { + csa_init_acct(flag++); + } + + strncpy(csa_path, new_path, ACCT_PATH); + down(&csa_write_sem); + csa_acctvp = newvp; + up(&csa_write_sem); + + } else { + if (csa_acctvp == (struct file *)NULL) { + error = -EINVAL; + up(&csa_sem); + break; + } + } + + /* + * Loop through each actstat block and turn ON that accounting. 
+ */ + for(ind = 0; ind < num; ind++) { + struct actstat *stat; + + id = actctl.ac_stat[ind].ac_ind; + stat = &actctl.ac_stat[ind]; + if (id < ACCT_RCDS) { + acct_dmd[id][A_SYS].ac_state = ACS_ON; + acct_dmd[id][A_SYS].ac_param = stat->ac_param; + + stat->ac_state = acct_dmd[id][A_SYS].ac_state; + stat->ac_param = acct_dmd[id][A_SYS].ac_param; + } else { + int tid = id -ACCT_RCDS; + + acct_rcd[tid][A_SYS].ac_state = ACS_ON; + acct_rcd[tid][A_SYS].ac_param = stat->ac_param; + + stat->ac_state = acct_rcd[tid][A_SYS].ac_state; + stat->ac_param = acct_rcd[tid][A_SYS].ac_param; + } + } + + up(&csa_sem); + error = csa_config_write(AC_CONFCHG_ON, NULL); + /* + * Return the accounting states to the user. + */ + if (copy_to_user(act, &actctl, len)) { + error = -EFAULT; + break; + } + } + break; + + /* + * Stop specified types of accounting. + */ + case AC_STOP: + { + int id, ind; + + if (!capable(CAP_SYS_PACCT) ) { + error = -EPERM; + break; + } + + if (copy_from_user(&actctl, act, sizeof(int)) ) { + error = -EFAULT; + break; + } + + num = (actctl.ac_sttnum == 0) ? 1 : actctl.ac_sttnum; + if ((num <= 0) || (num > NUM_KDRCDS) ) { + error = -EINVAL; + break; + } + + len = sizeof(struct actctl) - + sizeof(struct actstat) * NUM_KDRCDS + + sizeof(struct actstat) * num; + if (copy_from_user(&actctl, act, len)) { + error = -EFAULT; + break; + } + + /* + * Verify all of the indexes in actstat structures specified. + */ + for(ind = 0; ind < num; ind++) { + id = actctl.ac_stat[ind].ac_ind; + if ((id < 0) || (id >= NUM_KDRCDS) ) { + error = -EINVAL; + break; + } + } + + /* + * Loop through each actstat block and turn off that accounting. + */ + down(&csa_sem); + /* + * Disable accounting for this entry. 
+ */ + for(ind = 0; ind < num; ind++) { + id = actctl.ac_stat[ind].ac_ind; + if (id < ACCT_RCDS) { + acct_dmd[id][A_SYS].ac_state = ACS_OFF; + acct_dmd[id][A_SYS].ac_param = 0; + + actctl.ac_stat[ind].ac_state = + acct_dmd[id][A_SYS].ac_state; + actctl.ac_stat[ind].ac_param = 0; + } else { + int tid = id -ACCT_RCDS; + + acct_rcd[tid][A_SYS].ac_state = ACS_OFF; + acct_rcd[tid][A_SYS].ac_param = 0; + actctl.ac_stat[ind].ac_state = + acct_rcd[tid][A_SYS].ac_state; + actctl.ac_stat[ind].ac_param = + acct_rcd[tid][A_SYS].ac_param; + } + } /* end of for(ind) */ + /* + * Check the daemons to see if any are still on. + */ + for(ind = 0; ind < ACCT_MAXKDS; ind++) { + if (acct_dmd[ind][A_SYS].ac_state == ACS_ON) { + daemon += 1<<ind; + } + } + up(&csa_sem); + /* + * If all daemons are off and there's an old accounting file, + * stop writing to it. + */ + if (!daemon && (csa_acctvp != (struct file *)NULL) ) { + error = csa_config_write(AC_CONFCHG_OFF,NULL); + filp_close(csa_acctvp, NULL); + down(&csa_write_sem); + csa_acctvp = (struct file *)NULL; + up(&csa_write_sem); + } else { + error = csa_config_write(AC_CONFCHG_OFF, NULL); + } + /* + * Return the accounting states to the user. + */ + if (copy_to_user(act, &actctl, len)) { + error = -EFAULT; + break; + } + } + break; + + /* + * Halt all accounting. + */ + case AC_HALT: + { + int ind; + + if (!capable(CAP_SYS_PACCT) ) { + error = -EPERM; + break; + } + down(&csa_sem); + /* Turn off all accounting if any is on. */ + for(ind = 0; ind <ACCT_MAXKDS; ind++) { + acct_dmd[ind][A_SYS].ac_state = ACS_OFF; + acct_dmd[ind][A_SYS].ac_param = 0; + } + + for(ind = ACCT_RCDS; ind < ACCT_MAXRCDS; ind++) { + int tid = ind -ACCT_RCDS; + + acct_rcd[tid][A_SYS].ac_state = ACS_OFF; + acct_rcd[tid][A_SYS].ac_param = 0; + } + + up(&csa_sem); + /* If there's an old accounting file, stop writing to it. 
*/ + if (csa_acctvp != (struct file *)NULL) { + error = csa_config_write(AC_CONFCHG_OFF,NULL); + filp_close(csa_acctvp, NULL); + down(&csa_write_sem); + csa_acctvp = (struct file *)NULL; + up(&csa_write_sem); + } + } + break; + + /* + * Process daemon/record status function. + */ + case AC_CHECK: + { + if (copy_from_user(&actstat, act, sizeof(struct actstat)) ) { + error = -EFAULT; + break; + } + id = actstat.ac_ind; + if ((id >= 0) && (id < ACCT_MAXKDS) ) { + actstat.ac_state = acct_dmd[id][A_SYS].ac_state; + actstat.ac_param = acct_dmd[id][A_SYS].ac_param; + + } else if ((id >= ACCT_RCDS) && (id < ACCT_MAXRCDS) ) { + int tid = id-ACCT_RCDS; + + actstat.ac_state = acct_rcd[tid][A_SYS].ac_state; + actstat.ac_param = acct_rcd[tid][A_SYS].ac_param; + + } else { + error = -EINVAL; + break; + } + if (copy_to_user(act, &actstat, sizeof(struct actstat)) ) { + error = -EFAULT; + } + } + break; + + /* + * Process daemon status function. + */ + case AC_KDSTAT: + { + if (copy_from_user(&actctl, act, sizeof(int)) ) { + error = -EFAULT; + break; + } + + num = actctl.ac_sttnum; + + if (num <= 0) { + error = EINVAL; + break; + } else if (num > NUM_KDS) { + num = NUM_KDS; + } + for(ind = 0; ind < num; ind++) { + actctl.ac_stat[ind].ac_ind = + acct_dmd[ind][A_SYS].ac_ind; + actctl.ac_stat[ind].ac_state = + acct_dmd[ind][A_SYS].ac_state; + actctl.ac_stat[ind].ac_param = + acct_dmd[ind][A_SYS].ac_param; + } /* end of for(ind) */ + actctl.ac_sttnum = num; + strncpy(actctl.ac_path, csa_path, ACCT_PATH); + + len = sizeof(struct actctl) - + sizeof(struct actstat) * NUM_KDRCDS + + sizeof(struct actstat) * num; + if (copy_to_user(act, &actctl, len)) { + error = -EFAULT; + break; + } + } + break; + + /* + * Process record status function. 
+ */ + case AC_RCDSTAT: + { + if (copy_from_user(&actctl, act, sizeof(int)) ) { + error = -EFAULT; + break; + } + num = actctl.ac_sttnum; + + if (num <= 0) { + error = -EINVAL; + break; + } else if (num > NUM_RCDS) { + num = NUM_RCDS; + } + for(ind = 0; ind < num; ind++) { + actctl.ac_stat[ind].ac_ind = + acct_rcd[ind][A_SYS].ac_ind; + actctl.ac_stat[ind].ac_state = + acct_rcd[ind][A_SYS].ac_state; + actctl.ac_stat[ind].ac_param = + acct_rcd[ind][A_SYS].ac_param; + } + actctl.ac_sttnum = num; + strncpy(actctl.ac_path, csa_path, ACCT_PATH); + len = sizeof(struct actctl) - + sizeof(struct actstat) * NUM_KDRCDS + + sizeof(struct actstat) * num; + if (copy_to_user(act, &actctl, len)) { + error = -EFAULT; + break; + } + } + break; + + /* + * Turn user job accounting ON or OFF. + */ + case AC_JASTART: + case AC_JASTOP: + { + char localpath[ACCT_PATH]; + struct file *newvp = NULL; + struct file *oldvp; + uint64_t jid; + job_csa_t job_acctbuf; + int retval = 0; + + len = sizeof(struct actctl) - + sizeof(struct actstat) * (NUM_KDRCDS -1); + if (copy_from_user(&actctl, act, len)) { + error = -EFAULT; + break; + } + /* + * If an accounting file was specified, make sure + * that we can access it. 
+ */ + if (strlen(actctl.ac_path)) { + strncpy(localpath, actctl.ac_path, ACCT_PATH); + newvp = filp_open(localpath,O_WRONLY|O_APPEND,0); + if (IS_ERR(newvp)) { + error = PTR_ERR(newvp); + break; + } else if (!S_ISREG(newvp->f_dentry->d_inode->i_mode)) { + error = -EACCES; + filp_close(newvp, NULL); + break; + } else if (!newvp->f_op->write) { + error = -EIO; + filp_close(newvp, NULL); + break; + } + } else if (req == AC_JASTART) { + error = -EINVAL; + break; + } + if (req == AC_JASTOP) { + newvp = (struct file *)NULL; + } + jid = job_getjid(current); + if (jid <= 0) { + /* no job table entry */ + error = -ENOENT; + break; + } + memset(&job_acctbuf, 0, sizeof(job_acctbuf)); + retval = job_getacct(jid, JOB_ACCT_CSA, &job_acctbuf); + if (retval != 0) { + /* couldn't get csa info in the job table entry */ + error = retval; + break; + } + /* Use this semaphore since csa_write() can also change this + * file pointer. + */ + down(&csa_write_sem); + if ((oldvp = job_acctbuf.job_acctfile) != (struct file *)NULL) { + /* Stop writing to the old job accounting file */ + filp_close(oldvp, NULL); + } + + /* Establish new job accounting file or stop job accounting */ + job_acctbuf.job_acctfile = newvp; + + retval = job_setacct(jid, JOB_ACCT_CSA, JOB_CSA_ACCTFILE, + &job_acctbuf); + if (retval != 0) { + /* couldn't set the new file name in the job entry */ + error = retval; + up(&csa_write_sem); + break; + } + up(&csa_write_sem); + /* Write a config record so ja has uname info */ + if (req == AC_JASTART) { + error = csa_config_write(AC_CONFCHG_ON, + job_acctbuf.job_acctfile); + } + } + break; + + /* + * Write an accounting record for a system daemon. + */ + case AC_WRACCT: + { + int len; + int retval = 0; + uint64_t jid; + job_csa_t job_acctbuf; + struct actwra actwra; + + if (!capable(CAP_SYS_PACCT) ) { + error = -EPERM; + break; + } + if (copy_from_user(&actwra, act, sizeof(struct actwra))) { + error = -EFAULT; + break; + } + /* Verify the parameters. 
*/ + jid = actwra.ac_jid; + if (jid < 0) { + error = -EINVAL; + break; + } + + id = actwra.ac_did; + if ((id < 0) || (id >= ACCT_MAXKDS) ) { + error = -EINVAL; + break; + } + + len = actwra.ac_len; + if ((len <= 0) || (len > MAX_WRACCT) ) { + error = -EINVAL; + break; + } + + if (actwra.ac_buf == (char *)NULL) { + error = -EINVAL; + break; + } + + /* get the job table entry for this jid */ + memset(&job_acctbuf, 0, sizeof(job_acctbuf)); + retval = job_getacct(jid, JOB_ACCT_CSA, &job_acctbuf); + if (retval != 0) { + /* couldn't get accounting info stored in job table */ + error = retval; + break; + } + + /* If the daemon type is on, write out the daemon buffer. */ + if ((acct_dmd[id][A_SYS].ac_state == ACS_ON) && + (csa_acctvp != (struct file *)NULL) ) { + error = csa_write(actwra.ac_buf, id, len, + jid, A_DMD, NULL); + } + /* maybe write out daemon record to ja user accounting file */ + if (job_acctbuf.job_acctfile != NULL) { + error = csa_write(actwra.ac_buf, id, len, jid, A_CJA, + &job_acctbuf); + } + } + break; + + /* + * Return authorized state information. + */ + case AC_AUTH: + { + if (!capable(CAP_SYS_PACCT) ) { + error = -EPERM; + break; + } + /* + * Process user authorization request...If we get to this spot, + * the user is authorized. + */ + } + break; + + /* + * Process the incremental accounting request. + */ + case AC_INCACCT: + error = -EINVAL; + break; + + default: + error = -EINVAL; + + } /* end of switch(req) */ + + return(error ? error : err); +} + + +/* + * Create a configuration change accounting record. + */ +static void +csa_config_make(ac_eventtype event, struct acctcfg *cfg) +{ + int daemon = 0; + int record = 0; + int ind; + int nmsize = 0; + + memset(cfg, 0, sizeof(struct acctcfg)); + /* Setup the record and header. 
*/
+	csa_header(&cfg->ac_hdr, REV_CFG, ACCT_KERNEL_CFG,
+	    sizeof(struct acctcfg) );
+	cfg->ac_event = event;
+	if (!boottime) {	/* not computed yet: current time minus seconds of uptime */
+		boottime = xtime.tv_sec - (jiffies / HZ);
+	}
+	cfg->ac_boottime = boottime;
+	cfg->ac_curtime = xtime.tv_sec;
+
+	/*
+	 * Create the masks of the types that are on.
+	 * Bit ind of the daemon mask is set when kernel/daemon entry ind is
+	 * ACS_ON; bit tid of the record mask is set when record entry tid
+	 * (its index relative to ACCT_RCDS) is ACS_ON.
+	 */
+	for(ind = 0; ind < ACCT_MAXKDS; ind++) {
+		if (acct_dmd[ind][A_SYS].ac_state == ACS_ON) {
+			daemon += 1<<ind;
+		}
+	}
+	for(ind = ACCT_RCDS; ind < ACCT_MAXRCDS; ind++) {
+		int	tid = ind -ACCT_RCDS;
+
+		if (acct_rcd[tid][A_SYS].ac_state == ACS_ON) {
+			record += 1<<tid;
+		}
+	}
+	cfg->ac_kdmask = daemon;
+	cfg->ac_rmask = record;
+	/* Copy sizeof(field)-1 bytes of each uname field, then force a NUL into the last byte. */
+	nmsize = sizeof(cfg->ac_uname.sysname);
+	memcpy(cfg->ac_uname.sysname, system_utsname.sysname, nmsize-1);
+	cfg->ac_uname.sysname[nmsize-1] = '\0';
+	nmsize = sizeof(cfg->ac_uname.nodename);
+	memcpy(cfg->ac_uname.nodename, system_utsname.nodename, nmsize-1);
+	cfg->ac_uname.nodename[nmsize-1] = '\0';
+	nmsize = sizeof(cfg->ac_uname.release);
+	memcpy(cfg->ac_uname.release, system_utsname.release, nmsize-1);
+	cfg->ac_uname.release[nmsize-1] = '\0';
+	nmsize = sizeof(cfg->ac_uname.version);
+	memcpy(cfg->ac_uname.version, system_utsname.version, nmsize-1);
+	cfg->ac_uname.version[nmsize-1] = '\0';
+	nmsize = sizeof(cfg->ac_uname.machine);
+	memcpy(cfg->ac_uname.machine, system_utsname.machine, nmsize-1);
+	cfg->ac_uname.machine[nmsize-1] = '\0';
+
+	return;
+}
+
+
+/*
+ * Create and write a configuration change accounting record.
+ *
+ * event        - configuration change being reported; stored in the
+ *                record's ac_event field by csa_config_make()
+ * job_acctfile - open per-job accounting file that also receives the
+ *                record, or NULL for none
+ *
+ * The record is written to the system accounting file (csa_acctvp) when
+ * one is set, and to job_acctfile when it is non-NULL.
+ *
+ * Returns 0 on success, otherwise the negative value returned by a
+ * failed f_op->write() call.
+ */
+static int
+csa_config_write(ac_eventtype event, struct file *job_acctfile)
+{
+	int		error = 0;	/* errno */
+	struct acctcfg	acctcfg;
+	mm_segment_t	fs;
+
+	/* write record to process accounting file.
*/
+	csa_config_make(event, &acctcfg);
+
+	/* csa_write_sem serialises all writers of the accounting files. */
+	down(&csa_write_sem);
+	if (csa_acctvp != (struct file *)NULL) {
+		/* The record lives in kernel memory, so widen the address limit. */
+		fs = get_fs();
+		set_fs(KERNEL_DS);
+		error = csa_acctvp->f_op->write(csa_acctvp, (char *)&acctcfg,
+			sizeof(struct acctcfg), &csa_acctvp->f_pos);
+		set_fs(fs);
+	}
+	if (job_acctfile != (struct file *)NULL) {
+		int	job_error;
+
+		fs = get_fs();
+		set_fs(KERNEL_DS);
+		job_error = job_acctfile->f_op->write(job_acctfile,
+			(char *)&acctcfg, sizeof(struct acctcfg),
+			&job_acctfile->f_pos);
+		set_fs(fs);
+		/*
+		 * Do not let a successful job-file write hide a failure of
+		 * the system pacct write above: only take this result when
+		 * nothing has failed so far.
+		 */
+		if (error >= 0) {
+			error = job_error;
+		}
+	}
+	/* A non-negative value is a byte count, i.e. success. */
+	if (error >= 0) {
+		error = 0;
+	}
+	up(&csa_write_sem);
+	return(error);
+}
+
+
+
+/*
+ * When first process in a job is created.
+ */
+int
+csa_jstart(int event, void *data)
+{
+	job_csa_t	*job_sojbuf = (job_csa_t *)data;
+	struct acctsoj	acctsoj;	/* start of job record */
+
+	/* Are we doing any accounting? */
+	if (csa_acctvp == (struct file *)NULL) {
+		return 0;
+	}
+
+	if (!job_sojbuf) {
+		/* bad pointer */
+		printk(KERN_ERR
+		    "csa_jstart: Received bad soj pointer, pid %d.\n",
+		    current->pid);
+		return -1;
+	}
+
+	memset(&acctsoj, 0, sizeof(struct acctsoj));
+	csa_header(&acctsoj.ac_hdr, REV_SOJ, ACCT_KERNEL_SOJ,
+	    sizeof(struct acctsoj));
+	acctsoj.ac_jid = job_sojbuf->job_id;
+	acctsoj.ac_uid = job_sojbuf->job_uid;
+	if (event == JOB_EVENT_START) {
+		acctsoj.ac_type = AC_SOJ;
+		acctsoj.ac_btime = CT_TO_SECS(job_sojbuf->job_start) +
+		    (xtime.tv_sec - (jiffies / HZ) );
+	} else if (event == JOB_EVENT_RESTART) {
+		acctsoj.ac_type = AC_ROJ;
+		acctsoj.ac_rstime = CT_TO_SECS(job_sojbuf->job_start) +
+		    (xtime.tv_sec - (jiffies / HZ) );
+	} else {
+		return -1;
+	}
+
+	/*
+	 * Write the accounting record to the process accounting
+	 * file if any accounting is enabled.
+	 */
+	if (csa_acctvp != (struct file *)NULL) {
+		(void)csa_write((caddr_t)&acctsoj, ACCT_KERN_CSA,
+		    sizeof(acctsoj), job_sojbuf->job_id, A_SYS, job_sojbuf);
+	}
+
+	return 0;
+}
+
+/*
+ * Job exit hook: when the last process in a job is done, build an
+ * end-of-job (EOJ) record from the job data and pass it to csa_write().
+ *
+ * event - not examined here
+ * data  - job_csa_t describing the job (id, uid, start time, memory
+ *         high-water marks)
+ *
+ * Returns 0 if the system accounting file is not set or once the write
+ * has been attempted (its result is ignored), -1 if data is NULL.
+ */
+int
+csa_jexit(int event, void *data)
+{
+	job_csa_t	*jobinfo = (job_csa_t *)data;
+	struct accteoj	rec;		/* end of job record */
+
+	/* No system accounting file means nothing to record. */
+	if (csa_acctvp == (struct file *)NULL)
+		return 0;
+
+	if (!jobinfo) {
+		/* bad pointer */
+		printk(KERN_ERR
+		    "csa_jexit: Received bad eoj pointer, pid %d.\n",
+		    current->pid);
+		return -1;
+	}
+
+	memset(&rec, 0, sizeof(rec));
+
+	/*
+	 * Both headers describe an EOJ record; the second one is given
+	 * size 0 and the inverted magic number.
+	 */
+	csa_header(&rec.ac_hdr1, REV_EOJ, ACCT_KERNEL_EOJ, sizeof(rec));
+	csa_header(&rec.ac_hdr2, REV_EOJ, ACCT_KERNEL_EOJ, 0);
+	rec.ac_hdr2.ah_magic = ~ACCT_MAGIC;
+
+	/* Job id and owner come from the job, nice and gid from current. */
+	rec.ac_jid = jobinfo->job_id;
+	rec.ac_uid = jobinfo->job_uid;
+	rec.ac_gid = current->gid;
+	rec.ac_nice = current->nice;
+
+	/*
+	 * job_start is converted to seconds and offset by the boot time
+	 * (now minus uptime); presumably it is kept in ticks since boot.
+	 */
+	rec.ac_btime = CT_TO_SECS(jobinfo->job_start) +
+	    (xtime.tv_sec - (jiffies / HZ));
+	rec.ac_etime = xtime.tv_sec;
+
+	/*
+	 * XXX Once we have real values in these two fields, convert them
+	 * to Kbytes.
+	 */
+	rec.ac_corehimem = jobinfo->job_corehimem;
+	rec.ac_virthimem = jobinfo->job_virthimem;
+
+	/* Checked again: write only while the system accounting file is set. */
+	if (csa_acctvp != (struct file *)NULL) {
+		(void) csa_write((caddr_t)&rec, ACCT_KERN_CSA, sizeof(rec),
+		    jobinfo->job_id, A_SYS, jobinfo);
+	}
+
+	return 0;
+}
+
+/*
+ * Write buf out to the accounting file.
+ * If an error occurs, return the error code to the caller
+ *
+ * buf/nbyte - record to write and its length in bytes
+ * did       - daemon index; on a failed A_SYS/A_DMD write it selects the
+ *             acct_dmd[] entry to switch off and the name that is logged
+ * jid       - job id, used to update the job entry after a failed
+ *             A_CJA write
+ * type      - A_SYS or A_DMD: write to the system accounting file
+ *             (csa_acctvp); A_CJA: write to jp->job_acctfile
+ * jp        - job accounting data; only dereferenced for A_CJA
+ *
+ * Returns 0 when the record was written or the selected file is not set,
+ * -EINVAL for an unknown type, otherwise the negative value returned by
+ * f_op->write().  After a failed write the selected file is detached,
+ * the failure is logged and the file is closed.  The whole operation
+ * runs with csa_write_sem held.
+ */
+int
+csa_write(char *buf, int did, int nbyte, uint64_t jid, int type,
+    job_csa_t *jp)
+{
+	struct file	*filp = (struct file *)NULL;	/* acct file */
+	mm_segment_t	saved_fs;
+	unsigned long	saved_limit;
+	int		rc = 0;		/* errno */
+	int		set_rc;
+
+	down(&csa_write_sem);
+
+	/* Pick the file that belongs to this accounting type. */
+	if (type == A_SYS || type == A_DMD) {
+		filp = csa_acctvp;
+	} else if (type == A_CJA) {
+		if (jp != (job_csa_t *)NULL)
+			filp = jp->job_acctfile;
+	} else {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/* This type of accounting is not turned on: nothing to write. */
+	if (filp == (struct file *)NULL)
+		goto out;
+
+	saved_fs = get_fs();
+	set_fs(KERNEL_DS);
+
+	/* make sure we don't get hit by a process file size limit */
+	saved_limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
+	current->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
+	rc = filp->f_op->write(filp, buf, nbyte, &filp->f_pos);
+	current->rlim[RLIMIT_FSIZE].rlim_cur = saved_limit;
+
+	set_fs(saved_fs);
+
+	/* Any non-negative result counts as success. */
+	if (rc >= 0) {
+		rc = 0;
+		goto out;
+	}
+
+	/* The write failed: disable this type of accounting. */
+	if (type == A_CJA) {
+		jp->job_acctfile = (struct file *)NULL;
+		set_rc = job_setacct(jid, JOB_ACCT_CSA, JOB_CSA_ACCTFILE, jp);
+		printk(KERN_WARNING JID_ERR2, rc, jid);
+		if (set_rc != 0) {
+			printk(KERN_WARNING JID_ERR3, jid);
+		} else {
+			printk(KERN_WARNING JID_ERR4, jid);
+		}
+	} else {
+		/* A_SYS or A_DMD: the system accounting file is gone. */
+		csa_acctvp = (struct file *)NULL;
+		acct_dmd[did][A_SYS].ac_state = ACS_ERROFF;
+		acct_dmd[ACCT_KERN_CSA][A_SYS].ac_state = ACS_ERROFF;
+		printk(KERN_ALERT
+		    "csa accounting pacct write error %d; %s disabled\n",
+		    rc, acct_dmd_name[did]);
+	}
+	filp_close(filp, NULL);
+
+out:
+	up(&csa_write_sem);
+	return(rc);
+}
+
+module_init(init_csa);
+module_exit(cleanup_csa);
diff -urN linux-2.4.2.csa+csa-patch/include/linux/csa.h linux-2.4.2.csa/include/linux/csa.h
--- linux-2.4.2.csa+csa-patch/include/linux/csa.h Wed Dec 31 18:00:00 1969
+++ linux-2.4.2.csa/include/linux/csa.h Mon Mar 5 12:02:31 2001
@@ -0,0 +1,524 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc and LANL All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ */
+/*
+ *  CSA (Comprehensive System Accounting)
+ *  Job Accounting for Linux
+ *
+ *  This header file contains the definitions needed for job
+ *  accounting.  The kernel CSA accounting module code and all
+ *  user-level programs that try to write or process the binary job
+ *  accounting data must include this file.
+ *
+ *
+ */
+
+#ifndef _LINUX_CSA_H
+#define _LINUX_CSA_H
+
+#ifndef __KERNEL__
+#include <stdint.h>
+#include <sys/types.h>
+#endif
+
+/*
+ *  accounting flags per-process
+ */
+#define AFORK		0x01	/* fork, but did not exec */
+#define ASU		0x02	/* super-user privileges */
+#define ACKPT		0x04	/* process has been checkpointed */
+#define ACORE		0x08	/* produced corefile */
+#define AXSIG		0x10	/* killed by a signal */
+#define AMORE		0x20	/* more CSA acct records for this process */
+#define AINC		0x40	/* incremental accounting record */
+
+#define AHZ	100
+
+/*
+ * Magic number - for achead.ah_magic in the 1st header.  The magic number
+ * in the 2nd header is the inverse of this.
+ *
+ * Endianness detection: C libraries that ship <endian.h> (glibc pulls it
+ * in through <sys/types.h>) define BOTH __LITTLE_ENDIAN and __BIG_ENDIAN
+ * as constants and select one through __BYTE_ORDER, so a bare
+ * "#ifdef __LITTLE_ENDIAN" is always true for user-level programs.
+ * Compare against __BYTE_ORDER when it is available, and fall back to
+ * the "only one of the two names is defined" convention otherwise.
+ */
+#define ACCT_MAGIC_BIG		030510	/* big-endian */
+#define ACCT_MAGIC_LITTLE	030512	/* little-endian */
+#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN)
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+#  define ACCT_MAGIC	ACCT_MAGIC_LITTLE
+# else
+#  define ACCT_MAGIC	ACCT_MAGIC_BIG
+# endif
+#elif defined(__LITTLE_ENDIAN)
+#define ACCT_MAGIC	ACCT_MAGIC_LITTLE
+#else
+#define ACCT_MAGIC	ACCT_MAGIC_BIG
+#endif
+
+/*
+ * Record types - for achead.ah_type in the 1st header.
+ * All values below are octal constants.
+ */
+#define	ACCT_KERNEL_CSA		0001	/* Kernel: CSA base record */
+#define	ACCT_KERNEL_MEM		0002	/* Kernel: memory record */
+#define	ACCT_KERNEL_IO		0004	/* Kernel: input/output record */
+#define	ACCT_KERNEL_MT		0006	/* Kernel: multi-tasking record */
+#define	ACCT_KERNEL_MPP		0010	/* Kernel: multi-PE appl record */
+#define	ACCT_KERNEL_SOJ		0012	/* Kernel: start-of-job record */
+#define	ACCT_KERNEL_EOJ		0014	/* Kernel: end-of-job record */
+#define	ACCT_KERNEL_CFG		0020	/* Kernel: configuration record */
+
+#define	ACCT_KERNEL_SITE0	0100	/* Kernel: reserved for site */
+#define	ACCT_KERNEL_SITE1	0101	/* Kernel: reserved for site */
+
+#define	ACCT_DAEMON_NQS		0120	/* Daemon: NQS record */
+#define	ACCT_DAEMON_WKMG	0122	/* Daemon: workload management record,
+					   i.e., LSF */
+#define	ACCT_DAEMON_TAPE	0124	/* Daemon: tape record */
+#define	ACCT_DAEMON_DMIG	0126	/* Daemon: data migration record */
+#define	ACCT_DAEMON_SOCKET	0130	/* Daemon: socket record */
+
+#define	ACCT_DAEMON_SITE0	0200	/* Daemon: reserved for site */
+#define	ACCT_DAEMON_SITE1	0201	/* Daemon: reserved for site */
+
+#define	ACCT_JOB_HEADER		0220	/* csabuild: job header record */
+#define	ACCT_CACCT		0222	/* cacct: consolidated data */
+#define	ACCT_CMS		0224	/* cms: command summary data */
+
+/* Record types - for achead.ah_type in the 2nd header.  One bit each; parenthesized so that uses such as ~ACCT_IO or ACCT_IO + 1 expand as intended. */
+#define	ACCT_MEM	(1<<0)	/* Process generated memory record */
+#define	ACCT_IO		(1<<1)	/* Process generated I/O record */
+#define	ACCT_MT		(1<<2)	/* Process used multi-tasking */
+#define	ACCT_MPP	(1<<3)	/* Process used multi-PE */
+
+/*
+ * Record revision levels.
+ *
+ * These are incremented to indicate that a record's format has changed since
+ * a previous release.
+ */
+#define	REV_CSA		02400	/* Kernel: CSA base record */
+#define	REV_MEM		02400	/* Kernel: memory record */
+#define	REV_IO		02400	/* Kernel: I/O record */
+#define	REV_MT		02400	/* Kernel: multi-tasking record */
+#define	REV_MPP		02400	/* Kernel: multi-PE appl record */
+#define	REV_SOJ		02400	/* Kernel: start-of-job record */
+#define	REV_EOJ		02400	/* Kernel: end-of-job record */
+#define	REV_CFG		02400	/* Kernel: configuration record */
+
+#define	REV_NQS		02400	/* Daemon: NQS record */
+#define	REV_WKMG	02400	/* Daemon: workload management (i.e., LSF)
+				   record */
+#define	REV_TAPE	02400	/* Daemon: tape record */
+#define	REV_DMIG	02400	/* Daemon: data migration record */
+#define	REV_SOCKET	02400	/* Daemon: socket record */
+
+#define	REV_JOB		02400	/* csabuild: job header record */
+#define	REV_CACCT	02400	/* cacct: consolidated data */
+#define	REV_CMS		02400	/* cms: command summary data */
+
+/*
+ * Record header
+ *
+ * The five bit-fields add up to 64 bits (17 + 15 + 8 + 8 + 16).
+ * ah_size is 16 bits wide, so it can hold values up to 65535.
+ */
+struct achead
+{
+	unsigned int	ah_magic:17;	/* Magic */
+	unsigned int	ah_revision:15;	/* Revision */
+	unsigned int	ah_type:8;	/* Record type */
+	unsigned int	ah_flag:8;	/* Record flags */
+	unsigned int	ah_size:16;	/* Size of record */
+};
+
+/*
+ * In order to keep the accounting records the same size across different
+ * machine types, record fields will be defined to types that won't
+ * vary (e.g. uint32_t instead of uid_t).
+ *
+ * NOTE(review): struct acctcsa, acctsoj, accteoj and acctcfg in this file
+ * still declare time_t fields, and time_t is not a fixed-width type -
+ * confirm whether those fields should follow this rule.
+*/
+
+/*
+ * Per process base accounting record.
+ */ +struct acctcsa +{ + struct achead ac_hdr1; /* Header */ + struct achead ac_hdr2; /* 2nd header for continued records */ + double ac_sbu; /* System billing units */ + unsigned int ac_stat:8; /* Exit status */ + unsigned int ac_nice:8; /* Nice value */ + unsigned char ac_sched; /* Scheduling discipline */ + unsigned int :8; /* Unused */ + uint32_t ac_uid; /* User ID */ + uint32_t ac_gid; /* Group ID */ + uint64_t ac_ash; /* Array session handle */ + uint64_t ac_jid; /* Job ID */ + uint64_t ac_prid; /* Project ID -> account ID */ + uint32_t ac_pid; /* Process ID */ + uint32_t ac_ppid; /* Parent process ID */ + time_t ac_btime; /* Beginning time [sec since 1970] */ + char ac_comm[16]; /* Command name */ +/* CPU resource usage information. */ + uint64_t ac_etime; /* Elapsed time [usecs] */ + uint64_t ac_utime; /* User CPU time [usec] */ + uint64_t ac_stime; /* System CPU time [usec] */ + uint64_t ac_spare; /* Spare field */ + uint64_t ac_spare1; /* Spare field */ +}; + +/* + * Memory accounting structure + * This structure is part of the acctmem record. 
+ */ +struct memint +{ + uint64_t himem; /* Hiwater memory usage [Kbytes] */ + uint64_t mem1; /* Memory integral 1 [Mbytes/uSec] */ + uint64_t mem2; /* Memory integral 2 - not used */ + uint64_t mem3; /* Memory integral 3 - not used */ +}; + +/* + * Memory accounting record + */ +struct acctmem +{ + struct achead ac_hdr; /* Header */ + double ac_sbu; /* System billing units */ + struct memint ac_core; /* Core memory integrals */ + struct memint ac_virt; /* Virtual memory integrals */ + uint64_t ac_pgswap; /* # of pages swapped */ + uint64_t ac_minflt; /* # of minor page faults */ + uint64_t ac_majflt; /* # of major page faults */ + uint64_t ac_spare; /* Spare field */ +}; + +/* + * Input/Output accounting record + */ +struct acctio +{ + struct achead ac_hdr; /* Header */ + double ac_sbu; /* System billing units */ + uint64_t ac_bwtime; /* Block I/O wait time [usecs] */ + uint64_t ac_rwtime; /* Raw I/O wait time [usecs] */ + uint64_t ac_chr; /* Number of chars (bytes) read */ + uint64_t ac_chw; /* Number of chars (bytes) written */ + uint64_t ac_bkr; /* Number of blocks read */ + uint64_t ac_bkw; /* Number of blocks written */ + uint64_t ac_scr; /* Number of read system calls */ + uint64_t ac_scw; /* Number of write system calls */ + uint64_t ac_spare; /* Spare field */ +}; + +/* + * Multi-tasking accounting structure + * This structure is part of the acctmt record. + */ +struct mtask +{ + uint64_t mt; /* CPU+1 connect time [usecs] */ + uint64_t spare1; /* Spare field */ + uint64_t spare2; /* Spare field */ +}; + +/* + * Multi-tasking accounting record - currently not used, adapted from UNICOS. 
+ */ +#define ACCT_MAXCPUS 512 /* Maximum number of CPUs supported */ + +struct acctmt +{ + struct achead ac_hdr; /* Header */ + double ac_sbu; /* System billing units */ + unsigned int ac_numcpu:16; /* Max number of CPUs used */ + unsigned int ac_maxcpu:16; /* Max number of CPUs available */ + unsigned int :32; /* Unused */ + int64_t ac_smwtime; /* Semaphore wait time [usec] */ + struct mtask ac_mttime[ACCT_MAXCPUS]; /* Time connected to (i+1) + CPUs [usec] */ +}; + +/* + * MPP PE accounting structure - MPP hardware specific. + * This structure is part of the acctmpp record. + */ +struct acctpe +{ + uint64_t utime; /* User CPU time [usecs] */ + uint64_t srtime; /* System & remote CPU time [usecs] */ + uint64_t io; /* Number of chars transferred */ +}; + +/* + * MPP accounting record - MPP hardware specific; currently not used. + */ +#define ACCT_MAXPES 1024 /* Maximum number of PEs */ + +struct acctmpp +{ + struct achead ac_hdr; /* Header */ + double ac_sbu; /* System billing units */ + unsigned int ac_mpbesu:8; /* Number of BESUs used */ + unsigned int ac_mppe:24; /* Number of PEs used */ + uint64_t ac_himem; /* Maximum memory hiwater [Mbytes] */ + + struct acctpe ac_mpp[ACCT_MAXPES]; /* Per PE information */ +}; + +/* + * MPP Detailed PE accounting structure - currently not used + */ +struct acctdpe +{ + struct achead ac_hdr; /* Header */ + + uint64_t utime; /* User CPU time [usecs] */ + uint64_t stime; /* System CPU time [usecs] */ + uint64_t rtime; /* Remote CPU time [usecs] */ + + uint64_t ctime; /* Connect CPU time [usecs] */ + uint64_t io; /* Number of chars transferred */ + uint64_t spare; /* Spare field */ +}; + +/* + * Start-of-job record + * Written when a job is created. 
+ */ + +typedef enum +{ + AC_INIT_LOGIN, /* Initiated by login */ + AC_INIT_NQS, /* Initiated by NQS */ + AC_INIT_LSF, /* Initiated by LSF */ + AC_INIT_CROND, /* Initiated by crond */ + AC_INIT_FTPD, /* Initiated by ftpd */ + AC_INIT_INETD, /* Initiated by inetd */ + AC_INIT_TELNETD, /* Initiated by telnetd */ + AC_INIT_MAX +} ac_inittype; + + +#define AC_SOJ 1 /* Start-of-job record type */ +#define AC_ROJ 2 /* Restart-of-job record type */ + +struct acctsoj +{ + struct achead ac_hdr; /* Header */ + unsigned int ac_type:8; /* Record type (AC_SOJ, AC_ROJ) */ + ac_inittype ac_init:8; /* Initiator - currently not used */ + unsigned int :16; /* Unused */ + uint32_t ac_uid; /* User ID */ + uint64_t ac_jid; /* Job ID */ + time_t ac_btime; /* Start time [secs since 1970] */ + time_t ac_rstime; /* Restart time [secs since 1970] */ +}; + +/* + * End-of-job record + * Written when the last process of a job exits. + */ +struct accteoj +{ + struct achead ac_hdr1; /* Header */ + struct achead ac_hdr2; /* 2nd header for continued records */ + double ac_sbu; /* System billing units */ + ac_inittype ac_init:8; /* Initiator - currently not used */ + unsigned int ac_nice:8; /* Nice value */ + unsigned int :16; /* Unused */ + uint32_t ac_uid; /* User ID */ + uint32_t ac_gid; /* Group ID */ + uint64_t ac_ash; /* Array session handle; not used */ + uint64_t ac_jid; /* Job ID */ + uint64_t ac_prid; /* Project ID; not used */ + time_t ac_btime; /* Job start time [secs since 1970] */ + time_t ac_etime; /* Job end time [secs since 1970] */ + uint64_t ac_corehimem; /* Hiwater core mem [Kbytes] */ + uint64_t ac_virthimem; /* Hiwater virt mem [Kbytes] */ +/* CPU resource usage information. */ + uint64_t ac_utime; /* User CPU time [usec] */ + uint64_t ac_stime; /* System CPU time [usec] */ + uint32_t ac_spare; +}; + +/* + * Accounting configuration uname structure + * This structure is part of the acctcfg record. 
+ */ +struct ac_utsname +{ + char sysname[26]; + char nodename[26]; + char release[42]; + char version[41]; + char machine[26]; +}; + +/* + * Accounting configuration record + * Written for accounting configuration changes. + */ +typedef enum +{ + AC_CONFCHG_BOOT, /* Boot time (always first) */ + AC_CONFCHG_FILE, /* Reporting pacct file change */ + AC_CONFCHG_ON, /* Reporting xxx ON */ + AC_CONFCHG_OFF, /* Reporting xxx OFF */ + AC_CONFCHG_INC_DELTA, /* Report incremental acct clock delta change */ + AC_CONFCHG_INC_EVENT, /* Report incremental accounting event */ + AC_CONFCHG_MAX +} ac_eventtype; + +struct acctcfg +{ + struct achead ac_hdr; /* Header */ + unsigned int ac_kdmask; /* Kernel and daemon config mask */ + unsigned int ac_rmask; /* Record configuration mask */ + int64_t ac_uptimelen; /* Bytes from the end of the boot + record to the next boot record */ + ac_eventtype ac_event:8; /* Accounting configuration event */ + unsigned int :24; /* Unused */ + time_t ac_boottime; /* System boot time [secs since 1970]*/ + time_t ac_curtime; /* Current time [secs since 1970] */ + struct ac_utsname ac_uname; /* Condensed uname information */ +}; + + +/* + * Accounting control status values. + */ +typedef enum +{ + ACS_OFF, /* Accounting stopped for this entry */ + ACS_ERROFF, /* Accounting turned off by kernel */ + ACS_ON /* Accounting started for this entry */ +} ac_status; + +/* + * Function codes for acctctl(int, void *) system call. 
+ */ +typedef enum +{ + AC_START, /* Start kernel, daemon, or record accounting */ + AC_STOP, /* Stop kernel, daemon, or record accounting */ + AC_HALT, /* Stop all kernel, daemon, and record accounting */ + AC_CHECK, /* Check a kernel, daemon, or record accounting state*/ + AC_KDSTAT, /* Check all kernel & daemon accounting states */ + AC_RCDSTAT, /* Check all record accounting states */ + AC_JASTART, /* Start user job accounting */ + AC_JASTOP, /* Stop user job accounting */ + AC_WRACCT, /* Write accounting record for daemon */ + AC_AUTH, /* Verify executing user is authorized */ + AC_INCACCT, /* Control incremental accounting */ + AC_MREQ +} ac_request; + +/* + * Define the acctctl(int, void *) accounting record indices. + */ +typedef enum +{ + ACCT_KERN_CSA, /* Kernel CSA accounting */ + ACCT_KERN_JOB_PROC, /* Kernel job process summary accounting */ + ACCT_KERN_ASH, /* Kernel array session summary accounting */ + ACCT_DMD_NQS, /* Daemon NQS accounting */ + ACCT_DMD_WKMG, /* Daemon workload management (i.e. 
LSF) acct */
+	ACCT_DMD_TAPE,		/* Daemon tape accounting */
+	ACCT_DMD_DMIG,		/* Daemon data migration accounting */
+	ACCT_DMD_SOCKET,	/* Daemon socket accounting */
+	ACCT_DMD_SITE1,		/* Site reserved daemon acct */
+	ACCT_DMD_SITE2,		/* Site reserved daemon acct */
+	ACCT_MAXKDS,		/* Max # kernel and daemon entries */
+	/* ACCT_MAXKDS takes an enum slot itself, so the first record entry below is ACCT_MAXKDS + 1. */
+	ACCT_RCD_MPPDET,	/* Record acct for MPP detail exit info */
+	ACCT_RCD_MEM,		/* Record acct for memory */
+	ACCT_RCD_IO,		/* Record acct for input/output */
+	ACCT_RCD_MT,		/* Record acct for multi-tasking */
+	ACCT_RCD_MPP,		/* Record acct for MPP accumulated info */
+	ACCT_THD_MEM,		/* Record acct for memory size threshold */
+	ACCT_THD_TIME,		/* Record acct for CPU time threshold */
+	ACCT_RCD_INCACCT,	/* Record acct for incremental accounting */
+	ACCT_RCD_APPACCT,	/* Record acct for application accounting */
+	ACCT_RCD_SITE1,		/* Site reserved record acct */
+	ACCT_RCD_SITE2,		/* Site reserved record acct */
+	ACCT_MAXRCDS		/* Max # record entries */
+} ac_kdrcd;
+
+#define	ACCT_RCDS	ACCT_RCD_MPPDET	/* Record acct low range definition */
+#define	NUM_KDS		(ACCT_MAXKDS - ACCT_KERN_CSA)	/* number of kernel and daemon entries */
+#define	NUM_RCDS	(ACCT_MAXRCDS - ACCT_RCDS)	/* number of record entries */
+#define	NUM_KDRCDS	(NUM_KDS + NUM_RCDS)	/* size of the actctl.ac_stat[] array */
+
+
+/*
+ * The following structures are used by the acctctl system call.
+ */ + +/* + * Accounting entry status structure + */ +struct actstat +{ + ac_kdrcd ac_ind; /* Entry index */ + ac_status ac_state; /* Entry status */ + int64_t ac_param; /* Entry parameter */ +}; + +/* + * Accounting control and status structure + */ +#define ACCT_PATH 128 /* Max path length for accounting file */ + +struct actctl +{ + int ac_sttnum; /* Number of status array entries */ + char ac_path[ACCT_PATH]; /* Path name for accounting file */ + struct actstat ac_stat[NUM_KDRCDS]; /* Entry status array */ +}; + +/* + * Function codes for incremental accounting; currently not used + */ +typedef enum +{ + IA_NONE, /* Zero entry place holder */ + IA_DELTA, /* Change clock delta for incremental accounting */ + IA_EVENT, /* Cause incremental accounting event now */ + IA_MAX +} ac_iafnc; + +/* + * Incremental accounting structure; currently not used + */ +struct actinc +{ + int ac_ind; /* Entry index */ + ac_iafnc ac_fnc; /* Entry function */ + int64_t ac_param; /* Entry parameter */ +}; + +/* + * Daemon write accounting structure + */ +#define MAX_WRACCT 1024 /* Maximum buffer size of wracct() */ + +struct actwra +{ + int ac_did; /* Daemon index */ + int ac_len; /* Length of buffer (bytes) */ + uint64_t ac_jid; /* Job ID */ + char *ac_buf; /* Daemon accounting buffer */ +}; + +#ifndef __KERNEL__ +extern int acctctl(int func, void *act); +#endif + +#endif /* _LINUX_CSA_H */ - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/