Hi all,
Attached is a non-perfect patch for review. This is a migration of the wineserver to use epoll instead of poll (if it's available).
current known issue with this patch:
1. Will not compile if HAVE_SYS_EPOLL_H is not 1 (i.e. - won't compile if epoll not available at compile time)
2. Segfaults on wine exit.
3. Lots of debug asserts.
Comments welcome. Shachar
-- Shachar Shemesh Lingnu Open Source Consulting ltd. http://www.lingnu.com/
Index: configure =================================================================== RCS file: /home/sun/sources/cvs/wine/configure,v retrieving revision 1.573 diff -u -r1.573 configure --- configure 17 Jul 2004 00:52:37 -0000 1.573 +++ configure 24 Aug 2004 12:03:32 -0000 @@ -6727,6 +6727,79 @@ fi +echo "$as_me:$LINENO: checking for epoll_create in -lepoll" >&5 +echo $ECHO_N "checking for epoll_create in -lepoll... $ECHO_C" >&6 +if test "${ac_cv_lib_epoll_epoll_create+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lepoll $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any gcc2 internal prototype to avoid an error. */ +#ifdef __cplusplus +extern "C" +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char epoll_create (); +int +main () +{ +epoll_create (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_epoll_epoll_create=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_lib_epoll_epoll_create=no +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +echo "$as_me:$LINENO: result: $ac_cv_lib_epoll_epoll_create" >&5 +echo "${ECHO_T}$ac_cv_lib_epoll_epoll_create" >&6 +if test $ac_cv_lib_epoll_epoll_create = yes; then + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBEPOLL 1 +_ACEOF + + LIBS="-lepoll $LIBS" + +fi + + echo "$as_me:$LINENO: checking for res_9_init in -lresolv" >&5 echo $ECHO_N "checking for res_9_init in -lresolv... $ECHO_C" >&6 if test "${ac_cv_lib_resolv_res_9_init+set}" = set; then @@ -16190,6 +16263,7 @@ + for ac_func in \ _lwp_create \ _lwp_self \ @@ -16202,6 +16276,7 @@ _vsnprintf \ chsize \ clone \ + epoll_create \ finite \ fpclass \ fstatfs \ @@ -16435,6 +16510,7 @@ + for ac_header in \ arpa/inet.h \ arpa/nameser.h \ @@ -16495,6 +16571,7 @@ sys/msg.h \ sys/param.h \ sys/poll.h \ + sys/epoll.h \ sys/ptrace.h \ sys/reg.h \ sys/scsiio.h \ Index: configure.ac =================================================================== RCS file: /home/sun/sources/cvs/wine/configure.ac,v retrieving revision 1.285 diff -u -r1.285 configure.ac --- configure.ac 6 Jul 2004 21:01:19 -0000 1.285 +++ configure.ac 24 Aug 2004 12:01:22 -0000 @@ -140,6 +140,8 @@ AC_CHECK_LIB(xpg4,_xpg4_setrunelocale) dnl Check for -lpoll for Mac OS X/Darwin AC_CHECK_LIB(poll,poll) +dnl Check for -lepoll +AC_CHECK_LIB(epoll,epoll_create) dnl Check for -lresolv for Mac OS X/Darwin AC_CHECK_LIB(resolv,res_9_init) dnl Check for -lpthread @@ -1036,6 +1038,7 @@ _vsnprintf \ chsize \ clone \ + epoll_create \ finite \ fpclass \ fstatfs \ @@ -1146,6 +1149,7 @@ sys/msg.h \ sys/param.h \ sys/poll.h \ + sys/epoll.h \ sys/ptrace.h \ sys/reg.h \ sys/scsiio.h \ Index: include/config.h.in =================================================================== RCS file: /home/sun/sources/cvs/wine/include/config.h.in,v retrieving revision 1.190 diff -u -r1.190 config.h.in --- include/config.h.in 18 Jun 2004 19:36:26 -0000 1.190 +++ include/config.h.in 24 Aug 2004 12:05:42 -0000 @@ -80,6 +80,9 @@ /* Define to 1 if you have the <elf.h> header file. */ #undef HAVE_ELF_H +/* Define to 1 if you have the `epoll_create' function. */ +#undef HAVE_EPOLL_CREATE + /* Define to 1 if you have the `finite' function. */ #undef HAVE_FINITE @@ -248,6 +251,9 @@ /* Define if you have the curses library (-lcurses) */ #undef HAVE_LIBCURSES +/* Define to 1 if you have the `epoll' library (-lepoll). */ +#undef HAVE_LIBEPOLL + /* Define if you have libgif/libungif including devel headers */ #undef HAVE_LIBGIF @@ -608,6 +614,9 @@ /* Define to 1 if you have the <sys/elf32.h> header file. */ #undef HAVE_SYS_ELF32_H +/* Define to 1 if you have the <sys/epoll.h> header file. */ +#undef HAVE_SYS_EPOLL_H + /* Define to 1 if you have the <sys/errno.h> header file. */ #undef HAVE_SYS_ERRNO_H Index: server/fd.c =================================================================== RCS file: /home/sun/sources/cvs/wine/server/fd.c,v retrieving revision 1.21 diff -u -r1.21 fd.c --- server/fd.c 22 May 2004 03:15:04 -0000 1.21 +++ server/fd.c 24 Aug 2004 21:20:53 -0000 @@ -33,6 +33,12 @@ #ifdef HAVE_SYS_POLL_H #include <sys/poll.h> #endif +#ifdef HAVE_SYS_EPOLL_H +#include <sys/epoll.h> +#endif +#ifdef HAVE_SYS_SYSCALL_H +#include <sys/syscall.h> +#endif #include <sys/stat.h> #include <sys/time.h> #include <sys/types.h> @@ -48,6 +54,9 @@ #include "winreg.h" #include "winternl.h" +#undef NDEBUG +#include <assert.h> + /* Because of the stupid Posix locking semantics, we need to keep * track of all file descriptors referencing a given file, and not * close a single one until all the locks are gone (sigh). @@ -255,51 +264,107 @@ /****************************************************************/ -/* poll support */ +/* poll and epoll support */ + +/* Vars used by both */ +static int active_users; /* current number of active users */ +/* Vars used by poll */ static struct fd **poll_users; /* users array */ static struct pollfd *pollfd; /* poll fd array */ static int nb_users; /* count of array entries actually in use */ -static int active_users; /* current number of active users */ static int allocated_users; /* count of allocated entries in the array */ static struct fd **freelist; /* list of free entries in the array */ +/* Vars used by epoll */ +static int epoll_fd=-1; /* File descriptor for the epoll */ +static const int EPOLL_SIZE=1024; /* "Maximal size" passed to epoll_create. This is an optimization, not a real maximum */ +static struct epoll_event *epoll_revents; /* Array for event results */ +static int epoll_revents_size; /* Number of elements in epoll_revents */ + +#if defined(HAVE_EPOLL_CREATE) && defined(HAVE_SYS_SYSCALL_H) +/* To prevent wine from not loading on non-epoll supporting platforms, + * we need to dynamically call the epoll functions */ +static inline int _epoll_create( int size ) +{ + return syscall(__NR_epoll_create, size); +} + +static inline int _epoll_ctl(int epfd, int op, int fd, struct epoll_event *event) +{ + return syscall(__NR_epoll_ctl, epfd, op, fd, event); +} + +static inline int _epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout) +{ + return syscall(__NR_epoll_wait, epfd, events, maxevents, timeout); +} +#else +static inline int _epoll_create( int size ) +{ + errno=ENOSYS; + return -1; +} + +static inline int _epoll_ctl(int epfd, int op, int fd, struct epoll_event *event) +{ + errno=ENOSYS; + return -1; +} + +static inline int _epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout) +{ + errno=ENOSYS; + return -1; +} +#endif + + /* add a user in the poll array and return its index, or -1 on failure */ static int add_poll_user( struct fd *fd ) { int ret; - if (freelist) + if( epoll_fd>=0 ) /* We are using the epoll interface */ { - ret = freelist - poll_users; - freelist = (struct fd **)poll_users[ret]; + /* No poll array when using epoll - always return 1 */ + ret=1; } else - { - if (nb_users == allocated_users) + { /* We are using the poll interface */ + if (freelist) { - struct fd **newusers; - struct pollfd *newpoll; - int new_count = allocated_users ? (allocated_users + allocated_users / 2) : 16; - if (!(newusers = realloc( poll_users, new_count * sizeof(*poll_users) ))) return -1; - if (!(newpoll = realloc( pollfd, new_count * sizeof(*pollfd) ))) + ret = freelist - poll_users; + freelist = (struct fd **)poll_users[ret]; + } + else + { + if (nb_users == allocated_users) { - if (allocated_users) - poll_users = newusers; - else - free( newusers ); - return -1; + struct fd **newusers; + struct pollfd *newpoll; + int new_count = allocated_users ? (allocated_users + allocated_users / 2) : 16; + if (!(newusers = realloc( poll_users, new_count * sizeof(*poll_users) ))) return -1; + if (!(newpoll = realloc( pollfd, new_count * sizeof(*pollfd) ))) + { + if (allocated_users) + poll_users = newusers; + else + free( newusers ); + return -1; + } + poll_users = newusers; + pollfd = newpoll; + allocated_users = new_count; } - poll_users = newusers; - pollfd = newpoll; - allocated_users = new_count; - } - ret = nb_users++; - } - pollfd[ret].fd = -1; - pollfd[ret].events = 0; - pollfd[ret].revents = 0; - poll_users[ret] = fd; + ret = nb_users++; + } + pollfd[ret].fd = -1; + pollfd[ret].events = 0; + pollfd[ret].revents = 0; + poll_users[ret] = fd; + } active_users++; + return ret; } @@ -307,15 +372,37 @@ static void remove_poll_user( struct fd *fd, int user ) { assert( user >= 0 ); - assert( poll_users[user] == fd ); - pollfd[user].fd = -1; - pollfd[user].events = 0; - pollfd[user].revents = 0; - poll_users[user] = (struct fd *)freelist; - freelist = &poll_users[user]; + if( epoll_fd>=0 ) + { + if( fd->unix_fd!=-1 ) + { + struct epoll_event event; /* This struct needn't actually contain anything meaningful, as we are merely calling DEL */ + if(!(_epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd->unix_fd, &event)==0 || errno==ENOENT)) + { + int err; + err=errno; + } + } + } + else + { + assert( poll_users[user] == fd ); + pollfd[user].fd = -1; + pollfd[user].events = 0; + pollfd[user].revents = 0; + poll_users[user] = (struct fd *)freelist; + freelist = &poll_users[user]; + } active_users--; } +/* initialize the fd related state */ +void init_fd() +{ + /* If this function fails, epoll_fd will be -1, which will signal + * the rest of the functions to use poll instead */ + epoll_fd=_epoll_create(EPOLL_SIZE); +} /* server main poll() loop */ void main_loop(void) @@ -341,16 +428,44 @@ } if (!active_users) break; /* last user removed by a timeout */ } - ret = poll( pollfd, nb_users, diff ); - if (ret > 0) + if( epoll_fd>=0 ) { - int i; - for (i = 0; i < nb_users; i++) + /* if the event handlers added a lot of new events, we may want to increase the returned + * events space */ + if( epoll_revents_size<active_users ) { - if (pollfd[i].revents) + /* If epoll_revents_size is below active_users, there is a (very slim) potential for starvation. + * It is not, strictly speaking, an error though. As such, if the allocation fails, we just go on + * with the smaller array size. */ + struct epoll_event *new_revents=malloc(sizeof(struct epoll_event)*active_users); + if( new_revents!=NULL ) { - fd_poll_event( poll_users[i], pollfd[i].revents ); - if (!--ret) break; + free(epoll_revents); + epoll_revents=new_revents; + epoll_revents_size=active_users; + } + } + + ret = epoll_wait(epoll_fd, epoll_revents, epoll_revents_size, diff ); + while( (ret--)>0 ) + { + fd_poll_event((struct fd *)epoll_revents[ret].data.ptr, epoll_revents[ret].events); + } + } + else + { + /* No epoll support - use poll */ + ret = poll( pollfd, nb_users, diff ); + if (ret > 0) + { + int i; + for (i = 0; i < nb_users; i++) + { + if (pollfd[i].revents) + { + fd_poll_event( poll_users[i], pollfd[i].revents ); + if (!--ret) break; + } } } } @@ -835,18 +950,46 @@ /* set the events that select waits for on this fd */ void set_fd_events( struct fd *fd, int events ) { - int user = fd->poll_index; - assert( poll_users[user] == fd ); - if (events == -1) /* stop waiting on this fd completely */ + if( epoll_fd>=0 ) { - pollfd[user].fd = -1; - pollfd[user].events = POLLERR; - pollfd[user].revents = 0; + struct epoll_event event; + event.events=events; + event.data.ptr=fd; + + assert(fd->unix_fd>=0); + + if( events != -1 ) + { + int epoll_res; + if( (epoll_res=_epoll_ctl(epoll_fd, EPOLL_CTL_MOD, fd->unix_fd, &event )!=0) && errno==ENOENT ) + { + /* For one reason or another this file descriptor isn't being watched at the moment. Just add it */ + assert(_epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd->unix_fd, &event )==0); + } + else + assert(epoll_res==0); + } + else + { + /* Stop waiting on this fd completely */ + assert(_epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd->unix_fd, &event )==0); + } } - else if (pollfd[user].fd != -1 || !pollfd[user].events) + else { - pollfd[user].fd = fd->unix_fd; - pollfd[user].events = events; + int user = fd->poll_index; + assert( poll_users[user] == fd ); + if (events == -1) /* stop waiting on this fd completely */ + { + pollfd[user].fd = -1; + pollfd[user].events = POLLERR; + pollfd[user].revents = 0; + } + else if (pollfd[user].fd != -1 || !pollfd[user].events) + { + pollfd[user].fd = fd->unix_fd; + pollfd[user].events = events; + } } } Index: server/file.h =================================================================== RCS file: /home/sun/sources/cvs/wine/server/file.h,v retrieving revision 1.16 diff -u -r1.16 file.h --- server/file.h 16 Apr 2004 04:31:35 -0000 1.16 +++ server/file.h 24 Aug 2004 13:34:21 -0000 @@ -44,6 +44,7 @@ /* file descriptor functions */ +extern void init_fd(); extern struct fd *alloc_fd( const struct fd_ops *fd_user_ops, struct object *user ); extern struct fd *open_fd( struct fd *fd, const char *name, int flags, mode_t *mode, unsigned int access, unsigned int sharing, unsigned int options ); Index: server/main.c =================================================================== RCS file: /home/sun/sources/cvs/wine/server/main.c,v retrieving revision 1.31 diff -u -r1.31 main.c --- server/main.c 26 Mar 2003 01:32:18 -0000 1.31 +++ server/main.c 24 Aug 2004 15:49:33 -0000 @@ -122,6 +122,7 @@ signal( SIGTERM, sigterm_handler ); signal( SIGABRT, sigterm_handler ); + init_fd(); sock_init(); open_master_socket(); sync_namespace = create_namespace( 37, TRUE );