User: patriot1burke Date: 01/06/11 12:52:26 Modified: src/main/org/jboss/ejb/plugins EntitySynchronizationInterceptor.java EntityInstanceInterceptor.java EntityInstanceCache.java AbstractInstanceCache.java Log: race condition: - When a rollback happens within a transaction, InstanceSynchronization.afterCompletion would call ctx.setTransaction(null) and would not protect the EntityEnterpriseContext with a mutex lock. There it was quite possible for another thread waiting for the entity bean to leave it's transaction to wakeup, do a cache.get(id) before afterCompletion did a cache.remove and an InstancePool.free.(I actually wrote a little test that did a sleep after ctx.setTransaction(null) to verify this). Really bad things can happen like the context being put back into the instance pool while another thread thinks the context is still valid. invalid mutexes: - When an EntityInstanceCache.remove is called, the Entity's mutex is disassociated with the id of the Entity. Thus, all threads waiting on the same bean will be locking on an invalid mutex if EntityInstanceCache.remove is called. ctx.setTransaction not synchronized - setTransaction is called within EntityContainer.ContainerInterceptor. The mutex of the Entity is NOT acquire before this is done, therefore, with re-entrant beans, you can possibly have 2 transaction working on the same entity instance. Too many LOCKING-WAITING messages: - You server.log file can grow to 100 megabytes or more very quickly when beans are in contention. FIXES: - Wherever there is an EntityInstanceCache.remove(), I make sure that I acquire the mutex of the entity before doing this. - I added the ability to invalidate a mutex. When AbstractInstanceCache.removeLock is called, the mutex is flagged as invalid. EntityInstanceInterceptor checks for this and does a re-lookup of the mutex. - I do ctx.setTransaction within the EntityInstanceInterceptor to ensure that a transactional lock happens. THis will fix the re-entrant bean problem I discussed earlier. - I put in some code in EntityInstanceInterceptor so that LOCKING-WAITING messages don't happen continuously. - Put in a Thread.yield() in the infamous EntityInstanceInterceptor do..while loop. - Added an extension of org.jboss.util.Semaphore, BeanSemaphore, for invalidation of mutexes. - Added an id check in EntityInstanceCache.get to make sure that the key you're looking up on is equal the the EntityEnterpriseContext.getId() Revision Changes Path 1.35 +23 -5 jboss/src/main/org/jboss/ejb/plugins/EntitySynchronizationInterceptor.java Index: EntitySynchronizationInterceptor.java =================================================================== RCS file: /cvsroot/jboss/jboss/src/main/org/jboss/ejb/plugins/EntitySynchronizationInterceptor.java,v retrieving revision 1.34 retrieving revision 1.35 diff -u -r1.34 -r1.35 --- EntitySynchronizationInterceptor.java 2001/06/05 05:11:22 1.34 +++ EntitySynchronizationInterceptor.java 2001/06/11 19:52:26 1.35 @@ -37,6 +37,7 @@ import org.jboss.ejb.MethodInvocation; import org.jboss.metadata.ConfigurationMetaData; import org.jboss.logging.Logger; +import org.jboss.util.Sync; /** * This container filter takes care of EntityBean persistance. @@ -49,7 +50,7 @@ * @see <related> * @author Rickard Öberg ([EMAIL PROTECTED]) * @author <a href="mailto:[EMAIL PROTECTED]">Marc Fleury</a> -* @version $Revision: 1.34 $ +* @version $Revision: 1.35 $ */ public class EntitySynchronizationInterceptor extends AbstractInterceptor @@ -411,22 +412,39 @@ // If rolled back -> invalidate instance // If removed -> send back to the pool if (status == Status.STATUS_ROLLEDBACK || ctx.getId() == null) { - + EntityContainer container = (EntityContainer)ctx.getContainer(); + AbstractInstanceCache cache = (AbstractInstanceCache)container.getInstanceCache(); + Object id = ctx.getCacheKey(); + // Hopefully this transaction has an exclusive lock + // on this id so that cache.getLock returns a mutex that is being + // used by every other thread on this id. + Sync mutex = (Sync)cache.getLock(id); try { - + // We really should be acquiring a mutex before + // mucking with the InstanceCache or InstancePool + mutex.acquire(); // finish the transaction association ctx.setTransaction(null); // remove from the cache + // removing from the cache should also invalidate the mutex thus waking up + // other threads. container.getInstanceCache().remove(ctx.getCacheKey()); // return to pool - container.getInstancePool().free(ctx); + // REVISIT: FIXME: + // We really should only let passivation free an instance because it is + // quite possible that another thread is working with + // the same context, but let's do it anyways. + container.getInstancePool().free(ctx); } catch (Exception e) { // Ignore } - + finally + { + mutex.release(); + } } else { // We are afterCompletion so the invoked can be set to false (db sync is done) 1.29 +198 -107 jboss/src/main/org/jboss/ejb/plugins/EntityInstanceInterceptor.java Index: EntityInstanceInterceptor.java =================================================================== RCS file: /cvsroot/jboss/jboss/src/main/org/jboss/ejb/plugins/EntityInstanceInterceptor.java,v retrieving revision 1.28 retrieving revision 1.29 diff -u -r1.28 -r1.29 --- EntityInstanceInterceptor.java 2001/01/12 00:05:53 1.28 +++ EntityInstanceInterceptor.java 2001/06/11 19:52:26 1.29 @@ -45,7 +45,7 @@ * @author Rickard Öberg ([EMAIL PROTECTED]) * @author <a href="mailto:[EMAIL PROTECTED]">Marc Fleury</a> * @author <a href="mailto:[EMAIL PROTECTED]">Sebastien Alborini</a> -* @version $Revision: 1.28 $ +* @version $Revision: 1.29 $ */ public class EntityInstanceInterceptor extends AbstractInterceptor @@ -106,22 +106,44 @@ // Get cache AbstractInstanceCache cache = (AbstractInstanceCache)container.getInstanceCache(); - Sync mutex = (Sync)cache.getLock(key); + BeanSemaphore mutex = (BeanSemaphore)cache.getLock(key); EnterpriseContext ctx = null; + boolean exceptionThrown = false; try { + boolean waitingOnTransaction = false; // So we don't output LOCKING-WAITING all the time + boolean waitingOnContext = false; // So we don't output LOCKING-WAITING all the time do { if (mi.getTransaction() != null && mi.getTransaction().getStatus() == Status.STATUS_MARKED_ROLLBACK) throw new RuntimeException("Transaction marked for rollback, possibly a timeout"); - try - { - - mutex.acquire(); - + try + { + + mutex.acquire(); + + // This loop guarantees a mutex lock for the specified object id + // When a cache.remove happens, it disassociates the mutex from the bean's id, + // Thus, the mutex in this code could be invalid. We avoid this problem with the following loop. + // + // Also we should have a while loop so we don't mess up the finally clause that does + // mutex.release() + // + while (!mutex.isValid()) + { + BeanSemaphore newmutex = (BeanSemaphore)cache.getLock(key); + mutex.release(); + mutex = newmutex; + + // Avoid infinite loop. + if (mi.getTransaction() != null && mi.getTransaction().getStatus() == Status.STATUS_MARKED_ROLLBACK) + throw new RuntimeException("Transaction marked for rollback, possibly a timeout"); + + mutex.acquire(); + } // Get context ctx = cache.get(key); @@ -133,50 +155,95 @@ { // Let's put the thread to sleep a lock release will wake the thread // Possible deadlock - Logger.debug("LOCKING-WAITING (TRANSACTION) for id "+ctx.getId()+" ctx.hash "+ctx.hashCode()+" tx:"+((tx == null) ? "null" : tx.toString())); + if (!waitingOnTransaction) + { + Logger.debug("LOCKING-WAITING (TRANSACTION) in Thread " + + Thread.currentThread().getName() + + " for id "+ctx.getId()+" ctx.hash "+ctx.hashCode() + +" tx:"+((tx == null) ? "null" : tx.toString())); + waitingOnTransaction = true; + } // Try your luck again ctx = null; + Thread.yield(); // Give the OS some help. continue; } - else - { - // If we get here it's the right tx, or no tx - if (!ctx.isLocked()) - { - //take it! - ctx.lock(); - } - else - { - if (!isCallAllowed(mi)) { - - // Go to sleep and wait for the lock to be released - // This is not one of the "home calls" so we need to wait for the lock - - // Possible deadlock - Logger.debug("LOCKING-WAITING (CTX) for id "+ctx.getId()+" ctx.hash "+ctx.hashCode()); - - // Try your luck again - ctx = null; - continue; - // Not allowed reentrant call - //throw new RemoteException("Reentrant call"); - } - else - { + if (waitingOnTransaction) + { + Logger.debug("FINISHED-LOCKING-WAITING (TRANSACTION) in Thread " + + Thread.currentThread().getName() + + " for id "+ctx.getId() + +" ctx.hash "+ctx.hashCode() + +" tx:"+((tx == null) ? "null" : tx.toString())); + waitingOnTransaction = false; + } + // OK so we now know that for this PrimaryKey, no other + // thread has a transactional lock on the bean. + // So, let's setTransaction of the ctx here instead of in later code. + // I really don't understand why it wasn't done here anyways before. + // Later on, in the finally clause, I'll check to see if the ctx was + // invoked upon and if not, dissassociate the transaction with the ctx. + // If there is no transactional assocation here, there is a race condition + // for re-entrant entity beans, so don't remove the code below. + // + // If this "waitingOnTransaction" loop is ever removed in favor of + // something like using db locks instead, this transactional assocation + // must be removed. + if (mi.getTransaction() != null && tx != null && !tx.equals(mi.getTransaction())) + { + // Do transactional "lock" on ctx right now! + ctx.setTransaction(mi.getTransaction()); + } + // If we get here it's the right tx, or no tx + if (!ctx.isLocked()) + { + //take it! + ctx.lock(); + } + else + { + if (!isCallAllowed(mi)) + { + // Go to sleep and wait for the lock to be released + // This is not one of the "home calls" so we need to wait for the lock + + // Possible deadlock + if (!waitingOnContext) + { + Logger.debug("LOCKING-WAITING (CTX) in Thread " + + Thread.currentThread().getName() + + " for id "+ctx.getId() + +" ctx.hash "+ctx.hashCode()); + waitingOnContext = true; + } + + // Try your luck again + ctx = null; + Thread.yield(); // Help out the OS. + continue; + } + else + { //We are in a home call so take the lock, take it! - ctx.lock(); - } - } - } + ctx.lock(); + } + } + if (waitingOnContext) + { + Logger.debug("FINISHED-LOCKING-WAITING (CTX) in Thread " + + Thread.currentThread().getName() + + " for id " + + ctx.getId() + + " ctx.hash " + ctx.hashCode()); + waitingOnContext = false; + } } - catch (InterruptedException ignored) {} - finally - { - mutex.release(); - } - + catch (InterruptedException ignored) {} + finally + { + mutex.release(); + } } while (ctx == null); // Set context on the method invocation @@ -188,79 +255,103 @@ } catch (RemoteException e) { - // Discard instance - // EJB 1.1 spec 12.3.1 - cache.remove(key); - + exceptionThrown = true; throw e; } catch (RuntimeException e) { - // Discard instance - // EJB 1.1 spec 12.3.1 - cache.remove(key); - + exceptionThrown = true; throw e; } catch (Error e) { - // Discard instance - // EJB 1.1 spec 12.3.1 - cache.remove(key); - + exceptionThrown = true; throw e; - } finally + } + finally { - // Logger.debug("Release instance for "+id); - - // ctx can be null if cache.get throws an Exception, for - // example when activating a bean. - if (ctx != null) + try + { + mutex.acquire(); + // Logger.debug("Release instance for "+id); + // ctx can be null if cache.get throws an Exception, for + // example when activating a bean. + if (ctx != null) + { + // unlock the context + ctx.unlock(); + + Transaction tx = ctx.getTransaction(); + if (tx != null + && mi.getTransaction() != null + && tx.equals(mi.getTransaction()) + && !((EntityEnterpriseContext)ctx).isInvoked()) + { + // The context has been associated with this method's transaction + // but the entity has not been invoked upon yet, so let's + // disassociate the transaction from the ctx. + // I'm doing this because I'm assuming that the bean hasn't been registered with + // the TxManager. + ctx.setTransaction(null); + } + + if (exceptionThrown) + { + // Discard instance + // EJB 1.1 spec 12.3.1 + // + cache.remove(key); + } + else if (ctx.getId() == null) + { + // Work only if no transaction was encapsulating this remove() + if (ctx.getTransaction() == null) { - try - { - mutex.acquire(); - - // unlock the context - ctx.unlock(); - - if (ctx.getId() == null) - { - - // Work only if no transaction was encapsulating this remove() - if (ctx.getTransaction() == null) - { - // Here we arrive if the bean has been removed and no - // transaction was associated with the remove, or if - // the bean has been passivated - - // Remove from cache - cache.remove(key); - - // It has been removed -> send to the pool - container.getInstancePool().free(ctx); - } - else - { - // We want to remove the bean, but it has a Tx associated with - // the remove() method. We remove it from the cache, to avoid - // that a successive insertion with same pk will break the - // cache. Anyway we don't free the context, since the tx must - // finish. The EnterpriseContext instance will be GC and not - // recycled. - cache.remove(key); - } - } - else - { - // Yeah, do nothing - } - } - catch (InterruptedException ignored) {} - finally - { - mutex.release(); - } + // Here we arrive if the bean has been removed and no + // transaction was associated with the remove, or if + // the bean has been passivated + + // Remove from cache + cache.remove(key); + + // It has been removed -> send to the pool + // REVISIT: FIXME: + // We really should only let passivation free an instance because it is + // quite possible that another thread is working with + // the same context, but let's do it anyways. + // + container.getInstancePool().free(ctx); } - } + else + { + // We want to remove the bean, but it has a Tx associated with + // the remove() method. We remove it from the cache, to avoid + // that a successive insertion with same pk will break the + // cache. Anyway we don't free the context, since the tx must + // finish. The EnterpriseContext instance will be GC and not + // recycled. + cache.remove(key); + } + } + } + else + { + /* + * This used to happen in the old code. + * Why? If the ctx is null, why should we remove it? Another thread could + * be screwed up by this. + if (exceptionThrown) + { + // Discard instance + // EJB 1.1 spec 12.3.1 + cache.remove(key); + } + */ + } + } + finally + { + mutex.release(); + } + } } // Private -------------------------------------------------------- 1.4 +19 -6 jboss/src/main/org/jboss/ejb/plugins/EntityInstanceCache.java Index: EntityInstanceCache.java =================================================================== RCS file: /cvsroot/jboss/jboss/src/main/org/jboss/ejb/plugins/EntityInstanceCache.java,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- EntityInstanceCache.java 2000/12/12 09:40:27 1.3 +++ EntityInstanceCache.java 2001/06/11 19:52:26 1.4 @@ -20,7 +20,7 @@ * Cache subclass for entity beans. * * @author Simone Bordet ([EMAIL PROTECTED]) - * @version $Revision: 1.3 $ + * @version $Revision: 1.4 $ */ public class EntityInstanceCache extends AbstractInstanceCache @@ -51,11 +51,24 @@ public EnterpriseContext get(Object id) throws RemoteException, NoSuchObjectException { - if (!(id instanceof CacheKey)) - { - throw new IllegalArgumentException("cache.get for entity beans must have a CacheKey object as argument instead of " + id); - } - return super.get(id); + if (!(id instanceof CacheKey)) + { + throw new IllegalArgumentException("cache.get for entity beans must have a CacheKey object as argument instead of " + id); + } + EnterpriseContext rtn = null; + rtn = super.get(id); + // + // FIXME: (Bill Burke) We were running into problems where CMP EntityBeans + // were out of sync with the database + // The problem went away after a few minutes of inactivity leading us to + // believe that the bean in cache was passivated and the CacheKey was out + // of sync with the Context's key. So I put this defensive check in to + // flag the problem. + if (rtn != null && !rtn.getId().equals(((CacheKey)id).id)) + { + throw new IllegalStateException("somehow the cache is out of synch with the context ctx.id != lookup.id"); + } + return rtn; } public void remove(Object id) { 1.7 +7 -3 jboss/src/main/org/jboss/ejb/plugins/AbstractInstanceCache.java Index: AbstractInstanceCache.java =================================================================== RCS file: /cvsroot/jboss/jboss/src/main/org/jboss/ejb/plugins/AbstractInstanceCache.java,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- AbstractInstanceCache.java 2001/03/26 12:38:59 1.6 +++ AbstractInstanceCache.java 2001/06/11 19:52:26 1.7 @@ -56,7 +56,7 @@ * </ul> * * @author Simone Bordet ([EMAIL PROTECTED]) - * @version $Revision: 1.6 $ + * @version $Revision: 1.7 $ */ public abstract class AbstractInstanceCache implements InstanceCache, XmlLoadable, Monitorable, MetricsConstants @@ -263,7 +263,7 @@ Sync mutex = (Sync)m_lockMap.get(id); if (mutex == null) { - mutex = new Semaphore(1); + mutex = new BeanSemaphore(1); m_lockMap.put(id, mutex); } return mutex; @@ -277,7 +277,11 @@ Object mutex = m_lockMap.get(id); if (mutex != null) { - m_lockMap.remove(id); + if (mutex instanceof BeanSemaphore) + { + ((BeanSemaphore)mutex).invalidate(); + } + m_lockMap.remove(id); } } _______________________________________________ Jboss-development mailing list [EMAIL PROTECTED] http://lists.sourceforge.net/lists/listinfo/jboss-development