Monty, Apparently you pushed this patch into 10.0, even though I explained that it is incorrect, and why. That's not cool, and you can even see it failing in Buildbot now.
Can you please fix it ASAP? - Kristian. Kristian Nielsen <[email protected]> writes: > Monty writes: > >> Can you please check if this looks correct? > >> diff --git a/sql/mdl.cc b/sql/mdl.cc >> index 28d2006..3b1f9f2 100644 >> --- a/sql/mdl.cc >> +++ b/sql/mdl.cc >> @@ -17,6 +17,7 @@ >> #include "sql_class.h" >> #include "debug_sync.h" >> #include "sql_array.h" >> +#include "rpl_rli.h" >> #include <hash.h> >> #include <mysqld_error.h> >> #include <mysql/plugin.h> >> @@ -442,6 +443,7 @@ class MDL_lock >> virtual void notify_conflicting_locks(MDL_context *ctx) = 0; >> >> virtual bitmap_t hog_lock_types_bitmap() const = 0; >> + bool check_if_conflicting_replication_locks(MDL_context *ctx); > > This should probably be #ifdef NDEBUG, since you're only using it in debug > build? > >> >> /** List of granted tickets for this lock. */ >> Ticket_list m_granted; >> @@ -2290,6 +2292,44 @@ void >> MDL_scoped_lock::notify_conflicting_locks(MDL_context *ctx) >> } >> } >> >> +/** >> + Check if there is any conflicting lock that could cause this thread >> + to wait for another thread which is not ready to commit. >> + This is always an error, as the upper level of parallel replication >> + should not allow a scheduling of a conflicting DDL until all earlier >> + transactions has commited. >> + >> + This function is only called for a slave using parallel replication >> + and trying to get an exclusive lock for the table. >> +*/ >> + >> +bool MDL_lock::check_if_conflicting_replication_locks(MDL_context *ctx) >> +{ >> + Ticket_iterator it(m_granted); >> + MDL_ticket *conflicting_ticket; >> + >> + while ((conflicting_ticket= it++)) >> + { >> + if (conflicting_ticket->get_ctx() != ctx) >> + { >> + MDL_context *conflicting_ctx= conflicting_ticket->get_ctx(); >> + >> + /* >> + If the conflicting thread is another parallel replication >> + thread for the same master and it's not in commit stage, then >> + the current transaction has started too early and something is >> + seriously wrong. >> + */ >> + if (conflicting_ctx->get_thd()->rgi_slave && >> + conflicting_ctx->get_thd()->rgi_slave->rli == >> + ctx->get_thd()->rgi_slave->rli && >> + !conflicting_ctx->get_thd()->rgi_slave->did_mark_start_commit) >> + return 1; // Fatal error > > No, this is not correct. For example, the threads might be in different > domains, or using different multi-source connections. You need a check like > in thd_report_wait_for(). _If_ T1 and T2 are in the same parallel > replication domain, _and_ T1 goes before T2, _and_ there is a DDL lock > conflict but T1 has not started to commit, then it indicates a potential > problem. > > Also, the comments are too vague, "Something is seriously wrong" is not a > good description. What is wrong? Please describe the issue being checked > more precisely. What is the role of each of the two threads involved? What > is the situation being checked? Why is it wrong? > > Also, use some temporaries for conflicting_ctx->get_thd()->rgi_slave etc, so > the condition becomes more readable. > >> + } >> + } >> + return 0; >> +} >> + >> >> /** >> Acquire one lock with waiting for conflicting locks to go away if needed. >> @@ -2355,6 +2395,11 @@ MDL_context::acquire_lock(MDL_request *mdl_request, >> ulong lock_wait_timeout) >> if (lock->needs_notification(ticket) && lock_wait_timeout) >> lock->notify_conflicting_locks(this); >> >> + DBUG_ASSERT(mdl_request->type != MDL_INTENTION_EXCLUSIVE || >> + !(get_thd()->rgi_slave && >> + get_thd()->rgi_slave->is_parallel_exec && >> + lock->check_if_conflicting_replication_locks(this))); >> + >> mysql_prlock_unlock(&lock->m_rwlock); >> >> will_wait_for(ticket); >> diff --git a/sql/mdl.h b/sql/mdl.h >> index c4d792a..13de602 100644 >> --- a/sql/mdl.h >> +++ b/sql/mdl.h >> @@ -910,7 +910,6 @@ class MDL_context >> */ >> MDL_wait_for_subgraph *m_waiting_for; >> private: >> - THD *get_thd() const { return m_owner->get_thd(); } >> MDL_ticket *find_ticket(MDL_request *mdl_req, >> enum_mdl_duration *duration); >> void release_locks_stored_before(enum_mdl_duration duration, MDL_ticket >> *sentinel); >> @@ -919,6 +918,7 @@ class MDL_context >> MDL_ticket **out_ticket); >> >> public: >> + THD *get_thd() const { return m_owner->get_thd(); } >> void find_deadlock(); >> >> ulong get_thread_id() const { return thd_get_thread_id(get_thd()); } > > - Kristian. > > _______________________________________________ > Mailing list: https://launchpad.net/~maria-developers > Post to : [email protected] > Unsubscribe : https://launchpad.net/~maria-developers > More help : https://help.launchpad.net/ListHelp _______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : [email protected] Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp

