Hi Sascha, On 05.08.2016 16:27, Sascha Steinbiss wrote: >> So I finally got round to submit my patches for MUMmer. One can be found >> in the repo, half of the other is attached to this mail. Unfortunately, >> the other half got lost in a code reindentation. Applying the changes >> requires refactoring src/tigr/sw_align.cc for the changes in the header. > > Not really sure I understand what you mean... the patches in the repo's > series all apply without a problem and the one attached to your mail > doesn't seem to do a lot. Are you sure it is complete? Or where is the > other half one would be required to refactor?
Right, the patch for sw_align.hh changes some structures and now the algorithms in sw_align.cc have to be adapted, too. The attached patch may give you an idea of the necessary changes. I may tackle the issue next week. >> If someone has a lot of spare time on the weekend, feel free to go ahead. > > Also, are the patches Debian-specific? Given that they don't seem to be > fixes but rather improve performance, I could imagine Stefan Kurtz > (MUMmer upstream) would probably appreciate receiving these patches as well. No, these are not Debian specific. However, the one commited patch applies to a tool which comes from mugsy, not mummer. If we get the other one cleaned up, we could drop Stefan a note. Best, Fabian
1,2d0 < #include <math.h> < #include <string.h> 3a2,3 > #include <math.h> > 17a18 > 21a23,26 > > > > 23,31c28,46 < static void generateDelta(const Diagonal *Diag, long int FinishCt, < long int FinishCDi, long int N, < vector<long int> &Delta); < < static inline unsigned char maxScore(const Node &); < < static inline long int scoreMatch(const Diagonal Diag, long int Dct, < long int CDi, const char *A, const char *B, < long int N, unsigned int m_o); --- > static void generateDelta > (const Diagonal * Diag, long int FinishCt, long int FinishCDi, > long int N, vector<long int> & Delta); > > > static inline Score * maxScore > (Score S[3]); > > > static inline long int scoreMatch > (const Diagonal Diag, long int Dct, long int CDi, > const char * A, const char * B, long int N, unsigned int m_o); > > > static inline void scoreEdit > (Score & curr, const long int del, const long int ins, const long int mat); > > > 33,34d47 < static inline Score scoreEdit(const long int del, const long int ins, < const long int mat); 37c50,51 < bool _alignEngine(const char *A0, long int Astart, long int &Aend, --- > bool _alignEngine > (const char * A0, long int Astart, long int & Aend, 53,65d66 < /*-- < The code in this function looks bad, but its performance is 'ok', as < far as I can tell. I tried to replace it with the Gotoh() implementation < from seqan, but that made the program *much* slower. It is probably due < to the break off defined by `_break_len` this cuts things short, if < there is no reasonable expectation to reach a decent alignment. < < -- Fabian Klötzl, 2016-03-08 < */ < < // most common modus operandi: Forward Align < < 108c109,110 < if (m_o & DIRECTION_BIT) { --- > if ( m_o & DIRECTION_BIT ) > { 113c115,117 < } else { --- > } > else > { 128,135c132,139 < Diag[0].I[0].values[DELETE] = min_score; < Diag[0].I[0].values[INSERT] = min_score; < Diag[0].I[0].values[MATCH] = 0; < Diag[0].I[0].maxi = MATCH; // = Diag[0].I[0].S + MATCH; < < Diag[0].I[0].used[DELETE] = NONE; < Diag[0].I[0].used[INSERT] = NONE; < Diag[0].I[0].used[MATCH] = START; --- > Diag[0] . I[0] . S[DELETE] . value = min_score; > Diag[0] . I[0] . S[INSERT] . value = min_score; > Diag[0] . I[0] . S[MATCH] . value = 0; > Diag[0] . I[0] . max = Diag[0] . I[0] . S + MATCH; > > Diag[0] . I[0] . S[DELETE] . used = NONE; > Diag[0] . I[0] . S[INSERT] . used = NONE; > Diag[0] . I[0] . S[MATCH] . used = START; 141,143c145,148 < for (Dct = 1; < Dct <= N + M && (Dct - FinishCt) <= _break_len && lbound <= rbound; < Dct++) { --- > for ( Dct = 1; Dct <= N + M && > (Dct - FinishCt) <= _break_len && > lbound <= rbound; Dct++ ) > { 145c150,151 < if (Dct >= Ll) { --- > if ( Dct >= Ll ) > { 147c153,154 < Diag = (Diagonal *)Safe_realloc(Diag, sizeof(Diagonal) * Ll); --- > Diag = (Diagonal *) Safe_realloc > ( Diag, sizeof(Diagonal) * Ll ); 155c162,163 < Diag[Dct].I = (Node *)Safe_malloc(Ds * sizeof(Node)); --- > Diag[Dct] . I = (Node *) Safe_malloc > ( Ds * sizeof(Node) ); 161c169,170 < if (Ds > MaxL) MaxL = Ds; --- > if ( Ds > MaxL ) > MaxL = Ds; 165c174,175 < if (Dct <= N) { --- > if ( Dct <= N ) > { 168c178,180 < } else { --- > } > else > { 182c194,195 < if (PPDct >= 0) { --- > if ( PPDct >= 0 ) > { 186c199,200 < } else --- > } > else 190c204,205 < if (m_o & FORCED_BIT) high_score = min_score; --- > if ( m_o & FORCED_BIT ) > high_score = min_score; 194c209,210 < for (CDi = lbound; CDi <= rbound; CDi++) { --- > for ( CDi = lbound; CDi <= rbound; CDi ++ ) > { 198,200d213 < auto D_DPct = Diag[PDct]; < auto I_PDi = D_DPct.I[PDi]; < 202,217c215,233 < if (PDi >= 0 && PDi < PDs) { < Score dummy = scoreEdit( < I_PDi.used[DELETE] == NONE < ? I_PDi.values[DELETE] < : I_PDi.values[DELETE] + CONT_GAP_SCORE[_matrix_type], < I_PDi.used[INSERT] == NONE < ? I_PDi.values[INSERT] < : I_PDi.values[INSERT] + OPEN_GAP_SCORE[_matrix_type], < I_PDi.used[MATCH] == NONE < ? I_PDi.values[MATCH] < : I_PDi.values[MATCH] + OPEN_GAP_SCORE[_matrix_type]); < Diag[Dct].I[Di].values[DELETE] = dummy.value; < Diag[Dct].I[Di].used[DELETE] = dummy.used; < } else { < Diag[Dct].I[Di].values[DELETE] = min_score; < Diag[Dct].I[Di].used[DELETE] = NONE; --- > if ( PDi >= 0 && PDi < PDs ) > scoreEdit > (Diag[Dct] . I[Di] . S[DELETE], > Diag[PDct] . I[PDi] . S[DELETE] . used == NONE ? > Diag[PDct] . I[PDi] . S[DELETE] . value : > Diag[PDct] . I[PDi] . S[DELETE] . value + > CONT_GAP_SCORE [_matrix_type], > Diag[PDct] . I[PDi] . S[INSERT] . used == NONE ? > Diag[PDct] . I[PDi] . S[INSERT] . value : > Diag[PDct] . I[PDi] . S[INSERT] . value + > OPEN_GAP_SCORE [_matrix_type], > Diag[PDct] . I[PDi] . S[MATCH] . used == NONE ? > Diag[PDct] . I[PDi] . S[MATCH] . value : > Diag[PDct] . I[PDi] . S[MATCH] . value + > OPEN_GAP_SCORE [_matrix_type]); > else > { > Diag[Dct] . I[Di] . S[DELETE] . value = min_score; > Diag[Dct] . I[Di] . S[DELETE] . used = NONE; 223,239c239,257 < if (PDi >= 0 && PDi < PDs) { < auto parent = Diag[PDct].I[PDi]; < auto dummy = scoreEdit( < parent.used[DELETE] == NONE < ? parent.values[DELETE] < : parent.values[DELETE] + OPEN_GAP_SCORE[_matrix_type], < parent.used[INSERT] == NONE < ? parent.values[INSERT] < : parent.values[INSERT] + CONT_GAP_SCORE[_matrix_type], < parent.used[MATCH] == NONE < ? parent.values[MATCH] < : parent.values[MATCH] + OPEN_GAP_SCORE[_matrix_type]); < Diag[Dct].I[Di].values[INSERT] = dummy.value; < Diag[Dct].I[Di].used[INSERT] = dummy.used; < } else { < Diag[Dct].I[Di].values[INSERT] = min_score; < Diag[Dct].I[Di].used[INSERT] = NONE; --- > if ( PDi >= 0 && PDi < PDs ) > scoreEdit > (Diag[Dct] . I[Di] . S[INSERT], > Diag[PDct] . I[PDi] . S[DELETE] . used == NONE ? > Diag[PDct] . I[PDi] . S[DELETE] . value : > Diag[PDct] . I[PDi] . S[DELETE] . value + > OPEN_GAP_SCORE [_matrix_type], > Diag[PDct] . I[PDi] . S[INSERT] . used == NONE ? > Diag[PDct] . I[PDi] . S[INSERT] . value : > Diag[PDct] . I[PDi] . S[INSERT] . value + > CONT_GAP_SCORE [_matrix_type], > Diag[PDct] . I[PDi] . S[MATCH] . used == NONE ? > Diag[PDct] . I[PDi] . S[MATCH] . value : > Diag[PDct] . I[PDi] . S[MATCH] . value + > OPEN_GAP_SCORE [_matrix_type]); > else > { > Diag[Dct] . I[Di] . S[INSERT] . value = min_score; > Diag[Dct] . I[Di] . S[INSERT] . used = NONE; 243,252c261,274 < if (PPDi >= 0 && PPDi < PPDs) { < auto dummy = scoreEdit(Diag[PPDct].I[PPDi].values[DELETE], < Diag[PPDct].I[PPDi].values[INSERT], < Diag[PPDct].I[PPDi].values[MATCH]); < Diag[Dct].I[Di].values[MATCH] = < dummy.value + scoreMatch(Diag[Dct], Dct, CDi, A, B, N, m_o); < Diag[Dct].I[Di].used[MATCH] = dummy.used; < } else { < Diag[Dct].I[Di].values[MATCH] = min_score; < Diag[Dct].I[Di].used[MATCH] = NONE; --- > if ( PPDi >= 0 && PPDi < PPDs ) > { > scoreEdit > (Diag[Dct] . I[Di] . S[MATCH], > Diag[PPDct] . I[PPDi] . S[DELETE] . value, > Diag[PPDct] . I[PPDi] . S[INSERT] . value, > Diag[PPDct] . I[PPDi] . S[MATCH] . value); > Diag[Dct] . I[Di] . S[MATCH] . value += > scoreMatch (Diag[Dct], Dct, CDi, A, B, N, m_o); > } > else > { > Diag[Dct] . I[Di] . S[MATCH] . value = min_score; > Diag[Dct] . I[Di] . S[MATCH] . used = NONE; 257c279 < Diag[Dct].I[Di].maxi = maxScore(Diag[Dct].I[Di]); --- > Diag[Dct] . I[Di] . max = maxScore (Diag[Dct] . I[Di] . S); 260,262c282,284 < auto herp = Diag[Dct].I[Di]; < if (herp.values[herp.maxi] >= high_score) { < high_score = herp.values[herp.maxi]; --- > if ( Diag[Dct] . I[Di] . max->value >= high_score ) > { > high_score = Diag[Dct] . I[Di] . max->value; 268a291 > 270,275c293,301 < if (m_o & SEQEND_BIT && Dct >= L) { < if (L == N) { < if (lbound == 0) { < auto maxi = Diag[Dct].I[0].maxi; < if (Diag[Dct].I[0].values[maxi] >= xhigh_score) { < xhigh_score = Diag[Dct].I[0].values[maxi]; --- > if ( m_o & SEQEND_BIT && Dct >= L ) > { > if ( L == N ) > { > if ( lbound == 0 ) > { > if ( Diag[Dct] . I[0] . max->value >= xhigh_score ) > { > xhigh_score = Diag[Dct] . I[0] . max->value; 280c306,307 < } else // L == M --- > } > else // L == M 282,286c309,315 < if (rbound == M) { < auto derp = Diag[Dct].I[M - Diag[Dct].lbound]; < auto maxi = derp.maxi; < if (derp.values[maxi] >= xhigh_score) { < xhigh_score = derp.values[maxi]; --- > if ( rbound == M ) > { > if ( Diag[Dct] . I[M-Diag[Dct].lbound] . > max->value >= xhigh_score ) > { > xhigh_score = Diag[Dct] . I[M-Diag[Dct].lbound] . > max->value; 294,296d322 < //-- If in extender modus operandi, free soon to be greatgrandparent < // diag < if (m_o & SEARCH_BIT && Dct > 1) free(Diag[PPDct].I); 298c324,328 < auto current_diagonal = Diag[Dct]; --- > //-- If in extender modus operandi, free soon to be greatgrandparent diag > if ( m_o & SEARCH_BIT && Dct > 1 ) > free ( Diag[PPDct] . I ); > > 300,303c330,332 < for (Di = 0; Di < Ds; Di++) { < auto current_node = current_diagonal.I[Di]; < auto maxi = current_node.maxi; < if (high_score - current_node.values[maxi] > max_diff) --- > for ( Di = 0; Di < Ds; Di ++ ) > { > if ( high_score - Diag[Dct] . I[Di] . max->value > max_diff ) 308,311c337,339 < for (Di = Ds - 1; Di >= 0; Di--) { < auto current_node = current_diagonal.I[Di]; < auto maxi = current_node.maxi; < if (high_score - current_node.values[maxi] > max_diff) --- > for ( Di = Ds - 1; Di >= 0; Di -- ) > { > if ( high_score - Diag[Dct] . I[Di] . max->value > max_diff ) 318,332c346,353 < if (Dct < N && Dct < M) { < Dl++; < rbound++; < Dmid = (Dct + 1) / 2.0; < } else if (Dct >= N && Dct >= M) { < Dl--; < lbound--; < Dmid = N - (Dct + 1) / 2.0; < } else if (Dct >= N) { < lbound--; < Dmid = N - (Dct + 1) / 2.0; < } else { < rbound++; < Dmid = (Dct + 1) / 2.0; < } --- > if ( Dct < N && Dct < M ) > { Dl ++; rbound ++; Dmid = (Dct+1)/2.0; } > else if ( Dct >= N && Dct >= M ) > { Dl --; lbound --; Dmid = N - (Dct+1)/2.0; } > else if ( Dct >= N ) > { lbound --; Dmid = N - (Dct+1)/2.0; } > else > { rbound ++; Dmid = (Dct+1)/2.0; } 335c356,357 < if (Dband > 0) { --- > if ( Dband > 0 ) > { 337c359,360 < if (lbound < tlb) lbound = tlb; --- > if ( lbound < tlb ) > lbound = tlb; 339c362,363 < if (rbound > trb) rbound = trb; --- > if ( rbound > trb ) > rbound = trb; 342,343c366,369 < if (lbound < 0) lbound = 0; < if (rbound >= Dl) rbound = Dl - 1; --- > if ( lbound < 0 ) > lbound = 0; > if ( rbound >= Dl ) > rbound = Dl - 1; 351,352c377,380 < if (Dct == N + M) { < if (~m_o & OPTIMAL_BIT || m_o & SEQEND_BIT) { --- > if ( Dct == N + M ) > { > if ( ~m_o & OPTIMAL_BIT || m_o & SEQEND_BIT ) > { 356c384,385 < } else if (FinishCt == Dct) --- > } > else if ( FinishCt == Dct ) 358c387,389 < } else if (m_o & SEQEND_BIT && xFinishCt != 0) { --- > } > else if ( m_o & SEQEND_BIT && xFinishCt != 0 ) > { 365,369c396,399 < long int Aadj = < FinishCt <= N ? FinishCt - FinishCDi - 1 : N - FinishCDi - 1; < long int Badj = < FinishCt <= N ? FinishCDi - 1 : FinishCt - N + FinishCDi - 1; < if (~m_o & DIRECTION_BIT) { --- > long int Aadj = FinishCt <= N ? FinishCt - FinishCDi - 1 : N - FinishCDi - 1; > long int Badj = FinishCt <= N ? FinishCDi - 1 : FinishCt - N + FinishCDi - 1; > if ( ~m_o & DIRECTION_BIT ) > { 386,387c416 < fprintf(stderr, "%ld nodes calculated, %ld nodes trimmed\n", CalcCt, < TrimCt); --- > fprintf(stderr, "%ld nodes calculated, %ld nodes trimmed\n", CalcCt, TrimCt); 390,391c419 < (long int)sizeof(Diagonal) * Dct + < (long int)sizeof(Node) * CalcCt); --- > (long int)sizeof(Diagonal) * Dct + (long int)sizeof(Node) * CalcCt); 394,395c422 < ((long int)sizeof(Diagonal) + (long int)sizeof(Node) * MaxL) * < 2); --- > ((long int)sizeof(Diagonal) + (long int)sizeof(Node) * MaxL) * 2); 399c426,427 < if (~m_o & SEARCH_BIT) generateDelta(Diag, FinishCt, FinishCDi, N, Delta); --- > if ( ~m_o & SEARCH_BIT ) > generateDelta (Diag, FinishCt, FinishCDi, N, Delta); 409,411c437,442 < static void generateDelta(const Diagonal *Diag, long int FinishCt, < long int FinishCDi, long int N, < vector<long int> &Delta) --- > > > > static void generateDelta > (const Diagonal * Diag, long int FinishCt, long int FinishCDi, > long int N, vector<long int> & Delta) 446c477 < edit = Diag[Dct].I[Di].maxi; --- > edit = Diag[Dct] . I[Di] . max - Diag[Dct] . I[Di] . S; 449c480,481 < while (Dct >= 0) { --- > while ( Dct >= 0 ) > { 451c483,484 < if (Pi >= PSize) { --- > if ( Pi >= PSize ) > { 453,454c486,487 < Reverse_Path = < (char *)Safe_realloc(Reverse_Path, sizeof(char) * PSize); --- > Reverse_Path = (char *) Safe_realloc > ( Reverse_Path, sizeof(char) * PSize ); 458,459c491 < curr_score = Score{Diag[Dct].I[Di].values[edit], < Diag[Dct].I[Di].used[edit]}; // FIXME --- > curr_score = Diag[Dct] . I[Di] . S[edit]; 462,464c494,501 < switch (edit) { < case DELETE: CDi = Dct-- <= N ? CDi - 1 : CDi; break; < case INSERT: CDi = Dct-- <= N ? CDi : CDi + 1; break; --- > switch ( edit ) > { > case DELETE : > CDi = Dct -- <= N ? CDi - 1 : CDi; > break; > case INSERT : > CDi = Dct -- <= N ? CDi : CDi + 1; > break; 469c506,508 < case START: Dct = -1; break; --- > case START : > Dct = -1; > break; 481,482c520,523 < for (Pi -= 2; Pi >= 0; Pi--) { < switch (Reverse_Path[Pi]) { --- > for (Pi -= 2; Pi >= 0; Pi --) > { > switch ( Reverse_Path[Pi] ) > { 491,492c532,536 < case MATCH: Count++; break; < case START: break; --- > case MATCH : > Count ++; > break; > case START : > break; 505c549,553 < static inline unsigned char maxScore(const Node &n) --- > > > > static inline Score * maxScore > (Score S[3]) 510,512c558,561 < if (n.values[DELETE] > n.values[INSERT]) { < if (n.values[DELETE] > n.values[MATCH]) < return DELETE; --- > if ( S[DELETE] . value > S[INSERT] . value ) > { > if ( S[DELETE] . value > S[MATCH] . value ) > return S + DELETE; 514,516c563,566 < return MATCH; < } else if (n.values[INSERT] > n.values[MATCH]) < return INSERT; --- > return S + MATCH; > } > else if ( S[INSERT] . value > S[MATCH] . value ) > return S + INSERT; 518c568 < return MATCH; --- > return S + MATCH; 521,522c571,575 < static inline Score scoreEdit(const long int del, const long int ins, < const long int mat) --- > > > > static inline void scoreEdit > (Score & curr, const long int del, const long int ins, const long int mat) 527,529c580,583 < Score curr = {0, 0}; < if (del > ins) { < if (del > mat) { --- > if ( del > ins ) > { > if ( del > mat ) > { 532c586,588 < } else { --- > } > else > { 536c592,594 < } else if (ins > mat) { --- > } > else if ( ins > mat ) > { 539c597,599 < } else { --- > } > else > { 543c603,604 < return curr; --- > > return; 546,548c607,612 < static inline long int scoreMatch(const Diagonal Diag, long int Dct, < long int CDi, const char *A, const char *B, < long int N, unsigned int m_o) --- > > > > static inline long int scoreMatch > (const Diagonal Diag, long int Dct, long int CDi, > const char * A, const char * B, long int N, unsigned int m_o) 566c630,631 < if (Dct <= N) { --- > if ( Dct <= N ) > { 569c634,636 < } else { --- > } > else > { 574,575c641,644 < if (!isalpha(Ac)) Ac = STOP_CHAR; < if (!isalpha(Bc)) Bc = STOP_CHAR; --- > if ( ! isalpha(Ac) ) > Ac = STOP_CHAR; > if ( ! isalpha(Bc) ) > Bc = STOP_CHAR;