Hi Sascha,

On 05.08.2016 16:27, Sascha Steinbiss wrote:
>> So I finally got round to submit my patches for MUMmer. One can be found
>> in the repo, half of the other is attached to this mail. Unfortunately,
>> the other half got lost in a code reindentation. Applying the changes
>> requires refactoring src/tigr/sw_align.cc for the changes in the header.
> 
> Not really sure I understand what you mean... the patches in the repo's
> series all apply without a problem and the one attached to your mail
> doesn't seem to do a lot. Are you sure it is complete? Or where is the
> other half one would be required to refactor?

Right, the patch for sw_align.hh changes some structures and now the
algorithms in sw_align.cc have to be adapted, too. The attached patch
may give you an idea of the necessary changes. I may tackle the issue
next week.

>> If someone has a lot of spare time on the weekend, feel free to go ahead.
> 
> Also, are the patches Debian-specific? Given that they don't seem to be
> fixes but rather improve performance, I could imagine Stefan Kurtz
> (MUMmer upstream) would probably appreciate receiving these patches as well.

No, these are not Debian specific. However, the one commited patch
applies to a tool which comes from mugsy, not mummer. If we get the
other one cleaned up, we could drop Stefan a note.

Best,
Fabian
1,2d0
< #include <math.h>
< #include <string.h>
3a2,3
> #include <math.h>
> 
17a18
> 
21a23,26
> 
> 
> 
> 
23,31c28,46
< static void generateDelta(const Diagonal *Diag, long int FinishCt,
< 						  long int FinishCDi, long int N,
< 						  vector<long int> &Delta);
< 
< static inline unsigned char maxScore(const Node &);
< 
< static inline long int scoreMatch(const Diagonal Diag, long int Dct,
< 								  long int CDi, const char *A, const char *B,
< 								  long int N, unsigned int m_o);
---
> static void generateDelta
>      (const Diagonal * Diag, long int FinishCt, long int FinishCDi,
>       long int N, vector<long int> & Delta);
> 
> 
> static inline Score * maxScore
>      (Score S[3]);
> 
> 
> static inline long int scoreMatch
>      (const Diagonal Diag, long int Dct, long int CDi, 
>       const char * A, const char * B, long int N, unsigned int m_o);
> 
> 
> static inline void scoreEdit
>      (Score & curr, const long int del, const long int ins, const long int mat);
> 
> 
> 
33,34d47
< static inline Score scoreEdit(const long int del, const long int ins,
< 							  const long int mat);
37c50,51
< bool _alignEngine(const char *A0, long int Astart, long int &Aend,
---
> bool _alignEngine
>      (const char * A0, long int Astart, long int & Aend,
53,65d66
< 	/*--
< 		The code in this function looks bad, but its performance is 'ok', as
< 		far as I can tell. I tried to replace it with the Gotoh() implementation
< 		from seqan, but that made the program *much* slower. It is probably due
< 		to the break off defined by `_break_len` this cuts things short, if
< 		there is no reasonable expectation to reach a decent alignment.
< 
< 	-- Fabian Klötzl, 2016-03-08
< 	*/
< 
< 	// most common modus operandi: Forward Align
< 
< 
108c109,110
< 	if (m_o & DIRECTION_BIT) {
---
>   if ( m_o & DIRECTION_BIT )
>     {
113c115,117
< 	} else {
---
>     }
>   else
>     {
128,135c132,139
< 	Diag[0].I[0].values[DELETE] = min_score;
< 	Diag[0].I[0].values[INSERT] = min_score;
< 	Diag[0].I[0].values[MATCH] = 0;
< 	Diag[0].I[0].maxi = MATCH; // = Diag[0].I[0].S + MATCH;
< 
< 	Diag[0].I[0].used[DELETE] = NONE;
< 	Diag[0].I[0].used[INSERT] = NONE;
< 	Diag[0].I[0].used[MATCH] = START;
---
>   Diag[0] . I[0] . S[DELETE] . value = min_score;
>   Diag[0] . I[0] . S[INSERT] . value = min_score;
>   Diag[0] . I[0] . S[MATCH] . value = 0;
>   Diag[0] . I[0] . max = Diag[0] . I[0] . S + MATCH;
> 
>   Diag[0] . I[0] . S[DELETE] . used = NONE;
>   Diag[0] . I[0] . S[INSERT] . used = NONE;
>   Diag[0] . I[0] . S[MATCH] . used = START;
141,143c145,148
< 	for (Dct = 1;
< 		 Dct <= N + M && (Dct - FinishCt) <= _break_len && lbound <= rbound;
< 		 Dct++) {
---
>   for ( Dct = 1; Dct <= N + M  &&
>           (Dct - FinishCt) <= _break_len  &&
>           lbound <= rbound; Dct++ )
>     {
145c150,151
< 		if (Dct >= Ll) {
---
>       if ( Dct >= Ll )
>         {
147c153,154
< 			Diag = (Diagonal *)Safe_realloc(Diag, sizeof(Diagonal) * Ll);
---
>           Diag = (Diagonal *) Safe_realloc
>             ( Diag, sizeof(Diagonal) * Ll );
155c162,163
< 		Diag[Dct].I = (Node *)Safe_malloc(Ds * sizeof(Node));
---
>       Diag[Dct] . I = (Node *) Safe_malloc
> 	( Ds * sizeof(Node) );
161c169,170
< 		if (Ds > MaxL) MaxL = Ds;
---
>       if ( Ds > MaxL )
> 	MaxL = Ds;
165c174,175
< 		if (Dct <= N) {
---
>       if ( Dct <= N )
> 	{
168c178,180
< 		} else {
---
> 	}
>       else
> 	{
182c194,195
< 		if (PPDct >= 0) {
---
>       if ( PPDct >= 0 )
> 	{
186c199,200
< 		} else
---
> 	}
>       else
190c204,205
< 		if (m_o & FORCED_BIT) high_score = min_score;
---
>       if ( m_o & FORCED_BIT )
> 	high_score = min_score;
194c209,210
< 		for (CDi = lbound; CDi <= rbound; CDi++) {
---
>       for ( CDi = lbound; CDi <= rbound; CDi ++ )
> 	{
198,200d213
< 			auto D_DPct = Diag[PDct];
< 			auto I_PDi = D_DPct.I[PDi];
< 
202,217c215,233
< 			if (PDi >= 0 && PDi < PDs) {
< 				Score dummy = scoreEdit(
< 					I_PDi.used[DELETE] == NONE
< 						? I_PDi.values[DELETE]
< 						: I_PDi.values[DELETE] + CONT_GAP_SCORE[_matrix_type],
< 					I_PDi.used[INSERT] == NONE
< 						? I_PDi.values[INSERT]
< 						: I_PDi.values[INSERT] + OPEN_GAP_SCORE[_matrix_type],
< 					I_PDi.used[MATCH] == NONE
< 						? I_PDi.values[MATCH]
< 						: I_PDi.values[MATCH] + OPEN_GAP_SCORE[_matrix_type]);
< 				Diag[Dct].I[Di].values[DELETE] = dummy.value;
< 				Diag[Dct].I[Di].used[DELETE] = dummy.used;
< 			} else {
< 				Diag[Dct].I[Di].values[DELETE] = min_score;
< 				Diag[Dct].I[Di].used[DELETE] = NONE;
---
> 	  if ( PDi >= 0  &&  PDi < PDs )
> 	    scoreEdit
> 	      (Diag[Dct] . I[Di] . S[DELETE],
> 	       Diag[PDct] . I[PDi] . S[DELETE] . used == NONE ?
> 	       Diag[PDct] . I[PDi] . S[DELETE] . value :
> 	       Diag[PDct] . I[PDi] . S[DELETE] . value +
> 	       CONT_GAP_SCORE [_matrix_type],
> 	       Diag[PDct] . I[PDi] . S[INSERT] . used == NONE ?
> 	       Diag[PDct] . I[PDi] . S[INSERT] . value :
> 	       Diag[PDct] . I[PDi] . S[INSERT] . value +
> 	       OPEN_GAP_SCORE [_matrix_type],
> 	       Diag[PDct] . I[PDi] . S[MATCH]  . used == NONE ?
> 	       Diag[PDct] . I[PDi] . S[MATCH]  . value :
> 	       Diag[PDct] . I[PDi] . S[MATCH]  . value +
> 	       OPEN_GAP_SCORE [_matrix_type]);
> 	  else
> 	    {
> 	      Diag[Dct] . I[Di] . S[DELETE] . value = min_score;
> 	      Diag[Dct] . I[Di] . S[DELETE] . used = NONE;
223,239c239,257
< 			if (PDi >= 0 && PDi < PDs) {
< 				auto parent = Diag[PDct].I[PDi];
< 				auto dummy = scoreEdit(
< 					parent.used[DELETE] == NONE
< 						? parent.values[DELETE]
< 						: parent.values[DELETE] + OPEN_GAP_SCORE[_matrix_type],
< 					parent.used[INSERT] == NONE
< 						? parent.values[INSERT]
< 						: parent.values[INSERT] + CONT_GAP_SCORE[_matrix_type],
< 					parent.used[MATCH] == NONE
< 						? parent.values[MATCH]
< 						: parent.values[MATCH] + OPEN_GAP_SCORE[_matrix_type]);
< 				Diag[Dct].I[Di].values[INSERT] = dummy.value;
< 				Diag[Dct].I[Di].used[INSERT] = dummy.used;
< 			} else {
< 				Diag[Dct].I[Di].values[INSERT] = min_score;
< 				Diag[Dct].I[Di].used[INSERT] = NONE;
---
> 	  if ( PDi >= 0  &&  PDi < PDs )
> 	    scoreEdit
> 	      (Diag[Dct] . I[Di] . S[INSERT],
> 	       Diag[PDct] . I[PDi] . S[DELETE] . used == NONE ?
> 	       Diag[PDct] . I[PDi] . S[DELETE] . value :
> 	       Diag[PDct] . I[PDi] . S[DELETE] . value +
> 	       OPEN_GAP_SCORE [_matrix_type],
> 	       Diag[PDct] . I[PDi] . S[INSERT] . used == NONE ?
> 	       Diag[PDct] . I[PDi] . S[INSERT] . value :
> 	       Diag[PDct] . I[PDi] . S[INSERT] . value +
> 	       CONT_GAP_SCORE [_matrix_type],
> 	       Diag[PDct] . I[PDi] . S[MATCH]  . used == NONE ?
> 	       Diag[PDct] . I[PDi] . S[MATCH]  . value :
> 	       Diag[PDct] . I[PDi] . S[MATCH]  . value +
> 	       OPEN_GAP_SCORE [_matrix_type]);
> 	  else
> 	    {
> 	      Diag[Dct] . I[Di] . S[INSERT] . value = min_score;
> 	      Diag[Dct] . I[Di] . S[INSERT] . used = NONE;
243,252c261,274
< 			if (PPDi >= 0 && PPDi < PPDs) {
< 				auto dummy = scoreEdit(Diag[PPDct].I[PPDi].values[DELETE],
< 									   Diag[PPDct].I[PPDi].values[INSERT],
< 									   Diag[PPDct].I[PPDi].values[MATCH]);
< 				Diag[Dct].I[Di].values[MATCH] =
< 					dummy.value + scoreMatch(Diag[Dct], Dct, CDi, A, B, N, m_o);
< 				Diag[Dct].I[Di].used[MATCH] = dummy.used;
< 			} else {
< 				Diag[Dct].I[Di].values[MATCH] = min_score;
< 				Diag[Dct].I[Di].used[MATCH] = NONE;
---
> 	  if ( PPDi >= 0  &&  PPDi < PPDs )
> 	    {
> 	      scoreEdit
> 		(Diag[Dct] . I[Di] . S[MATCH],
> 		 Diag[PPDct] . I[PPDi] . S[DELETE] . value,
> 		 Diag[PPDct] . I[PPDi] . S[INSERT] . value,
> 		 Diag[PPDct] . I[PPDi] . S[MATCH]  . value);
> 	      Diag[Dct] . I[Di] . S[MATCH] . value +=
> 		scoreMatch (Diag[Dct], Dct, CDi, A, B, N, m_o);
> 	    }
> 	  else
> 	    {
> 	      Diag[Dct] . I[Di] . S[MATCH] . value = min_score;
> 	      Diag[Dct] . I[Di] . S[MATCH] . used = NONE;
257c279
< 			Diag[Dct].I[Di].maxi = maxScore(Diag[Dct].I[Di]);
---
> 	  Diag[Dct] . I[Di] . max = maxScore (Diag[Dct] . I[Di] . S);
260,262c282,284
< 			auto herp = Diag[Dct].I[Di];
< 			if (herp.values[herp.maxi] >= high_score) {
< 				high_score = herp.values[herp.maxi];
---
> 	  if ( Diag[Dct] . I[Di] . max->value >= high_score )
> 	    {
> 	      high_score = Diag[Dct] . I[Di] . max->value;
268a291
> 
270,275c293,301
< 		if (m_o & SEQEND_BIT && Dct >= L) {
< 			if (L == N) {
< 				if (lbound == 0) {
< 					auto maxi = Diag[Dct].I[0].maxi;
< 					if (Diag[Dct].I[0].values[maxi] >= xhigh_score) {
< 						xhigh_score = Diag[Dct].I[0].values[maxi];
---
>       if ( m_o & SEQEND_BIT  &&  Dct >= L )
> 	{
> 	  if ( L == N )
> 	    {
> 	      if ( lbound == 0 )
> 		{
> 		  if ( Diag[Dct] . I[0] . max->value >= xhigh_score )
> 		    {
> 		      xhigh_score = Diag[Dct] . I[0] . max->value;
280c306,307
< 			} else // L == M
---
> 	    }
> 	  else  // L == M
282,286c309,315
< 				if (rbound == M) {
< 					auto derp = Diag[Dct].I[M - Diag[Dct].lbound];
< 					auto maxi = derp.maxi;
< 					if (derp.values[maxi] >= xhigh_score) {
< 						xhigh_score = derp.values[maxi];
---
> 	      if ( rbound == M )
> 		{
> 		  if ( Diag[Dct] . I[M-Diag[Dct].lbound] .
> 		       max->value >= xhigh_score )
> 		    {
> 		      xhigh_score = Diag[Dct] . I[M-Diag[Dct].lbound] .
> 			max->value;
294,296d322
< 		//-- If in extender modus operandi, free soon to be greatgrandparent
< 		// diag
< 		if (m_o & SEARCH_BIT && Dct > 1) free(Diag[PPDct].I);
298c324,328
< 		auto current_diagonal = Diag[Dct];
---
>       //-- If in extender modus operandi, free soon to be greatgrandparent diag
>       if ( m_o & SEARCH_BIT  &&  Dct > 1 )
> 	free ( Diag[PPDct] . I );
> 
> 
300,303c330,332
< 		for (Di = 0; Di < Ds; Di++) {
< 			auto current_node = current_diagonal.I[Di];
< 			auto maxi = current_node.maxi;
< 			if (high_score - current_node.values[maxi] > max_diff)
---
>       for ( Di = 0; Di < Ds; Di ++ )
> 	{
> 	  if ( high_score - Diag[Dct] . I[Di] . max->value > max_diff )
308,311c337,339
< 		for (Di = Ds - 1; Di >= 0; Di--) {
< 			auto current_node = current_diagonal.I[Di];
< 			auto maxi = current_node.maxi;
< 			if (high_score - current_node.values[maxi] > max_diff)
---
>       for ( Di = Ds - 1; Di >= 0; Di -- )
> 	{
> 	  if ( high_score - Diag[Dct] . I[Di] . max->value > max_diff )
318,332c346,353
< 		if (Dct < N && Dct < M) {
< 			Dl++;
< 			rbound++;
< 			Dmid = (Dct + 1) / 2.0;
< 		} else if (Dct >= N && Dct >= M) {
< 			Dl--;
< 			lbound--;
< 			Dmid = N - (Dct + 1) / 2.0;
< 		} else if (Dct >= N) {
< 			lbound--;
< 			Dmid = N - (Dct + 1) / 2.0;
< 		} else {
< 			rbound++;
< 			Dmid = (Dct + 1) / 2.0;
< 		}
---
>       if ( Dct < N && Dct < M )
> 	{ Dl ++; rbound ++; Dmid = (Dct+1)/2.0; }
>       else if ( Dct >= N && Dct >= M )
> 	{ Dl --; lbound --; Dmid = N - (Dct+1)/2.0; }
>       else if ( Dct >= N )
>         { lbound --; Dmid = N - (Dct+1)/2.0; }
>       else
>         { rbound ++; Dmid = (Dct+1)/2.0; }
335c356,357
< 		if (Dband > 0) {
---
>       if ( Dband > 0 )
>         {
337c359,360
< 			if (lbound < tlb) lbound = tlb;
---
>           if ( lbound < tlb )
>             lbound = tlb;
339c362,363
< 			if (rbound > trb) rbound = trb;
---
>           if ( rbound > trb )
>             rbound = trb;
342,343c366,369
< 		if (lbound < 0) lbound = 0;
< 		if (rbound >= Dl) rbound = Dl - 1;
---
>       if ( lbound < 0 )
> 	lbound = 0;
>       if ( rbound >= Dl )
> 	rbound = Dl - 1;
351,352c377,380
< 	if (Dct == N + M) {
< 		if (~m_o & OPTIMAL_BIT || m_o & SEQEND_BIT) {
---
>   if ( Dct == N + M )
>     {
>       if ( ~m_o & OPTIMAL_BIT || m_o & SEQEND_BIT )
> 	{
356c384,385
< 		} else if (FinishCt == Dct)
---
> 	}
>       else if ( FinishCt == Dct )
358c387,389
< 	} else if (m_o & SEQEND_BIT && xFinishCt != 0) {
---
>     }
>   else if ( m_o & SEQEND_BIT  &&  xFinishCt != 0 )
>     {
365,369c396,399
< 	long int Aadj =
< 		FinishCt <= N ? FinishCt - FinishCDi - 1 : N - FinishCDi - 1;
< 	long int Badj =
< 		FinishCt <= N ? FinishCDi - 1 : FinishCt - N + FinishCDi - 1;
< 	if (~m_o & DIRECTION_BIT) {
---
>   long int Aadj = FinishCt <= N ? FinishCt - FinishCDi - 1 : N - FinishCDi - 1;
>   long int Badj = FinishCt <= N ? FinishCDi - 1 : FinishCt - N + FinishCDi - 1;
>   if ( ~m_o & DIRECTION_BIT )
>     {
386,387c416
< 	fprintf(stderr, "%ld nodes calculated, %ld nodes trimmed\n", CalcCt,
< 			TrimCt);
---
>   fprintf(stderr, "%ld nodes calculated, %ld nodes trimmed\n", CalcCt, TrimCt);
390,391c419
< 				(long int)sizeof(Diagonal) * Dct +
< 					(long int)sizeof(Node) * CalcCt);
---
> 	    (long int)sizeof(Diagonal) * Dct + (long int)sizeof(Node) * CalcCt);
394,395c422
< 				((long int)sizeof(Diagonal) + (long int)sizeof(Node) * MaxL) *
< 					2);
---
> 	    ((long int)sizeof(Diagonal) + (long int)sizeof(Node) * MaxL) * 2);
399c426,427
< 	if (~m_o & SEARCH_BIT) generateDelta(Diag, FinishCt, FinishCDi, N, Delta);
---
>   if ( ~m_o & SEARCH_BIT )
>     generateDelta (Diag, FinishCt, FinishCDi, N, Delta);
409,411c437,442
< static void generateDelta(const Diagonal *Diag, long int FinishCt,
< 						  long int FinishCDi, long int N,
< 						  vector<long int> &Delta)
---
> 
> 
> 
> static void generateDelta
>      (const Diagonal * Diag, long int FinishCt, long int FinishCDi,
>       long int N, vector<long int> & Delta)
446c477
< 	edit = Diag[Dct].I[Di].maxi;
---
>   edit = Diag[Dct] . I[Di] . max - Diag[Dct] . I[Di] . S;
449c480,481
< 	while (Dct >= 0) {
---
>   while ( Dct >= 0 )
>     {
451c483,484
< 		if (Pi >= PSize) {
---
>       if ( Pi >= PSize )
> 	{
453,454c486,487
< 			Reverse_Path =
< 				(char *)Safe_realloc(Reverse_Path, sizeof(char) * PSize);
---
> 	  Reverse_Path = (char *) Safe_realloc 
> 	    ( Reverse_Path, sizeof(char) * PSize );
458,459c491
< 		curr_score = Score{Diag[Dct].I[Di].values[edit],
< 						   Diag[Dct].I[Di].used[edit]}; // FIXME
---
>       curr_score = Diag[Dct] . I[Di] . S[edit];
462,464c494,501
< 		switch (edit) {
< 			case DELETE: CDi = Dct-- <= N ? CDi - 1 : CDi; break;
< 			case INSERT: CDi = Dct-- <= N ? CDi : CDi + 1; break;
---
>       switch ( edit )
> 	{
> 	case DELETE :
> 	  CDi = Dct -- <= N ? CDi - 1 : CDi;
> 	  break;
> 	case INSERT :
> 	  CDi = Dct -- <= N ? CDi : CDi + 1;
> 	  break;
469c506,508
< 			case START: Dct = -1; break;
---
> 	case START :
> 	  Dct = -1;
> 	  break;
481,482c520,523
< 	for (Pi -= 2; Pi >= 0; Pi--) {
< 		switch (Reverse_Path[Pi]) {
---
>   for (Pi -= 2; Pi >= 0; Pi --)
>     {
>       switch ( Reverse_Path[Pi] )
> 	{
491,492c532,536
< 			case MATCH: Count++; break;
< 			case START: break;
---
> 	case MATCH :
> 	  Count ++;
> 	  break;
> 	case START :
> 	  break;
505c549,553
< static inline unsigned char maxScore(const Node &n)
---
> 
> 
> 
> static inline Score * maxScore
>      (Score S[3])
510,512c558,561
< 	if (n.values[DELETE] > n.values[INSERT]) {
< 		if (n.values[DELETE] > n.values[MATCH])
< 			return DELETE;
---
>   if ( S[DELETE] . value > S[INSERT] . value )
>     {
>       if ( S[DELETE] . value > S[MATCH] . value )
> 	return S + DELETE;
514,516c563,566
< 			return MATCH;
< 	} else if (n.values[INSERT] > n.values[MATCH])
< 		return INSERT;
---
> 	return S + MATCH;
>     }
>   else if ( S[INSERT] . value > S[MATCH] . value )
>     return S + INSERT;
518c568
< 		return MATCH;
---
>     return S + MATCH;
521,522c571,575
< static inline Score scoreEdit(const long int del, const long int ins,
< 							  const long int mat)
---
> 
> 
> 
> static inline void scoreEdit
>      (Score & curr, const long int del, const long int ins, const long int mat)
527,529c580,583
< 	Score curr = {0, 0};
< 	if (del > ins) {
< 		if (del > mat) {
---
>   if ( del > ins )
>     {
>       if ( del > mat )
> 	{
532c586,588
< 		} else {
---
> 	}
>       else
> 	{
536c592,594
< 	} else if (ins > mat) {
---
>     }
>   else if ( ins > mat )
>     {
539c597,599
< 	} else {
---
>     }
>   else
>     {
543c603,604
< 	return curr;
---
> 
>   return;
546,548c607,612
< static inline long int scoreMatch(const Diagonal Diag, long int Dct,
< 								  long int CDi, const char *A, const char *B,
< 								  long int N, unsigned int m_o)
---
> 
> 
> 
> static inline long int scoreMatch
>      (const Diagonal Diag, long int Dct, long int CDi, 
>       const char * A, const char * B, long int N, unsigned int m_o)
566c630,631
< 	if (Dct <= N) {
---
>   if ( Dct <= N )
>     {
569c634,636
< 	} else {
---
>     }
>   else
>     {
574,575c641,644
< 	if (!isalpha(Ac)) Ac = STOP_CHAR;
< 	if (!isalpha(Bc)) Bc = STOP_CHAR;
---
>   if ( ! isalpha(Ac) )
>     Ac = STOP_CHAR;
>   if ( ! isalpha(Bc) )
>     Bc = STOP_CHAR;

Reply via email to