This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git

commit 2889c0d37d5cfa50c3ee130ac5d40f9d50cd485b
Author: Matthias Boehm <[email protected]>
AuthorDate: Thu Oct 5 16:00:30 2023 +0200

    [MINOR] Fix text/matrix-market readers counting nnz for symmetric data
    
    Non-zeros on the diagonal were counted twice, leading to unnecessary
    allocation and partially incorrect metadata.
---
 .../sysds/runtime/io/ReaderTextCellParallel.java   | 34 +++++++++++++++++-----
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/io/ReaderTextCellParallel.java b/src/main/java/org/apache/sysds/runtime/io/ReaderTextCellParallel.java
index e34d96a2a9..23b6b738ca 100644
--- a/src/main/java/org/apache/sysds/runtime/io/ReaderTextCellParallel.java
+++ b/src/main/java/org/apache/sysds/runtime/io/ReaderTextCellParallel.java
@@ -259,24 +259,42 @@ public class ReaderTextCellParallel extends ReaderTextCell
 
                        RecordReader<LongWritable,Text> reader = _informat.getRecordReader(_split, _job, Reporter.NULL);
                        try {
-                               //counting without locking as conflicts unlikely
+                               //skip matrix-matrix meta data
+                               boolean foundComment = false;
                                while( reader.next(key, value) ) {
                                        if( value.toString().charAt(0) == '%' )
-                                               continue;
-                                       st.reset( value.toString() );
-                                       int nv = (int)st.nextLong()-1;
-                                       if(nv >= 0){
-                                               _rNnz[nv] ++;
-                                               if( _isSymmetric )
-                                                       _rNnz[(int)st.nextLong()-1] ++;
+                                               foundComment = true;
+                                       else if( foundComment )
+                                               break; //skip meta data
+                                       else {
+                                               countCell(st, value.toString());
+                                               break;
                                        }
                                }
+                               
+                               //counting without locking as conflicts unlikely
+                               while( reader.next(key, value) ) {
+                                       countCell(st, value.toString());
+                               }
                        }
                        finally {
                                IOUtilFunctions.closeSilently(reader);
                        }
                        return null;
                }
+               
+               private void countCell(FastStringTokenizer st, String value) {
+                       st.reset( value );
+                       int rix = (int)st.nextLong()-1;
+                       if(rix >= 0){
+                               _rNnz[rix] ++;
+                               if( _isSymmetric ) {
+                                       int cix = (int)st.nextLong()-1;
+                                       if(rix != cix)
+                                               _rNnz[cix] ++;
+                               }
+                       }
+               }
        }
        
        /**

Reply via email to