xuyang1706 commented on a change in pull request #8631: [FLINK-12745][ml] add 
sparse and dense vector class, and dense matrix class with basic operations.
URL: https://github.com/apache/flink/pull/8631#discussion_r310030132
 
 

 ##########
 File path: 
flink-ml-parent/flink-ml-lib/src/main/java/org/apache/flink/ml/common/matrix/SparseVector.java
 ##########
 @@ -0,0 +1,791 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.flink.ml.common.matrix;
+
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+/**
+ * A sparse vector represented by an indices array and a values array.
+ */
+public class SparseVector extends Vector {
+
+       /**
+        * Size of the vector. n = -1 indicates that the vector size is 
undetermined.
+        */
+       int n;
+
+       /**
+        * Column indices.
+        */
+       int[] indices;
+
+       /**
+        * Column values.
+        */
+       double[] values;
+
+       /**
+        * Construct an empty sparse vector with undetermined size.
+        */
+       public SparseVector() {
+               this(-1);
+       }
+
+       /**
+        * Construct an empty sparse vector with determined size.
+        */
+       public SparseVector(int n) {
+               this.n = n;
+               this.indices = new int[0];
+               this.values = new double[0];
+       }
+
+       /**
+        * Construct a sparse vector with the given indices and values.
+        *
+        * @throws IllegalArgumentException If size of indices array and values 
array differ.
+        * @throws IllegalArgumentException If n >= 0 and the indices are out 
of bound.
+        */
+       public SparseVector(int n, int[] indices, double[] values) {
+               this.n = n;
+               this.indices = indices.clone();
+               this.values = values.clone();
+               checkIndices();
+               sortIndices();
+       }
+
+       /**
+        * Construct a sparse vector with given indices to values map.
+        *
+        * @throws IllegalArgumentException If n >= 0 and the indices are out 
of bound.
+        */
+       public SparseVector(int n, Map<Integer, Double> kv) {
+               this.n = n;
+               int nnz = kv.size();
+               int[] indices = new int[nnz];
+               double[] values = new double[nnz];
+
+               int pos = 0;
+               for (Map.Entry<Integer, Double> entry : kv.entrySet()) {
+                       indices[pos] = entry.getKey();
+                       values[pos] = entry.getValue();
+                       pos++;
+               }
+
+               this.indices = indices;
+               this.values = values;
+               checkIndices();
+
+               if (!(kv instanceof TreeMap)) {
+                       sortIndices();
+               }
+       }
+
+       private void checkIndices() {
+               if (indices.length != values.length) {
+                       throw new IllegalArgumentException("Indices size and 
values size should be the same.");
+               }
+               for (int i = 0; i < indices.length; i++) {
+                       if (indices[i] < 0 || (n >= 0 && indices[i] >= n)) {
+                               throw new IllegalArgumentException("Index out 
of bound.");
+                       }
+               }
+       }
+
+       private void sortIndices() {
+               boolean outOfOrder = false;
+               for (int i = 0; i < this.indices.length - 1; i++) {
+                       if (this.indices[i] >= this.indices[i + 1]) {
+                               outOfOrder = true;
+                               break;
+                       }
+               }
+
+               if (!outOfOrder) {
+                       return;
+               }
+
+               // sort
+               Integer[] order = new Integer[this.indices.length];
+               for (int i = 0; i < order.length; i++) {
+                       order[i] = i;
+               }
+
+               Arrays.sort(order, new Comparator<Integer>() {
+                       @Override
+                       public int compare(Integer o1, Integer o2) {
+                               if (indices[o1] < indices[o2]) {
+                                       return -1;
+                               } else if (indices[o1] > indices[o2]) {
+                                       return 1;
+                               } else {
+                                       return 0;
+                               }
+                       }
+               });
+
+               int nnz = this.indices.length;
+               int[] sortedIndices = new int[nnz];
+               double[] sortedValues = new double[nnz];
+
+               for (int i = 0; i < order.length; i++) {
+                       sortedValues[i] = this.values[order[i]];
+                       sortedIndices[i] = this.indices[order[i]];
+               }
+
+               this.indices = sortedIndices;
+               this.values = sortedValues;
+       }
+
+       @Override
+       public SparseVector clone() {
+               SparseVector vec = new SparseVector(this.n);
+               vec.indices = this.indices.clone();
+               vec.values = this.values.clone();
+               return vec;
+       }
+
+       @Override
+       public SparseVector prefix(double d) {
+               int[] indices = new int[this.indices.length + 1];
+               double[] values = new double[this.values.length + 1];
+               int n = (this.n >= 0) ? this.n + 1 : this.n;
+
+               indices[0] = 0;
+               values[0] = d;
+
+               for (int i = 0; i < this.indices.length; i++) {
+                       indices[i + 1] = this.indices[i] + 1;
+                       values[i + 1] = this.values[i];
+               }
+
+               return new SparseVector(n, indices, values);
+       }
+
+       @Override
+       public SparseVector append(double d) {
+               int[] indices = new int[this.indices.length + 1];
+               double[] values = new double[this.values.length + 1];
+               int n = (this.n >= 0) ? this.n + 1 : this.n;
+
+               int i;
+               for (i = 0; i < this.indices.length; i++) {
+                       indices[i] = this.indices[i];
+                       values[i] = this.values[i];
+               }
+
+               indices[i] = n - 1;
+               values[i] = d;
+
+               return new SparseVector(n, indices, values);
+       }
+
+       public int[] getIndices() {
+               return indices;
+       }
+
+       public double[] getValues() {
+               return values;
+       }
+
+       public int getMaxIndex() {
+               if (indices.length <= 0) {
+                       return -1;
+               }
+               return indices[indices.length - 1];
+       }
+
+       @Override
+       public int size() {
+               return n;
+       }
+
+       @Override
+       public double get(int i) {
+               int pos = Arrays.binarySearch(indices, i);
+               if (pos >= 0) {
+                       return values[pos];
+               }
+               return 0.;
+       }
+
+       public void setSize(int n) {
+               this.n = n;
+       }
+
+       public int numberOfValues() {
+               return this.values.length;
+       }
+
+       @Override
+       public void set(int i, double val) {
+               int pos = Arrays.binarySearch(indices, i);
+               if (pos >= 0) {
+                       this.values[pos] = val;
+               } else {
+                       pos = -(pos + 1);
+                       double[] newValues = new double[this.values.length + 1];
+                       int[] newIndices = new int[this.values.length + 1];
+                       int j = 0;
+                       for (; j < pos; j++) {
+                               newValues[j] = this.values[j];
+                               newIndices[j] = this.indices[j];
+                       }
+                       newValues[j] = val;
+                       newIndices[j] = i;
+                       for (; j < this.values.length; j++) {
+                               newValues[j + 1] = this.values[j];
+                               newIndices[j + 1] = this.indices[j];
+                       }
+                       this.values = newValues;
+                       this.indices = newIndices;
+               }
+       }
+
+       @Override
+       public void add(int i, double val) {
+               int pos = Arrays.binarySearch(indices, i);
+               if (pos >= 0) {
+                       this.values[pos] += val;
+               } else {
+                       pos = -(pos + 1);
+                       double[] newValues = new double[this.values.length + 1];
+                       int[] newIndices = new int[this.values.length + 1];
+                       int j = 0;
+                       for (; j < pos; j++) {
+                               newValues[j] = this.values[j];
+                               newIndices[j] = this.indices[j];
+                       }
+                       newValues[j] = val;
+                       newIndices[j] = i;
+                       for (; j < this.values.length; j++) {
+                               newValues[j + 1] = this.values[j];
+                               newIndices[j + 1] = this.indices[j];
+                       }
+                       this.values = newValues;
+                       this.indices = newIndices;
+               }
+       }
+
+       @Override
+       public String toString() {
+               return "Sparse Vector{" +
+                       "indices=" + Arrays.toString(indices) +
+                       "values=" + Arrays.toString(values) +
+                       "vectorSize=" + n +
+                       '}';
+       }
+
+       @Override
+       public double normL2() {
+               double d = 0;
+               for (double t : values) {
+                       d += t * t;
+               }
+               return Math.sqrt(d);
+       }
+
+       @Override
+       public double normL1() {
+               double d = 0;
+               for (double t : values) {
+                       d += Math.abs(t);
+               }
+               return d;
+       }
+
+       @Override
+       public double normInf() {
+               double d = 0;
+               for (double t : values) {
+                       d = Math.max(Math.abs(t), d);
+               }
+               return d;
+       }
+
+       @Override
+       public double normL2Square() {
+               double d = 0;
+               for (double t : values) {
+                       d += t * t;
+               }
+               return d;
+       }
+
+       @Override
+       public SparseVector slice(int[] indices) {
+               SparseVector sliced = new SparseVector(indices.length);
+               int nnz = 0;
+               sliced.indices = new int[indices.length];
+               sliced.values = new double[indices.length];
+
+               for (int i = 0; i < indices.length; i++) {
+                       int pos = Arrays.binarySearch(this.indices, indices[i]);
+                       if (pos >= 0) {
+                               sliced.indices[nnz] = i;
+                               sliced.values[nnz] = this.values[pos];
+                               nnz++;
+                       }
+               }
+
+               if (nnz < sliced.indices.length) {
+                       sliced.indices = Arrays.copyOf(sliced.indices, nnz);
+                       sliced.values = Arrays.copyOf(sliced.values, nnz);
+               }
+
+               return sliced;
+       }
+
+       public SparseVector plus(SparseVector other) {
 
 Review comment:
   Yes, we have made the change.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to