This is an automated email from the ASF dual-hosted git repository. zhangzp pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink-ml.git
The following commit(s) were added to refs/heads/master by this push: new 169aca4 [FLINK-30568] Add benchmark for PolyNomialExpansion, Normalizer, Binarizer, Interaction, MaxAbsScaler, VectorSlicer, ElementWiseProduct and Featurehasher 169aca4 is described below commit 169aca44fa6541d9b230e7e8a4c5b146a35cda67 Author: weibo <wbz...@pku.edu.cn> AuthorDate: Mon Jan 30 17:19:46 2023 +0800 [FLINK-30568] Add benchmark for PolyNomialExpansion, Normalizer, Binarizer, Interaction, MaxAbsScaler, VectorSlicer, ElementWiseProduct and Featurehasher This closes #198. --- .../src/main/resources/binarizer-benchmark.json | 62 ++++++++++++++++++++++ .../resources/elementwiseproduct-benchmark.json | 39 ++++++++++++++ .../main/resources/featurehasher-benchmark.json | 54 +++++++++++++++++++ .../src/main/resources/interaction-benchmark.json | 48 +++++++++++++++++ .../src/main/resources/maxabsscaler-benchmark.json | 36 +++++++++++++ .../src/main/resources/normalizer-benchmark.json | 39 ++++++++++++++ .../resources/polynoimalexpansion-benchmark.json | 39 ++++++++++++++ .../src/main/resources/vectorslicer-benchmark.json | 39 ++++++++++++++ 8 files changed, 356 insertions(+) diff --git a/flink-ml-benchmark/src/main/resources/binarizer-benchmark.json b/flink-ml-benchmark/src/main/resources/binarizer-benchmark.json new file mode 100644 index 0000000..fc0a5fd --- /dev/null +++ b/flink-ml-benchmark/src/main/resources/binarizer-benchmark.json @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +{ + "version": 1, + "binarizer10000000": { + "inputData": { + "className": "org.apache.flink.ml.benchmark.datagenerator.common.DoubleGenerator", + "paramMap": { + "colNames": [ + [ + "f0", + "f1", + "f2", + "f3", + "f4" + ] + ], + "seed": 2, + "numValues": 10000000 + } + }, + "stage": { + "className": "org.apache.flink.ml.feature.binarizer.Binarizer", + "paramMap": { + "inputCols": [ + "f0", + "f1", + "f2", + "f3", + "f4" + ], + "outputCols": [ + "outputCol0", + "outputCol1", + "outputCol2", + "outputCol3", + "outputCol4" + ], + "thresholds": [ + 0.5, + 0.3, + 0.3, + 0.6, + 0.8 + ] + } + } + } +} \ No newline at end of file diff --git a/flink-ml-benchmark/src/main/resources/elementwiseproduct-benchmark.json b/flink-ml-benchmark/src/main/resources/elementwiseproduct-benchmark.json new file mode 100644 index 0000000..740799a --- /dev/null +++ b/flink-ml-benchmark/src/main/resources/elementwiseproduct-benchmark.json @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +{ + "version": 1, + "elementwiseproduct10000000": { + "inputData": { + "className": "org.apache.flink.ml.benchmark.datagenerator.common.DenseVectorGenerator", + "paramMap": { + "vectorDim": 5, + "colNames": [ + [ + "featuresCol" + ] + ], + "seed": 2, + "numValues": 10000000 + } + }, + "stage": { + "className": "org.apache.flink.ml.feature.elementwiseproduct.ElementwiseProduct", + "paramMap": { + "scalingVec": {"values": [1.0, 2.0, 3.0, 4.0, 5.0]} + } + } + } +} \ No newline at end of file diff --git a/flink-ml-benchmark/src/main/resources/featurehasher-benchmark.json b/flink-ml-benchmark/src/main/resources/featurehasher-benchmark.json new file mode 100644 index 0000000..5c3826f --- /dev/null +++ b/flink-ml-benchmark/src/main/resources/featurehasher-benchmark.json @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +{ + "version": 1, + "featurehasher10000000": { + "inputData": { + "className": "org.apache.flink.ml.benchmark.datagenerator.common.DoubleGenerator", + "paramMap": { + "colNames": [ + [ + "f0", + "f1", + "f2", + "f3", + "f4" + ] + ], + "seed": 2, + "numValues": 10000000 + } + }, + "stage": { + "className": "org.apache.flink.ml.feature.featurehasher.FeatureHasher", + "paramMap": { + "inputCols": [ + "f0", + "f1", + "f2", + "f3", + "f4" + ], + "categoricalCols": [ + "f0", + "f1", + "f2" + ], + "numFeatures": 1000 + } + } + } +} \ No newline at end of file diff --git a/flink-ml-benchmark/src/main/resources/interaction-benchmark.json b/flink-ml-benchmark/src/main/resources/interaction-benchmark.json new file mode 100644 index 0000000..01bb8bb --- /dev/null +++ b/flink-ml-benchmark/src/main/resources/interaction-benchmark.json @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +{ + "version": 1, + "interaction10000000": { + "inputData": { + "className": "org.apache.flink.ml.benchmark.datagenerator.common.DoubleGenerator", + "paramMap": { + "colNames": [ + [ + "f0", + "f1", + "f2", + "f3", + "f4" + ] + ], + "seed": 2, + "numValues": 10000000 + } + }, + "stage": { + "className": "org.apache.flink.ml.feature.interaction.Interaction", + "paramMap": { + "inputCols": [ + "f0", + "f1", + "f2", + "f3", + "f4" + ] + } + } + } +} \ No newline at end of file diff --git a/flink-ml-benchmark/src/main/resources/maxabsscaler-benchmark.json b/flink-ml-benchmark/src/main/resources/maxabsscaler-benchmark.json new file mode 100644 index 0000000..9cbe218 --- /dev/null +++ b/flink-ml-benchmark/src/main/resources/maxabsscaler-benchmark.json @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +{ + "version": 1, + "maxabsscaler10000000": { + "inputData": { + "className": "org.apache.flink.ml.benchmark.datagenerator.common.DenseVectorGenerator", + "paramMap": { + "vectorDim": 100, + "colNames": [ + [ + "featuresCol" + ] + ], + "seed": 2, + "numValues": 10000000 + } + }, + "stage": { + "className": "org.apache.flink.ml.feature.maxabsscaler.MaxAbsScaler" + } + } +} \ No newline at end of file diff --git a/flink-ml-benchmark/src/main/resources/normalizer-benchmark.json b/flink-ml-benchmark/src/main/resources/normalizer-benchmark.json new file mode 100644 index 0000000..77d1f9e --- /dev/null +++ b/flink-ml-benchmark/src/main/resources/normalizer-benchmark.json @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +{ + "version": 1, + "normalizer10000000": { + "inputData": { + "className": "org.apache.flink.ml.benchmark.datagenerator.common.DenseVectorGenerator", + "paramMap": { + "vectorDim": 5, + "colNames": [ + [ + "featuresCol" + ] + ], + "seed": 2, + "numValues": 10000000 + } + }, + "stage": { + "className": "org.apache.flink.ml.feature.normalizer.Normalizer", + "paramMap": { + "p": 2.0 + } + } + } +} \ No newline at end of file diff --git a/flink-ml-benchmark/src/main/resources/polynoimalexpansion-benchmark.json b/flink-ml-benchmark/src/main/resources/polynoimalexpansion-benchmark.json new file mode 100644 index 0000000..d35a45b --- /dev/null +++ b/flink-ml-benchmark/src/main/resources/polynoimalexpansion-benchmark.json @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +{ + "version": 1, + "polynoimalexpansion10000000": { + "inputData": { + "className": "org.apache.flink.ml.benchmark.datagenerator.common.DenseVectorGenerator", + "paramMap": { + "vectorDim": 5, + "colNames": [ + [ + "featuresCol" + ] + ], + "seed": 2, + "numValues": 10000000 + } + }, + "stage": { + "className": "org.apache.flink.ml.feature.polynomialexpansion.PolynomialExpansion", + "paramMap": { + "degree": 2 + } + } + } +} \ No newline at end of file diff --git a/flink-ml-benchmark/src/main/resources/vectorslicer-benchmark.json b/flink-ml-benchmark/src/main/resources/vectorslicer-benchmark.json new file mode 100644 index 0000000..7f77166 --- /dev/null +++ b/flink-ml-benchmark/src/main/resources/vectorslicer-benchmark.json @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +{ + "version": 1, + "vectorslicer10000000": { + "inputData": { + "className": "org.apache.flink.ml.benchmark.datagenerator.common.DenseVectorGenerator", + "paramMap": { + "vectorDim": 10, + "colNames": [ + [ + "featuresCol" + ] + ], + "seed": 2, + "numValues": 10000000 + } + }, + "stage": { + "className": "org.apache.flink.ml.feature.vectorslicer.VectorSlicer", + "paramMap": { + "indices": [1, 3 ,5 ,7] + } + } + } +} \ No newline at end of file