Repository: systemml
Updated Branches:
refs/heads/master bd139a575 -> d69686273
[SYSTEMML-1978] Add PCA to Performance Test Suite
Closes #694
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/d6968627
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/d6968627
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/d6968627
Branch: refs/heads/master
Commit: d69686273da8bf4dc09441ec34ef3863eb437629
Parents: bd139a5
Author: Krishna Kalyan
Authored: Thu Nov 9 20:42:49 2017 +0100
Committer: Krishna Kalyan
Committed: Thu Nov 9 20:42:49 2017 +0100
--
scripts/perftest/python/datagen.py      | 17 +++++++++++++++++
scripts/perftest/python/run_perftest.py | 11 +++++++----
scripts/perftest/python/train.py        | 15 +++++++++++++++
3 files changed, 39 insertions(+), 4 deletions(-)
--
http://git-wip-us.apache.org/repos/asf/systemml/blob/d6968627/scripts/perftest/python/datagen.py
--
diff --git a/scripts/perftest/python/datagen.py b/scripts/perftest/python/datagen.py
index 54f2eff..55dd06d 100755
--- a/scripts/perftest/python/datagen.py
+++ b/scripts/perftest/python/datagen.py
@@ -215,6 +215,23 @@ def stats2_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
return save_path
+def dimreduction_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
+
+path_name = '.'.join(['dimreduction', matrix_type, str(matrix_dim)])
+datagen_write = join(datagen_dir, path_name)
+save_path = join(config_dir, path_name)
+row, col = split_rowcol(matrix_dim)
+
+R = row
+C = col
+OUT = join(datagen_write, 'X.data')
+
+config = dict(R=R, C=C, OUT=OUT, FMT=DATA_FORMAT)
+
+config_writer(save_path + '.json', config)
+return save_path
+
+
def config_packets_datagen(algo_payload, matrix_type, matrix_shape,
datagen_dir, dense_algos, config_dir):
"""
This function has two responsibilities. Generate the configuration files for
http://git-wip-us.apache.org/repos/asf/systemml/blob/d6968627/scripts/perftest/python/run_perftest.py
--
diff --git a/scripts/perftest/python/run_perftest.py b/scripts/perftest/python/run_perftest.py
index 6e87261..1f78a75 100755
--- a/scripts/perftest/python/run_perftest.py
+++ b/scripts/perftest/python/run_perftest.py
@@ -47,7 +47,8 @@ ML_ALGO = {'binomial': ['MultiLogReg', 'l2-svm', 'm-svm'],
'regression1': ['LinearRegDS', 'LinearRegCG'],
'regression2': ['GLM_poisson', 'GLM_gamma', 'GLM_binomial'],
'stats1': ['Univar-Stats', 'bivar-stats'],
- 'stats2': ['stratstats']}
+ 'stats2': ['stratstats'],
+ 'dimreduction': ['PCA']}
ML_GENDATA = {'binomial': 'genRandData4LogisticRegression',
'clustering': 'genRandData4Kmeans',
@@ -55,7 +56,8 @@ ML_GENDATA = {'binomial': 'genRandData4LogisticRegression',
'regression1': 'genRandData4LogisticRegression',
'regression2': 'genRandData4LogisticRegression',
'stats1': 'genRandData4DescriptiveStats',
- 'stats2': 'genRandData4StratStats'}
+ 'stats2': 'genRandData4StratStats',
+ 'dimreduction': 'genRandData4PCA'}
ML_TRAIN = {'GLM_poisson': 'GLM',
'GLM_gamma': 'GLM',
@@ -69,7 +71,8 @@ ML_TRAIN = {'GLM_poisson': 'GLM',
'm-svm': 'm-svm',
'l2-svm': 'l2-svm',
'MultiLogReg': 'MultiLogReg',
-'naive-bayes': 'naive-bayes'}
+'naive-bayes': 'naive-bayes',
+'PCA': 'PCA'}
ML_PREDICT = {'Kmeans': 'Kmeans-predict',
'LinearRegCG': 'GLM-predict',
@@ -82,7 +85,7 @@ ML_PREDICT = {'Kmeans': 'Kmeans-predict',
'GLM_gamma': 'GLM-predict',
'GLM_binomial': 'GLM-predict'}
-DENSE_TYPE_ALGOS = ['clustering', 'stats1', 'stats2']
+DENSE_TYPE_ALGOS = ['clustering', 'stats1', 'stats2', 'dimreduction']
# Responsible for execution and metric logging
http://git-wip-us.apache.org/repos/asf/systemml/blob/d6968627/scripts/perftest/python/train.py
--
diff --git a/scripts/perftest/python/train.py b/scripts/perftest/python/train.py
index 4428e8f..907c2b9 100755
--- a/scripts/perftest/python/train.py
+++ b/scripts/perftest/python/train.py
@@ -338,6 +338,21 @@ def regression2_glm_poisson_train(save_folder_name, datagen_dir, train_dir, conf
return data_folders
+def dimreduction_pca_train(save_folder_name, datagen_dir, train_dir, config_dir):
+save_path = join(config_dir, save_folder_name)
+train_write = join(train_dir, save_folder_name)
+
+INPUT = join(datagen_dir, 'X.data')
+SCALE = '1'
+