This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new d87a57b2f7 [MINOR] Fix learned sampling (missing license headers / readme)
d87a57b2f7 is described below
commit d87a57b2f793fabe33aab35f45ea14a2d06faa4c
Author: Matthias Boehm <[email protected]>
AuthorDate: Sat Oct 7 21:18:20 2023 +0200
[MINOR] Fix learned sampling (missing license headers / readme)
---
.../staging/learnedSampling/1_Data_Model_Prep.dml | 22 +++++++++++++++
.../learnedSampling/2_Baseline_Sampling.dml | 21 ++++++++++++++
.../staging/learnedSampling/3_BasicTraining.dml | 22 +++++++++++++++
.../staging/learnedSampling/4_CombinedTraining.dml | 22 +++++++++++++++
scripts/staging/learnedSampling/README.md | 33 ++++++++++++++++++++++
scripts/staging/learnedSampling/notes.txt | 12 --------
6 files changed, 120 insertions(+), 12 deletions(-)
diff --git a/scripts/staging/learnedSampling/1_Data_Model_Prep.dml b/scripts/staging/learnedSampling/1_Data_Model_Prep.dml
index 2a6be1a2c0..99d5a98e82 100644
--- a/scripts/staging/learnedSampling/1_Data_Model_Prep.dml
+++ b/scripts/staging/learnedSampling/1_Data_Model_Prep.dml
@@ -1,3 +1,25 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
# -- Train: Accuracy (%): 85.07940686145477
# -- Test: Accuracy (%): 85.04146616156444
# -- ~83% w/o intercept
diff --git a/scripts/staging/learnedSampling/2_Baseline_Sampling.dml b/scripts/staging/learnedSampling/2_Baseline_Sampling.dml
index e600cd43e0..9e5aa5a50c 100644
--- a/scripts/staging/learnedSampling/2_Baseline_Sampling.dml
+++ b/scripts/staging/learnedSampling/2_Baseline_Sampling.dml
@@ -1,3 +1,24 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
# -- Baseline sampling with f=0.1, nrow=3256.0
# ---- Train Accuracy (%): 86.13587457731326
# ---- Test Accuracy (%): 84.90836490222176
diff --git a/scripts/staging/learnedSampling/3_BasicTraining.dml b/scripts/staging/learnedSampling/3_BasicTraining.dml
index 7b37ca33b3..87a1220eb8 100644
--- a/scripts/staging/learnedSampling/3_BasicTraining.dml
+++ b/scripts/staging/learnedSampling/3_BasicTraining.dml
@@ -1,3 +1,25 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
X = read("data/Adult_X.csv")
y = read("data/Adult_y.csv")
B = read("data/Adult_W.csv")
diff --git a/scripts/staging/learnedSampling/4_CombinedTraining.dml b/scripts/staging/learnedSampling/4_CombinedTraining.dml
index 807da398e5..1abbbafa6e 100644
--- a/scripts/staging/learnedSampling/4_CombinedTraining.dml
+++ b/scripts/staging/learnedSampling/4_CombinedTraining.dml
@@ -1,3 +1,25 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
X = read("data/Adult_X.csv")
y = read("data/Adult_y.csv")
B = read("data/Adult_W.csv")
diff --git a/scripts/staging/learnedSampling/README.md b/scripts/staging/learnedSampling/README.md
new file mode 100644
index 0000000000..51a0b21a1b
--- /dev/null
+++ b/scripts/staging/learnedSampling/README.md
@@ -0,0 +1,33 @@
+<!--
+{% comment %}
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to you under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+{% end comment %}
+-->
+
+# Learned Sample Selection
+
+## Data Preparation
+
+ mkdir -p data;
+ chmod 755 data;
+ curl https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data -o data/Adult.csv;
+ sed -i '$d' data/Adult.csv; # fix empty line at end of file
+
+## Run Individual Scripts
+
+ java -Xmx4g -Xms4g -cp ./lib/*:./SystemDS.jar org.apache.sysds.api.DMLScript \
+ -f XYZ.dml -debug -stats -explain
+
diff --git a/scripts/staging/learnedSampling/notes.txt b/scripts/staging/learnedSampling/notes.txt
deleted file mode 100644
index fd02d49c4a..0000000000
--- a/scripts/staging/learnedSampling/notes.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-Data Preparation
-
-mkdir -p data;
-chmod 755 data;
-curl https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data -o data/Adult.csv;
-sed -i '$d' data/Adult.csv; # fix empty line at end of file
-
-Run Individual Scripts
-
-java -Xmx4g -Xms4g -cp ./lib/*:./SystemDS.jar org.apache.sysds.api.DMLScript \
- -f XYZ.dml -debug -stats -explain
-