[SYSTEMML-1139] Updated the beginner's guide. The updated documentation reflects the installation steps as per commit https://github.com/apache/incubator-systemml/commit/d225cbdc90e4d5f8e464182c237f5e4900467a38
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/fa88464b Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/fa88464b Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/fa88464b Branch: refs/heads/gh-pages Commit: fa88464bab650ea0df736a2887391ce2847115c6 Parents: 313b1db Author: Niketan Pansare <npan...@us.ibm.com> Authored: Wed Dec 7 14:49:02 2016 -0800 Committer: Niketan Pansare <npan...@us.ibm.com> Committed: Wed Dec 7 14:49:02 2016 -0800 ---------------------------------------------------------------------- beginners-guide-python.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/fa88464b/beginners-guide-python.md ---------------------------------------------------------------------- diff --git a/beginners-guide-python.md b/beginners-guide-python.md index d0598aa..c919f3f 100644 --- a/beginners-guide-python.md +++ b/beginners-guide-python.md @@ -75,8 +75,8 @@ We are working towards uploading the python package on pypi. Until then, please ```bash git checkout https://github.com/apache/incubator-systemml.git cd incubator-systemml -mvn post-integration-test -P distribution -DskipTests -pip install src/main/python/dist/systemml-incubating-0.12.0.dev1.tar.gz +mvn clean package -P distribution +pip install target/systemml-0.12.0-incubating-SNAPSHOT-python.tgz ``` The above commands will install Python package and place the corresponding Java binaries (along with algorithms) into the installed location. 
@@ -214,10 +214,10 @@ digits = datasets.load_digits() X_digits = digits.data y_digits = digits.target n_samples = len(X_digits) -X_train = X_digits[:.9 * n_samples] -y_train = y_digits[:.9 * n_samples] -X_test = X_digits[.9 * n_samples:] -y_test = y_digits[.9 * n_samples:] +X_train = X_digits[:int(.9 * n_samples)] +y_train = y_digits[:int(.9 * n_samples)] +X_test = X_digits[int(.9 * n_samples):] +y_test = y_digits[int(.9 * n_samples):] logistic = LogisticRegression(sqlCtx) print('LogisticRegression score: %f' % logistic.fit(X_train, y_train).score(X_test, y_test)) ``` @@ -245,13 +245,13 @@ X_digits = digits.data y_digits = digits.target n_samples = len(X_digits) # Split the data into training/testing sets and convert to PySpark DataFrame -df_train = sml.convertToLabeledDF(sqlContext, X_digits[:.9 * n_samples], y_digits[:.9 * n_samples]) -X_test = sqlCtx.createDataFrame(pd.DataFrame(X_digits[.9 * n_samples:])) +df_train = sml.convertToLabeledDF(sqlContext, X_digits[:int(.9 * n_samples)], y_digits[:int(.9 * n_samples)]) +X_test = sqlCtx.createDataFrame(pd.DataFrame(X_digits[int(.9 * n_samples):])) logistic = LogisticRegression(sqlCtx) logistic.fit(df_train) y_predicted = logistic.predict(X_test) y_predicted = y_predicted.select('prediction').toPandas().as_matrix().flatten() -y_test = y_digits[.9 * n_samples:] +y_test = y_digits[int(.9 * n_samples):] print('LogisticRegression score: %f' % accuracy_score(y_test, y_predicted)) ``` @@ -331,8 +331,8 @@ X_digits = digits.data y_digits = digits.target + 1 n_samples = len(X_digits) # Split the data into training/testing sets and convert to PySpark DataFrame -X_df = sqlCtx.createDataFrame(pd.DataFrame(X_digits[:.9 * n_samples])) -y_df = sqlCtx.createDataFrame(pd.DataFrame(y_digits[:.9 * n_samples])) +X_df = sqlCtx.createDataFrame(pd.DataFrame(X_digits[:int(.9 * n_samples)])) +y_df = sqlCtx.createDataFrame(pd.DataFrame(y_digits[:int(.9 * n_samples)])) ml = sml.MLContext(sc) # Get the path of MultiLogReg.dml scriptPath = 
os.path.join(imp.find_module("systemml")[1], 'systemml-java', 'scripts', 'algorithms', 'MultiLogReg.dml')