This is an automated email from the ASF dual-hosted git repository. myui pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/incubator-hivemall-site.git
commit 13e887a89eb23fb824abc1a11ec07a0c1df9f482 Author: Makoto Yui <m...@apache.org> AuthorDate: Wed Oct 23 18:12:27 2019 +0900 Updated function docs --- userguide/misc/funcs.html | 129 ++++++++++++------------------------- userguide/misc/generic_funcs.html | 131 ++++++++++++-------------------------- 2 files changed, 82 insertions(+), 178 deletions(-) diff --git a/userguide/misc/funcs.html b/userguide/misc/funcs.html index 9673640..ff974bf 100644 --- a/userguide/misc/funcs.html +++ b/userguide/misc/funcs.html @@ -2077,13 +2077,13 @@ - <li class="header">Part XIII - Hivemall on Spark</li> + <li class="header">Part XIII - Hivemall on SparkSQL</li> - <li class="chapter " data-level="13.1" data-path="../spark/getting_started/"> + <li class="chapter " data-level="13.1" data-path="../spark/getting_started/README.md"> - <a href="../spark/getting_started/"> + <span> <b>13.1.</b> @@ -2133,27 +2133,12 @@ <ul class="articles"> - <li class="chapter " data-level="13.2.1" data-path="../spark/binaryclass/a9a_df.html"> - - <a href="../spark/binaryclass/a9a_df.html"> - - - <b>13.2.1.</b> - - a9a Tutorial for DataFrame - - </a> - - - - </li> - - <li class="chapter " data-level="13.2.2" data-path="../spark/binaryclass/a9a_sql.html"> + <li class="chapter " data-level="13.2.1" data-path="../spark/binaryclass/a9a_sql.html"> <a href="../spark/binaryclass/a9a_sql.html"> - <b>13.2.2.</b> + <b>13.2.1.</b> a9a Tutorial for SQL @@ -2184,27 +2169,12 @@ <ul class="articles"> - <li class="chapter " data-level="13.3.1" data-path="../spark/regression/e2006_df.html"> - - <a href="../spark/regression/e2006_df.html"> - - - <b>13.3.1.</b> - - E2006-tfidf Regression Tutorial for DataFrame - - </a> - - - - </li> - - <li class="chapter " data-level="13.3.2" data-path="../spark/regression/e2006_sql.html"> + <li class="chapter " data-level="13.3.1" data-path="../spark/regression/e2006_sql.html"> <a href="../spark/regression/e2006_sql.html"> - <b>13.3.2.</b> + <b>13.3.1.</b> E2006-tfidf Regression Tutorial for SQL @@ -2219,57 +2189,6 @@ </li> - <li class="chapter " data-level="13.4" data-path="../spark/misc/misc.html"> - - <a href="../spark/misc/misc.html"> - - - <b>13.4.</b> - - Generic Features - - </a> - - - - <ul class="articles"> - - - <li class="chapter " data-level="13.4.1" data-path="../spark/misc/topk_join.html"> - - <a href="../spark/misc/topk_join.html"> - - - <b>13.4.1.</b> - - Top-k Join Processing - - </a> - - - - </li> - - <li class="chapter " data-level="13.4.2" data-path="../spark/misc/functions.html"> - - <a href="../spark/misc/functions.html"> - - - <b>13.4.2.</b> - - Other Utility Functions - - </a> - - - - </li> - - - </ul> - - </li> - @@ -2955,6 +2874,38 @@ bloom <span class="hljs-keyword">as</span> ( </li> <li><p><code>train_randomforest_regressor(array<double|string> features, double target [, string options])</code> - Returns a relation consists of <int model_id, int model_type, string model, array<double> var_importance, double oob_errors, int oob_tests></p> </li> +<li><p><code>decision_path(string modelId, string model, array<double|string> features [, const string options] [, optional array<string> featureNames=null, optional array<string> classNames=null])</code> - Returns a decision path for each prediction in array<string></p> +<pre><code class="lang-sql">SELECT + t.passengerid, + decision_path(m.model_id, m.model, t.features, '-classification') +FROM + model_rf m + LEFT OUTER JOIN + test_rf t; +> | 892 | ["2 [0.0] = 0.0","0 [3.0] = 3.0","1 [696.0] != 107.0","7 [7.8292] <= 7.9104","1 [696.0] != 828.0","1 [696.0] != 391.0","0 [0.961038961038961, 0.03896103896103896]"] | + +-- Show 100 frequent branches +WITH tmp as ( + SELECT + decision_path(m.model_id, m.model, t.features, '-classification -no_verbose -no_leaf', array('pclass','name','sex','age','sibsp','parch','ticket','fare','cabin','embarked'), array('no','yes')) as path + FROM + model_rf m + LEFT OUTER JOIN -- CROSS JOIN + test_rf t +) +select + r.branch, + count(1) as cnt +from + tmp l + LATERAL VIEW explode(l.path) r as branch +group by + r.branch +order by + cnt desc +limit 100; +</code></pre> +</li> <li><p><code>guess_attribute_types(ANY, ...)</code> - Returns attribute types</p> <pre><code class="lang-sql">select guess_attribute_types(*) from train limit 1; > Q,Q,C,C,C,C,Q,C,C,C,Q,C,Q,Q,Q,Q,C,Q @@ -3057,7 +3008,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"List of Functions","level":"1.3","depth":1,"next":{"title":"Tips for Effective Hivemall","level":"1.4","depth":1,"path":"tips/README.md","ref":"tips/README.md","articles":[{"title":"Explicit add_bias() for better prediction","level":"1.4.1","depth":2,"path":"tips/addbias.md","ref":"tips/addbias.md","articles":[]},{"title":"Use rand_amplify() to better prediction results","level":"1.4.2","depth":2,"path":"tips/rand_amplify.md","ref":"t [...] + gitbook.page.hasChanged({"page":{"title":"List of Functions","level":"1.3","depth":1,"next":{"title":"Tips for Effective Hivemall","level":"1.4","depth":1,"path":"tips/README.md","ref":"tips/README.md","articles":[{"title":"Explicit add_bias() for better prediction","level":"1.4.1","depth":2,"path":"tips/addbias.md","ref":"tips/addbias.md","articles":[]},{"title":"Use rand_amplify() to better prediction results","level":"1.4.2","depth":2,"path":"tips/rand_amplify.md","ref":"t [...] }); </script> </div> diff --git a/userguide/misc/generic_funcs.html b/userguide/misc/generic_funcs.html index 592458e..9ff13d8 100644 --- a/userguide/misc/generic_funcs.html +++ b/userguide/misc/generic_funcs.html @@ -2077,13 +2077,13 @@ - <li class="header">Part XIII - Hivemall on Spark</li> + <li class="header">Part XIII - Hivemall on SparkSQL</li> - <li class="chapter " data-level="13.1" data-path="../spark/getting_started/"> + <li class="chapter " data-level="13.1" data-path="../spark/getting_started/README.md"> - <a href="../spark/getting_started/"> + <span> <b>13.1.</b> @@ -2133,27 +2133,12 @@ <ul class="articles"> - <li class="chapter " data-level="13.2.1" data-path="../spark/binaryclass/a9a_df.html"> - - <a href="../spark/binaryclass/a9a_df.html"> - - - <b>13.2.1.</b> - - a9a Tutorial for DataFrame - - </a> - - - - </li> - - <li class="chapter " data-level="13.2.2" data-path="../spark/binaryclass/a9a_sql.html"> + <li class="chapter " data-level="13.2.1" data-path="../spark/binaryclass/a9a_sql.html"> <a href="../spark/binaryclass/a9a_sql.html"> - <b>13.2.2.</b> + <b>13.2.1.</b> a9a Tutorial for SQL @@ -2184,27 +2169,12 @@ <ul class="articles"> - <li class="chapter " data-level="13.3.1" data-path="../spark/regression/e2006_df.html"> - - <a href="../spark/regression/e2006_df.html"> - - - <b>13.3.1.</b> - - E2006-tfidf Regression Tutorial for DataFrame - - </a> - - - - </li> - - <li class="chapter " data-level="13.3.2" data-path="../spark/regression/e2006_sql.html"> + <li class="chapter " data-level="13.3.1" data-path="../spark/regression/e2006_sql.html"> <a href="../spark/regression/e2006_sql.html"> - <b>13.3.2.</b> + <b>13.3.1.</b> E2006-tfidf Regression Tutorial for SQL @@ -2219,57 +2189,6 @@ </li> - <li class="chapter " data-level="13.4" data-path="../spark/misc/misc.html"> - - <a href="../spark/misc/misc.html"> - - - <b>13.4.</b> - - Generic Features - - </a> - - - - <ul class="articles"> - - - <li class="chapter " data-level="13.4.1" data-path="../spark/misc/topk_join.html"> - - <a href="../spark/misc/topk_join.html"> - - - <b>13.4.1.</b> - - Top-k Join Processing - - </a> - - - - </li> - - <li class="chapter " data-level="13.4.2" data-path="../spark/misc/functions.html"> - - <a href="../spark/misc/functions.html"> - - - <b>13.4.2.</b> - - Other Utility Functions - - </a> - - - - </li> - - - </ul> - - </li> - @@ -2399,6 +2318,40 @@ </div><!-- tocstop --> <h1 id="array">Array</h1> <ul> +<li><p><code>arange([int start=0, ] int stop, [int step=1])</code> - Return evenly spaced values within a given interval</p> +<pre><code class="lang-sql">SELECT arange(5), arange(1, 5), arange(1, 5, 1), arange(0, 5, 1); +> [0,1,2,3,4] [1,2,3,4] [1,2,3,4] [0,1,2,3,4] + +SELECT arange(1, 6, 2); +> 1, 3, 5 + +SELECT arange(-1, -6, 2); +> -1, -3, -5 +</code></pre> +</li> +<li><p><code>argmax(array<T> a)</code> - Returns the first index of the maximum value</p> +<pre><code class="lang-sql">SELECT argmax(array(5,2,0,1)); +> 0 +</code></pre> +</li> +<li><p><code>argmin(array<T> a)</code> - Returns the first index of the minimum value</p> +<pre><code class="lang-sql">SELECT argmin(array(5,2,0,1)); +> 2 +</code></pre> +</li> +<li><p><code>argrank(array<ANY> a)</code> - Returns the indices that would sort an array.</p> +<pre><code class="lang-sql">SELECT argrank(array(5,2,0,1)), argsort(argsort(array(5,2,0,1))); +> [3, 2, 0, 1] [3, 2, 0, 1] +</code></pre> +</li> +<li><p><code>argsort(array<ANY> a)</code> - Returns the indices that would sort an array.</p> +<pre><code class="lang-sql">SELECT argsort(array(5,2,0,1)); +> 2, 3, 1, 0 + +SELECT array_slice(array(5,2,0,1), argsort(array(5,2,0,1))); +> 0, 1, 2, 5 +</code></pre> +</li> <li><p><code>array_append(array<T> arr, T elem)</code> - Append an element to the end of an array</p> <pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> array_append(<span class="hljs-built_in">array</span>(<span class="hljs-number">1</span>,<span class="hljs-number">2</span>),<span class="hljs-number">3</span>); 1,2,3 @@ -3183,7 +3136,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"List of Generic Hivemall Functions","level":"2.1","depth":1,"next":{"title":"Efficient Top-K Query Processing","level":"2.2","depth":1,"path":"misc/topk.md","ref":"misc/topk.md","articles":[]},"previous":{"title":"Map-side join causes ClassCastException on Tez","level":"1.6.5","depth":2,"path":"troubleshooting/mapjoin_classcastex.md","ref":"troubleshooting/mapjoin_classcastex.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme [...] + gitbook.page.hasChanged({"page":{"title":"List of Generic Hivemall Functions","level":"2.1","depth":1,"next":{"title":"Efficient Top-K Query Processing","level":"2.2","depth":1,"path":"misc/topk.md","ref":"misc/topk.md","articles":[]},"previous":{"title":"Map-side join causes ClassCastException on Tez","level":"1.6.5","depth":2,"path":"troubleshooting/mapjoin_classcastex.md","ref":"troubleshooting/mapjoin_classcastex.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme [...] }); </script> </div>