[45/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/cov.html -- diff --git a/site/docs/2.3.2/api/R/cov.html b/site/docs/2.3.2/api/R/cov.html new file mode 100644 index 000..ec96abb --- /dev/null +++ b/site/docs/2.3.2/api/R/cov.html @@ -0,0 +1,137 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";>http://www.w3.org/1999/xhtml";>R: cov + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js";> +hljs.initHighlightingOnLoad(); + + +cov {SparkR}R Documentation + +cov + +Description + +Compute the covariance between two expressions. + + + +Usage + + +cov(x, ...) + +covar_samp(col1, col2) + +covar_pop(col1, col2) + +## S4 method for signature 'characterOrColumn' +cov(x, col2) + +## S4 method for signature 'characterOrColumn,characterOrColumn' +covar_samp(col1, col2) + +## S4 method for signature 'characterOrColumn,characterOrColumn' +covar_pop(col1, col2) + +## S4 method for signature 'SparkDataFrame' +cov(x, colName1, colName2) + + + +Arguments + + +x + +a Column or a SparkDataFrame. + +... + +additional argument(s). If x is a Column, a Column +should be provided. If x is a SparkDataFrame, two column names should +be provided. + +col1 + +the first Column. + +col2 + +the second Column. + +colName1 + +the name of the first column + +colName2 + +the name of the second column + + + + +Details + +cov: Compute the sample covariance between two expressions. + +covar_sample: Alias for cov. + +covar_pop: Computes the population covariance between two expressions. + +cov: When applied to SparkDataFrame, this calculates the sample covariance of two +numerical columns of one SparkDataFrame. + + + +Value + +The covariance of the two columns. + + + +Note + +cov since 1.6.0 + +covar_samp since 2.0.0 + +covar_pop since 2.0.0 + +cov since 1.6.0 + + + +See Also + +Other aggregate functions: avg, +column_aggregate_functions, +corr, count, +first, last + +Other stat functions: approxQuantile, +corr, crosstab, +freqItems, sampleBy + + + +Examples + +## Not run: +##D df <- createDataFrame(cbind(model = rownames(mtcars), mtcars)) +##D head(select(df, cov(df$mpg, df$hp), cov("mpg", "hp"), +##D covar_samp(df$mpg, df$hp), covar_samp("mpg", "hp"), +##D covar_pop(df$mpg, df$hp), covar_pop("mpg", "hp"))) +## End(Not run) + +## Not run: +##D cov(df, "mpg", "hp") +##D cov(df, df$mpg, df$hp) +## End(Not run) + + + +[Package SparkR version 2.3.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/createDataFrame.html -- diff --git a/site/docs/2.3.2/api/R/createDataFrame.html b/site/docs/2.3.2/api/R/createDataFrame.html new file mode 100644 index 000..0cf668e --- /dev/null +++ b/site/docs/2.3.2/api/R/createDataFrame.html @@ -0,0 +1,90 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";>http://www.w3.org/1999/xhtml";>R: Create a SparkDataFrame + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js";> +hljs.initHighlightingOnLoad(); + + +createDataFrame {SparkR}R Documentation + +Create a SparkDataFrame + +Description + +Converts R data.frame or list into SparkDataFrame. 
+ + + +Usage + + +## Default S3 method: +createDataFrame(data, schema = NULL, + samplingRatio = 1, numPartitions = NULL) + +## Default S3 method: +as.DataFrame(data, schema = NULL, samplingRatio = 1, + numPartitions = NULL) + +as.DataFrame(data, ...) + + + +Arguments + + +data + +a list or data.frame. + +schema + +a list of column names or named list (StructType), optional. + +samplingRatio + +Currently not used. + +numPartitions + +the number of partitions of the SparkDataFrame. Defaults to 1, this is +limited by length of the list or number of rows of the data.frame + +... + +additional argument(s). + + + + +Value + +A SparkDataFrame. + + + +Note + +createDataFrame since 1.4.0 + +as.DataFrame since 1.6.0 + + + +Examples + +## Not run: +##D sparkR.session() +##D df1 <- as.DataFrame(iris) +##D df2 <- as.DataFrame(list(3,4,5,6)) +##D df3 <- createDataFrame(iris) +##D df4 <- createDataFrame(cars, numPartitions = 2) +## End(Not run) + + + +[Package SparkR version 2.3.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/createExternalTable-deprecated.html -- diff --git a/site/docs/2.3.2/api/R/createExternalTable-deprecated.html b/site/docs/2.3.2/api/R/createExternalTable-deprecated.html new file mode 100644 ind
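The cov and createDataFrame pages above document SparkR's covariance helpers and DataFrame construction. As a cross-reference, a minimal Scala sketch of the corresponding Dataset API follows; the local SparkSession, column names and toy data are illustrative and not part of the committed docs.

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.functions.{covar_pop, covar_samp}

    object CovarianceSketch {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .master("local[*]")
          .appName("covariance-sketch")
          .getOrCreate()
        import spark.implicits._

        // createDataFrame / as.DataFrame equivalent: build a DataFrame from a local collection.
        val df = Seq((21.0, 110.0), (22.8, 93.0), (18.7, 175.0)).toDF("mpg", "hp")

        // SparkDataFrame-level cov(x, colName1, colName2): sample covariance of two numeric columns.
        val sampleCov = df.stat.cov("mpg", "hp")
        println(s"sample covariance = $sampleCov")

        // Column-level aggregate functions covar_samp / covar_pop.
        df.agg(covar_samp($"mpg", $"hp"), covar_pop($"mpg", $"hp")).show()

        spark.stop()
      }
    }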
[32/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/index-all.html -- diff --git a/site/docs/2.3.2/api/java/index-all.html b/site/docs/2.3.2/api/java/index-all.html new file mode 100644 index 000..0782b81 --- /dev/null +++ b/site/docs/2.3.2/api/java/index-all.html @@ -0,0 +1,51649 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +Index (Spark 2.3.2 JavaDoc) + + + + + + + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev +Next + + +Frames +No Frames + + +All Classes + + + + + + + + + +$ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z _ + + +$ + +$colon$bslash(B, Function2) - Static method in class org.apache.spark.sql.types.StructType + +$colon$plus(B, CanBuildFrom) - Static method in class org.apache.spark.sql.types.StructType + +$div$colon(B, Function2) - Static method in class org.apache.spark.sql.types.StructType + +$greater(A) - Static method in class org.apache.spark.sql.types.Decimal + +$greater(A) - Static method in class org.apache.spark.storage.RDDInfo + +$greater$eq(A) - Static method in class org.apache.spark.sql.types.Decimal + +$greater$eq(A) - Static method in class org.apache.spark.storage.RDDInfo + +$less(A) - Static method in class org.apache.spark.sql.types.Decimal + +$less(A) - Static method in class org.apache.spark.storage.RDDInfo + +$less$eq(A) - Static method in class org.apache.spark.sql.types.Decimal + +$less$eq(A) - Static method in class org.apache.spark.storage.RDDInfo + +$minus$greater(T) - Static method in class org.apache.spark.ml.param.DoubleParam + +$minus$greater(T) - Static method in class org.apache.spark.ml.param.FloatParam + +$plus$colon(B, CanBuildFrom ) - Static method in class org.apache.spark.sql.types.StructType + +$plus$eq(T) - Static method in class org.apache.spark.Accumulator + +Deprecated. + +$plus$plus(RDD ) - Static method in class org.apache.spark.api.r.RRDD + +$plus$plus(RDD ) - Static method in class org.apache.spark.graphx.EdgeRDD + +$plus$plus(RDD ) - Static method in class org.apache.spark.graphx.impl.EdgeRDDImpl + +$plus$plus(RDD ) - Static method in class org.apache.spark.graphx.impl.VertexRDDImpl + +$plus$plus(RDD ) - Static method in class org.apache.spark.graphx.VertexRDD + +$plus$plus(RDD ) - Static method in class org.apache.spark.rdd.HadoopRDD + +$plus$plus(RDD ) - Static method in class org.apache.spark.rdd.JdbcRDD + +$plus$plus(RDD ) - Static method in class org.apache.spark.rdd.NewHadoopRDD + +$plus$plus(RDD ) - Static method in class org.apache.spark.rdd.PartitionPruningRDD + +$plus$plus(RDD ) - Static method in class org.apache.spark.rdd.UnionRDD + +$plus$plus(GenTraversableOnce, CanBuildFrom ) - Static method in class org.apache.spark.sql.types.StructType + +$plus$plus$colon(TraversableOnce, CanBuildFrom ) - Static method in class org.apache.spark.sql.types.StructType + +$plus$plus$colon(Traversable, CanBuildFrom ) - Static method in class org.apache.spark.sql.types.StructType + +$plus$plus$eq(R) - Static method in class org.apache.spark.Accumulator + +Deprecated. + + + + + +A + +abort(WriterCommitMessage[]) - Method in interface org.apache.spark.sql.sources.v2.writer.DataSourceWriter + +Aborts this writing job because some data writers are failed and keep failing when retry, or + the Spark job fails with some unknown reasons, or DataSourceWriter.commit(WriterCommitMessage[]) fails. 
+ +abort() - Method in interface org.apache.spark.sql.sources.v2.writer.DataWriter + +Aborts this writer if it is failed. + +abort(long, WriterCommitMessage[]) - Method in interface org.apache.spark.sql.sources.v2.writer.streaming.StreamWriter + +Aborts this writing job because some data writers are failed and keep failing when retry, or + the Spark job fails with some unknown reasons, or StreamWriter.commit(WriterCommitMessage[]) fails. + +abort(WriterCommitMessage[]) - Method in interface org.apache.spark.sql.sources.v2.writer.streaming.StreamWriter + +abortJob(JobContext) - Method in class org.apache.spark.internal.io.FileCommitProtocol + +Aborts a job after the writes fail. + +abortJob(JobContext) - Method in class org.apache.spark.internal.io.HadoopMapReduceCommitProtocol + +abortTask(TaskAttemptContext) - Method in class org.apache.spark.
[34/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/allclasses-noframe.html -- diff --git a/site/docs/2.3.2/api/java/allclasses-noframe.html b/site/docs/2.3.2/api/java/allclasses-noframe.html new file mode 100644 index 000..ab206fd --- /dev/null +++ b/site/docs/2.3.2/api/java/allclasses-noframe.html @@ -0,0 +1,1300 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +All Classes (Spark 2.3.2 JavaDoc) + + + + + +All Classes + + +AbsoluteError +AbstractLauncher +Accumulable +AccumulableInfo +AccumulableInfo +AccumulableParam +Accumulator +AccumulatorContext +AccumulatorParam +AccumulatorParam.DoubleAccumulatorParam$ +AccumulatorParam.FloatAccumulatorParam$ +AccumulatorParam.IntAccumulatorParam$ +AccumulatorParam.LongAccumulatorParam$ +AccumulatorParam.StringAccumulatorParam$ +AccumulatorV2 +AFTAggregator +AFTCostFun +AFTSurvivalRegression +AFTSurvivalRegressionModel +AggregatedDialect +AggregatingEdgeContext +Aggregator +Aggregator +Algo +AllJobsCancelled +AllReceiverIds +ALS +ALS +ALS.InBlock$ +ALS.Rating +ALS.Rating$ +ALS.RatingBlock$ +ALSModel +AnalysisException +And +AnyDataType +ApiHelper +ApplicationAttemptInfo +ApplicationEnvironmentInfo +ApplicationInfo +ApplicationStatus +ApplyInPlace +AppStatusUtils +AreaUnderCurve +ArrayType +ArrowColumnVector +AskPermissionToCommitOutput +AssociationRules +AssociationRules +AssociationRules.Rule +AsyncEventQueue +AsyncRDDActions +Attribute +AttributeGroup +AttributeKeys +AttributeType +BaseRelation +BaseRRDD +BasicBlockReplicationPolicy +BatchInfo +BatchInfo +BatchStatus +BernoulliCellSampler +BernoulliSampler +Binarizer +BinaryAttribute +BinaryClassificationEvaluator +BinaryClassificationMetrics +BinaryLogisticRegressionSummary +BinaryLogisticRegressionSummaryImpl +BinaryLogisticRegressionTrainingSummary +BinaryLogisticRegressionTrainingSummaryImpl +BinarySample +BinaryType +BinomialBounds +BisectingKMeans +BisectingKMeans +BisectingKMeansModel +BisectingKMeansModel +BisectingKMeansModel.SaveLoadV1_0$ +BisectingKMeansSummary +BlacklistedExecutor +BLAS +BLAS +BlockId +BlockManagerId +BlockManagerMessages +BlockManagerMessages.BlockLocationsAndStatus +BlockManagerMessages.BlockLocationsAndStatus$ +BlockManagerMessages.BlockManagerHeartbeat +BlockManagerMessages.BlockManagerHeartbeat$ +BlockManagerMessages.GetBlockStatus +BlockManagerMessages.GetBlockStatus$ +BlockManagerMessages.GetExecutorEndpointRef +BlockManagerMessages.GetExecutorEndpointRef$ +BlockManagerMessages.GetLocations +BlockManagerMessages.GetLocations$ +BlockManagerMessages.GetLocationsAndStatus +BlockManagerMessages.GetLocationsAndStatus$ +BlockManagerMessages.GetLocationsMultipleBlockIds +BlockManagerMessages.GetLocationsMultipleBlockIds$ +BlockManagerMessages.GetMatchingBlockIds +BlockManagerMessages.GetMatchingBlockIds$ +BlockManagerMessages.GetMemoryStatus$ +BlockManagerMessages.GetPeers +BlockManagerMessages.GetPeers$ +BlockManagerMessages.GetStorageStatus$ +BlockManagerMessages.HasCachedBlocks +BlockManagerMessages.HasCachedBlocks$ +BlockManagerMessages.RegisterBlockManager +BlockManagerMessages.RegisterBlockManager$ +BlockManagerMessages.RemoveBlock +BlockManagerMessages.RemoveBlock$ +BlockManagerMessages.RemoveBroadcast +BlockManagerMessages.RemoveBroadcast$ +BlockManagerMessages.RemoveExecutor +BlockManagerMessages.RemoveExecutor$ +BlockManagerMessages.RemoveRdd +BlockManagerMessages.RemoveRdd$ +BlockManagerMessages.RemoveShuffle +BlockManagerMessages.RemoveShuffle$ +BlockManagerMessages.ReplicateBlock 
+BlockManagerMessages.ReplicateBlock$ +BlockManagerMessages.StopBlockManagerMaster$ +BlockManagerMessages.ToBlockManagerMaster +BlockManagerMessages.ToBlockManagerSlave +BlockManagerMessages.TriggerThreadDump$ +BlockManagerMessages.UpdateBlockInfo +BlockManagerMessages.UpdateBlockInfo$ +BlockMatrix +BlockNotFoundException +BlockReplicationPolicy +BlockReplicationUtils +BlockStatus +BlockUpdatedInfo +BloomFilter +BloomFilter.Version +BooleanParam +BooleanType +BoostingStrategy +BoundedDouble +BreezeUtil +Broadcast +BroadcastBlockId +BucketedRandomProjectionLSH +BucketedRandomProjectionLSHModel +Bucketizer +BufferReleasingInputStream +BytecodeUtils +ByteType +CalendarIntervalType +Catalog +CatalystScan +CategoricalSplit +CausedBy +CharType +CheckpointReader +CheckpointState +ChiSqSelector +ChiSqSelector +ChiSqSelectorModel +ChiSqSelectorModel +ChiSqSelectorModel.SaveLoadV1_0$ +ChiSqTest +ChiSqTest.Method +ChiSqTest.Method$ +ChiSqTest.NullHypothesis$ +ChiSqTestResult +ChiSquareTest +CholeskyDecomposition +ClassificationModel +ClassificationModel +Classifier +CleanAccum +CleanBroadcast +CleanCheckpoint +CleanRDD +CleanShuffle +CleanupTask +CleanupTaskWeakReference +ClosureCleaner +ClusteredDistribution +ClusteringEvaluator +ClusteringSummary +CoarseGrainedClusterMessages +CoarseGrainedClusterMessages.AddWebUIFilter +CoarseGrainedClusterMessages.AddWebUIFilter$ +CoarseGrainedClusterMess
[25/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/ExceptionFailure.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/ExceptionFailure.html b/site/docs/2.3.2/api/java/org/apache/spark/ExceptionFailure.html new file mode 100644 index 000..6f8cbba --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/ExceptionFailure.html @@ -0,0 +1,502 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +ExceptionFailure (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":10,"i1":5,"i2":10,"i3":9,"i4":10,"i5":5,"i6":10,"i7":10,"i8":5,"i9":5,"i10":9,"i11":9,"i12":10,"i13":10}; +var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],4:["t3","Abstract Methods"],8:["t4","Concrete Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark +Class ExceptionFailure + + + +Object + + +org.apache.spark.ExceptionFailure + + + + + + + +All Implemented Interfaces: +java.io.Serializable, TaskEndReason, TaskFailedReason, scala.Equals, scala.Product + + + +public class ExceptionFailure +extends Object +implements TaskFailedReason, scala.Product, scala.Serializable +:: DeveloperApi :: + Task failed due to a runtime exception. This is the most common failure case and also captures + user program exceptions. + + stackTrace contains the stack trace of the exception itself. It still exists for backward + compatibility. It's better to use this(e: Throwable, metrics: Option[TaskMetrics]) to + create ExceptionFailure as it will handle the backward compatibility properly. + + fullStackTrace is a better representation of the stack trace because it contains the whole + stack trace including the exception and its causes + + exception is the actual exception that caused the task to fail. It may be None in + the case that the exception is not in fact serializable. If a task fails more than + once (due to retries), exception is that one that caused the last failure. + +See Also: +Serialized Form + + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +ExceptionFailure(String className, +String description, +StackTraceElement[] stackTrace, +String fullStackTrace, + scala.OptionexceptionWrapper, +scala.collection.Seq accumUpdates, +scala.collection.Seq > accums) + + + + + + + + + +Method Summary + +All Methods Static Methods Instance Methods Abstract Methods Concrete Methods + +Modifier and Type +Method and Description + + +scala.collection.Seq +accumUpdates() + + +abstract static boolean +canEqual(Object that) + + +String +className() + + +static boolean +countTowardsTaskFailures() + + +String +description() + + +abstract static boolean +equals(Object that) + + +scala.Option +exception() + + +String +fullStackTrace() + + +abstract static int +productArity() + + +abstract static Object +productElement(int n) + + +static scala.collection.Iterator
[19/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/SparkContext.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/SparkContext.html b/site/docs/2.3.2/api/java/org/apache/spark/SparkContext.html new file mode 100644 index 000..3ffb13a --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/SparkContext.html @@ -0,0 +1,3117 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +SparkContext (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":42,"i1":42,"i2":42,"i3":42,"i4":42,"i5":10,"i6":10,"i7":10,"i8":10,"i9":10,"i10":10,"i11":10,"i12":10,"i13":10,"i14":10,"i15":10,"i16":10,"i17":10,"i18":10,"i19":10,"i20":10,"i21":10,"i22":10,"i23":10,"i24":10,"i25":10,"i26":10,"i27":10,"i28":10,"i29":10,"i30":10,"i31":10,"i32":10,"i33":10,"i34":10,"i35":10,"i36":42,"i37":10,"i38":9,"i39":9,"i40":10,"i41":10,"i42":10,"i43":10,"i44":10,"i45":10,"i46":10,"i47":10,"i48":10,"i49":10,"i50":10,"i51":9,"i52":9,"i53":10,"i54":10,"i55":10,"i56":10,"i57":10,"i58":10,"i59":10,"i60":10,"i61":10,"i62":10,"i63":10,"i64":10,"i65":10,"i66":10,"i67":10,"i68":10,"i69":10,"i70":10,"i71":10,"i72":10,"i73":10,"i74":10,"i75":10,"i76":10,"i77":10,"i78":10,"i79":10,"i80":10,"i81":10,"i82":10,"i83":10,"i84":10,"i85":10,"i86":10,"i87":10,"i88":10,"i89":10,"i90":10,"i91":10,"i92":10,"i93":10,"i94":10,"i95":10,"i96":10,"i97":10,"i98":10,"i99":10,"i100":10,"i101":10,"i102":10}; +var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"],32:["t6","Deprecated Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark +Class SparkContext + + + +Object + + +org.apache.spark.SparkContext + + + + + + + +All Implemented Interfaces: +Logging + + + +public class SparkContext +extends Object +implements Logging +Main entry point for Spark functionality. A SparkContext represents the connection to a Spark + cluster, and can be used to create RDDs, accumulators and broadcast variables on that cluster. + + Only one SparkContext may be active per JVM. You must stop() the active SparkContext before + creating a new one. This limitation may eventually be removed; see SPARK-2243 for more details. + + param: config a Spark Config object describing the application configuration. Any settings in + this config overrides the default configs as well as system properties. + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +SparkContext() +Create a SparkContext that loads settings from system properties (for instance, when + launching with ./bin/spark-submit). 
+ + + +SparkContext(SparkConf config) + + +SparkContext(String master, +String appName, +SparkConf conf) +Alternative constructor that allows setting common Spark properties directly + + + +SparkContext(String master, +String appName, +String sparkHome, +scala.collection.Seqjars, +scala.collection.Map environment) +Alternative constructor that allows setting common Spark properties directly + + + + + + + + + + +Method Summary + +All Methods Static Methods Instance Methods Concrete Methods Deprecated Methods + +Modifier and Type +Method and Description + + + Accumulable +accumulable(R initialValue, + AccumulableParam param) +Deprecated. +use AccumulatorV2. Since 2.0.0. + + + + + Accumulable +accumulable(R initialValue, + String name, + AccumulableParam param) +Deprecated. +use AccumulatorV2. Since 2.0.0. + + + + + Accumulable +accumulableCollection(R initialValue, + scala.Function1 > evidence$9, + scala.reflect.ClassTag evidence$10) +Deprecated. +use AccumulatorV2. Since 2.0.0. + + + + + Accumulator +accumulator(T initialValue, + AccumulatorParam param) +Deprecated. +use AccumulatorV2. Since 2.0.0. + + + + + Accumulator +accumulator(T ini
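The SparkContext class description above stresses two points: settings in the supplied SparkConf override defaults and system properties, and only one SparkContext may be active per JVM (stop() it before creating another). A small Scala sketch of that lifecycle, with an illustrative master URL and application name:

    import org.apache.spark.{SparkConf, SparkContext}

    object ContextLifecycleSketch {
      def main(args: Array[String]): Unit = {
        // Settings here override default configs as well as system properties.
        val conf = new SparkConf()
          .setMaster("local[2]")            // illustrative master URL
          .setAppName("context-sketch")

        val sc = new SparkContext(conf)

        // The context is the entry point for RDDs, accumulators and broadcast variables.
        val total = sc.parallelize(1 to 100).sum()
        println(s"sum = $total")

        // Only one active SparkContext per JVM: stop it before creating a new one.
        sc.stop()
      }
    }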
[35/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/allclasses-frame.html -- diff --git a/site/docs/2.3.2/api/java/allclasses-frame.html b/site/docs/2.3.2/api/java/allclasses-frame.html new file mode 100644 index 000..ec0215c --- /dev/null +++ b/site/docs/2.3.2/api/java/allclasses-frame.html @@ -0,0 +1,1300 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +All Classes (Spark 2.3.2 JavaDoc) + + + + + +All Classes + + +AbsoluteError +AbstractLauncher +Accumulable +AccumulableInfo +AccumulableInfo +AccumulableParam +Accumulator +AccumulatorContext +AccumulatorParam +AccumulatorParam.DoubleAccumulatorParam$ +AccumulatorParam.FloatAccumulatorParam$ +AccumulatorParam.IntAccumulatorParam$ +AccumulatorParam.LongAccumulatorParam$ +AccumulatorParam.StringAccumulatorParam$ +AccumulatorV2 +AFTAggregator +AFTCostFun +AFTSurvivalRegression +AFTSurvivalRegressionModel +AggregatedDialect +AggregatingEdgeContext +Aggregator +Aggregator +Algo +AllJobsCancelled +AllReceiverIds +ALS +ALS +ALS.InBlock$ +ALS.Rating +ALS.Rating$ +ALS.RatingBlock$ +ALSModel +AnalysisException +And +AnyDataType +ApiHelper +ApplicationAttemptInfo +ApplicationEnvironmentInfo +ApplicationInfo +ApplicationStatus +ApplyInPlace +AppStatusUtils +AreaUnderCurve +ArrayType +ArrowColumnVector +AskPermissionToCommitOutput +AssociationRules +AssociationRules +AssociationRules.Rule +AsyncEventQueue +AsyncRDDActions +Attribute +AttributeGroup +AttributeKeys +AttributeType +BaseRelation +BaseRRDD +BasicBlockReplicationPolicy +BatchInfo +BatchInfo +BatchStatus +BernoulliCellSampler +BernoulliSampler +Binarizer +BinaryAttribute +BinaryClassificationEvaluator +BinaryClassificationMetrics +BinaryLogisticRegressionSummary +BinaryLogisticRegressionSummaryImpl +BinaryLogisticRegressionTrainingSummary +BinaryLogisticRegressionTrainingSummaryImpl +BinarySample +BinaryType +BinomialBounds +BisectingKMeans +BisectingKMeans +BisectingKMeansModel +BisectingKMeansModel +BisectingKMeansModel.SaveLoadV1_0$ +BisectingKMeansSummary +BlacklistedExecutor +BLAS +BLAS +BlockId +BlockManagerId +BlockManagerMessages +BlockManagerMessages.BlockLocationsAndStatus +BlockManagerMessages.BlockLocationsAndStatus$ +BlockManagerMessages.BlockManagerHeartbeat +BlockManagerMessages.BlockManagerHeartbeat$ +BlockManagerMessages.GetBlockStatus +BlockManagerMessages.GetBlockStatus$ +BlockManagerMessages.GetExecutorEndpointRef +BlockManagerMessages.GetExecutorEndpointRef$ +BlockManagerMessages.GetLocations +BlockManagerMessages.GetLocations$ +BlockManagerMessages.GetLocationsAndStatus +BlockManagerMessages.GetLocationsAndStatus$ +BlockManagerMessages.GetLocationsMultipleBlockIds +BlockManagerMessages.GetLocationsMultipleBlockIds$ +BlockManagerMessages.GetMatchingBlockIds +BlockManagerMessages.GetMatchingBlockIds$ +BlockManagerMessages.GetMemoryStatus$ +BlockManagerMessages.GetPeers +BlockManagerMessages.GetPeers$ +BlockManagerMessages.GetStorageStatus$ +BlockManagerMessages.HasCachedBlocks +BlockManagerMessages.HasCachedBlocks$ +BlockManagerMessages.RegisterBlockManager +BlockManagerMessages.RegisterBlockManager$ +BlockManagerMessages.RemoveBlock +BlockManagerMessages.RemoveBlock$ +BlockManagerMessages.RemoveBroadcast +BlockManagerMessages.RemoveBroadcast$ +BlockManagerMessages.RemoveExecutor +BlockManagerMessages.RemoveExecutor$ +BlockManagerMessages.RemoveRdd +BlockManagerMessages.RemoveRdd$ +BlockManagerMessages.RemoveShuffle +BlockManagerMessages.RemoveShuffle$ +BlockManagerMessages.ReplicateBlock +BlockManagerMessages.ReplicateBlock$ 
+BlockManagerMessages.StopBlockManagerMaster$ +BlockManagerMessages.ToBlockManagerMaster +BlockManagerMessages.ToBlockManagerSlave +BlockManagerMessages.TriggerThreadDump$ +BlockManagerMessages.UpdateBlockInfo +BlockManagerMessages.UpdateBlockInfo$ +BlockMatrix +BlockNotFoundException +BlockReplicationPolicy +BlockReplicationUtils +BlockStatus +BlockUpdatedInfo +BloomFilter +BloomFilter.Version +BooleanParam +BooleanType +BoostingStrategy +BoundedDouble +BreezeUtil +Broadcast +BroadcastBlockId +BucketedRandomProjectionLSH +BucketedRandomProjectionLSHModel +Bucketizer +BufferReleasingInputStream +BytecodeUtils +ByteType +CalendarIntervalType +Catalog +CatalystScan +CategoricalSplit +CausedBy +CharType +CheckpointReader +CheckpointState +ChiSqSelector +ChiSqSelector +ChiSqSelectorModel +ChiSqSelectorModel +ChiSqSelectorModel.SaveLoadV1_0$ +ChiSqTest +ChiSqTest.Method +ChiSqTest.Method$ +ChiSqTest.NullHypothesis$ +ChiSqTestResult +ChiSquareTest +CholeskyDecomposition +ClassificationModel +ClassificationModel +Classifier +CleanAccum +CleanBroadcast +CleanCheckpoint +CleanRDD +CleanShuffle +CleanupTask +CleanupTaskWeakReference +ClosureCleaner +ClusteredDistribution +ClusteringEvaluator +ClusteringSummary +CoarseGrainedClusterMessages +CoarseGrainedClusterMessages.AddWebUIFilter +CoarseGrainedClusterMessages.AddWebUIFilter$ +CoarseGrainedClusterMessages.Get
[49/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/00Index.html -- diff --git a/site/docs/2.3.2/api/R/00Index.html b/site/docs/2.3.2/api/R/00Index.html new file mode 100644 index 000..ec589d2 --- /dev/null +++ b/site/docs/2.3.2/api/R/00Index.html @@ -0,0 +1,1865 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";> +http://www.w3.org/1999/xhtml";> +R: R Frontend for Apache Spark + + + + R Frontend for Apache Spark +http://stat.ethz.ch/R-manual/R-devel/doc/html/logo.jpg"; alt="[R logo]" /> + + + +http://stat.ethz.ch/R-manual/R-devel/doc/html/packages.html";>http://stat.ethz.ch/R-manual/R-devel/doc/html/left.jpg"; alt="[Up]" /> +http://stat.ethz.ch/R-manual/R-devel/doc/html/index.html";>http://stat.ethz.ch/R-manual/R-devel/doc/html/up.jpg"; alt="[Top]" /> +Documentation for package ‘SparkR’ version 2.3.2 + +DESCRIPTION file. + + +Help Pages + + + +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +Y +misc + + + +-- A -- + + +abs +Math functions for Column operations +abs-method +Math functions for Column operations +acos +Math functions for Column operations +acos-method +Math functions for Column operations +add_months +Date time arithmetic functions for Column operations +add_months-method +Date time arithmetic functions for Column operations +AFTSurvivalRegressionModel-class +S4 class that represents a AFTSurvivalRegressionModel +agg +summarize +agg-method +summarize +alias +alias +alias-method +alias +ALSModel-class +S4 class that represents an ALSModel +approxCountDistinct +Aggregate functions for Column operations +approxCountDistinct-method +Aggregate functions for Column operations +approxQuantile +Calculates the approximate quantiles of numerical columns of a SparkDataFrame +approxQuantile-method +Calculates the approximate quantiles of numerical columns of a SparkDataFrame +arrange +Arrange Rows by Variables +arrange-method +Arrange Rows by Variables +array_contains +Collection functions for Column operations +array_contains-method +Collection functions for Column operations +as.data.frame +Download data from a SparkDataFrame into a R data.frame +as.data.frame-method +Download data from a SparkDataFrame into a R data.frame +as.DataFrame +Create a SparkDataFrame +as.DataFrame.default +Create a SparkDataFrame +asc +A set of operations working with SparkDataFrame columns +ascii +String functions for Column operations +ascii-method +String functions for Column operations +asin +Math functions for Column operations +asin-method +Math functions for Column operations +associationRules-method +FP-growth +atan +Math functions for Column operations +atan-method +Math functions for Column operations +atan2 +Math functions for Column operations +atan2-method +Math functions for Column operations +attach +Attach SparkDataFrame to R search path +attach-method +Attach SparkDataFrame to R search path +avg +avg +avg-method +avg +awaitTermination +awaitTermination +awaitTermination-method +awaitTermination + + +-- B -- + + +base64 +String functions for Column operations +base64-method +String functions for Column operations +between +between +between-method +between +bin +Math functions for Column operations +bin-method +Math functions for Column operations +BisectingKMeansModel-class +S4 class that represents a BisectingKMeansModel +bitwiseNOT +Non-aggregate functions for Column operations +bitwiseNOT-method +Non-aggregate functions for Column operations +broadcast +broadcast +broadcast-method +broadcast +bround +Math functions for Column 
operations +bround-method +Math functions for Column operations + + +-- C -- + + +cache +Cache +cache-method +Cache +cacheTable +Cache Table +cacheTable.default +Cache Table +cancelJobGroup +Cancel active jobs for the specified group +cancelJobGroup.default +Cancel active jobs for the specified group +cast +Casts the column to a different data type. +cast-method +Casts the column to a different data type. +cbrt +Math functions for Column operations +cbrt-method +Math functions for Column operations +ceil +Math functions for Column operations +ceil-method +Math functions for Column operations +ceiling +Math functions for Column operations +ceiling-method +Math functions for Column operations +checkpoint +checkpoint +checkpoint-method +checkpoint +clearCache +Clear Cache +clearCache.default +Clear Cache +clearJobGroup +Clear current job group ID and its description +clearJobGroup.default +Clear current job group ID and its description +coalesce +Coalesce +coalesce-method +Coalesce +coalesce-method +Non-aggregate functions for Column operations +collect +Collects all the elements of a SparkDataFrame and coerces them into an R data.frame. +collect-method +Collects all the elements of a SparkDataFrame and coerces them into an R data.frame. +collect_list +Aggregate functions for Column operations +collect_list-method +Aggregate functions for Column op
[22/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/NarrowDependency.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/NarrowDependency.html b/site/docs/2.3.2/api/java/org/apache/spark/NarrowDependency.html new file mode 100644 index 000..968e594 --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/NarrowDependency.html @@ -0,0 +1,315 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +NarrowDependency (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":6,"i1":10}; +var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],4:["t3","Abstract Methods"],8:["t4","Concrete Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark +Class NarrowDependency+ + + +Object + + +org.apache.spark.Dependency + + +org.apache.spark.NarrowDependency + + + + + + + + + +All Implemented Interfaces: +java.io.Serializable + + +Direct Known Subclasses: +OneToOneDependency, RangeDependency + + + +public abstract class NarrowDependency +extends Dependency +:: DeveloperApi :: + Base class for dependencies where each partition of the child RDD depends on a small number + of partitions of the parent RDD. Narrow dependencies allow for pipelined execution. + +See Also: +Serialized Form + + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +NarrowDependency(RDD _rdd) + + + + + + + + + +Method Summary + +All Methods Instance Methods Abstract Methods Concrete Methods + +Modifier and Type +Method and Description + + +abstract scala.collection.Seq
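The NarrowDependency page above describes dependencies in which each child partition depends on a small number of parent partitions, which is what allows pipelined execution. A hedged Scala sketch that inspects this on a map-transformed RDD; it assumes an already-running SparkContext named sc:

    import org.apache.spark.{OneToOneDependency, SparkContext}

    // Assumes `sc` is an active SparkContext.
    def inspectDependencies(sc: SparkContext): Unit = {
      val parent = sc.parallelize(1 to 10, numSlices = 4)
      val child  = parent.map(_ * 2)   // map is a narrow, one-to-one transformation

      child.dependencies.foreach {
        case d: OneToOneDependency[_] =>
          // Each child partition depends only on the matching parent partition.
          println(s"narrow dependency, parents of partition 0: ${d.getParents(0)}")
        case other =>
          println(s"other dependency type: ${other.getClass.getSimpleName}")
      }
    }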
[46/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/column_datetime_diff_functions.html -- diff --git a/site/docs/2.3.2/api/R/column_datetime_diff_functions.html b/site/docs/2.3.2/api/R/column_datetime_diff_functions.html new file mode 100644 index 000..3339b41 --- /dev/null +++ b/site/docs/2.3.2/api/R/column_datetime_diff_functions.html @@ -0,0 +1,199 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";>http://www.w3.org/1999/xhtml";>R: Date time arithmetic functions for Column operations + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js";> +hljs.initHighlightingOnLoad(); + + +column_datetime_diff_functions {SparkR}R Documentation + +Date time arithmetic functions for Column operations + +Description + +Date time arithmetic functions defined for Column. + + + +Usage + + +add_months(y, x) + +datediff(y, x) + +date_add(y, x) + +date_format(y, x) + +date_sub(y, x) + +from_utc_timestamp(y, x) + +months_between(y, x) + +next_day(y, x) + +to_utc_timestamp(y, x) + +## S4 method for signature 'Column' +datediff(y, x) + +## S4 method for signature 'Column' +months_between(y, x) + +## S4 method for signature 'Column,character' +date_format(y, x) + +## S4 method for signature 'Column,character' +from_utc_timestamp(y, x) + +## S4 method for signature 'Column,character' +next_day(y, x) + +## S4 method for signature 'Column,character' +to_utc_timestamp(y, x) + +## S4 method for signature 'Column,numeric' +add_months(y, x) + +## S4 method for signature 'Column,numeric' +date_add(y, x) + +## S4 method for signature 'Column,numeric' +date_sub(y, x) + + + +Arguments + + +y + +Column to compute on. + +x + +For class Column, it is the column used to perform arithmetic operations +with column y. For class numeric, it is the number of months or +days to be added to or subtracted from y. For class character, it is + + + + date_format: date format specification. + + + from_utc_timestamp, to_utc_timestamp: time zone to use. + + + next_day: day of the week string. + + + + + + +Details + +datediff: Returns the number of days from y to x. + +months_between: Returns number of months between dates y and x. + +date_format: Converts a date/timestamp/string to a value of string in the format +specified by the date format given by the second argument. A pattern could be for instance +dd.MM. and could return a string like '18.03.1993'. All +pattern letters of java.text.SimpleDateFormat can be used. +Note: Use when ever possible specialized functions like year. These benefit from a +specialized implementation. + +from_utc_timestamp: Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a +time in UTC, and renders that time as a timestamp in the given time zone. For example, 'GMT+1' +would yield '2017-07-14 03:40:00.0'. + +next_day: Given a date column, returns the first date which is later than the value of +the date column that is on the specified day of the week. For example, +next_day("2015-07-27", "Sunday") returns 2015-08-02 because that is the first Sunday +after 2015-07-27. Day of the week parameter is case insensitive, and accepts first three or +two characters: "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun". + +to_utc_timestamp: Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a +time in the given time zone, and renders that time as a timestamp in UTC. 
For example, 'GMT+1' +would yield '2017-07-14 01:40:00.0'. + +add_months: Returns the date that is numMonths (x) after startDate (y). + +date_add: Returns the date that is x days after. + +date_sub: Returns the date that is x days before. + + + +Note + +datediff since 1.5.0 + +months_between since 1.5.0 + +date_format since 1.5.0 + +from_utc_timestamp since 1.5.0 + +next_day since 1.5.0 + +to_utc_timestamp since 1.5.0 + +add_months since 1.5.0 + +date_add since 1.5.0 + +date_sub since 1.5.0 + + + +See Also + +Other data time functions: column_datetime_functions + + + +Examples + +## Not run: +##D dts <- c("2005-01-02 18:47:22", +##D "2005-12-24 16:30:58", +##D "2005-10-28 07:30:05", +##D "2005-12-28 07:01:05", +##D "2006-01-24 00:01:10") +##D y <- c(2.0, 2.2, 3.4, 2.5, 1.8) +##D df <- createDataFrame(data.frame(time = as.POSIXct(dts), y = y)) +## End(Not run) + +## Not run: +##D tmp <- createDataFrame(data.frame(time_string1 = as.POSIXct(dts), +##D time_string2 = as.POSIXct(dts[order(runif(length(dts)))]))) +##D tmp2 <- mutate(tmp, datediff = datediff(tmp$time_string1, tmp$time_string2), +##Dmonthdiff = months_between(tmp$time_string1, tmp$time_string2)) +##D head(tmp2) +## End(Not run) + +## Not run: +##D tmp <- mutate(df, from_utc = from_utc_timestamp(df$time,
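The column_datetime_diff_functions page above covers datediff, months_between, date_format, from_utc_timestamp, next_day and the add_months / date_add / date_sub family. For reference, a compact Scala sketch of the same functions from org.apache.spark.sql.functions; the SparkSession and sample timestamps are illustrative:

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.functions._

    object DateTimeDiffSketch {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().master("local[*]").appName("datetime-sketch").getOrCreate()
        import spark.implicits._

        val df = Seq(
          ("2005-01-02 18:47:22", "2005-12-24 16:30:58"),
          ("2005-10-28 07:30:05", "2005-12-28 07:01:05")
        ).toDF("s1", "s2")
          .select(to_timestamp($"s1").as("t1"), to_timestamp($"s2").as("t2"))

        df.select(
          datediff($"t2", $"t1").as("days_between"),
          months_between($"t2", $"t1").as("months_between"),
          date_format($"t1", "dd.MM.yyyy").as("formatted"),
          from_utc_timestamp($"t1", "GMT+1").as("shifted_to_gmt_plus_1"),
          next_day($"t1", "Sunday").as("next_sunday"),
          add_months($"t1", 2).as("plus_two_months"),
          date_add($"t1", 7).as("plus_seven_days"),
          date_sub($"t1", 7).as("minus_seven_days")
        ).show(truncate = false)

        spark.stop()
      }
    }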
[29/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/Accumulable.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/Accumulable.html b/site/docs/2.3.2/api/java/org/apache/spark/Accumulable.html new file mode 100644 index 000..813e8c2 --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/Accumulable.html @@ -0,0 +1,489 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +Accumulable (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":42,"i1":42,"i2":42,"i3":42,"i4":42,"i5":42,"i6":42,"i7":42,"i8":42}; +var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"],32:["t6","Deprecated Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark +Class Accumulable+ + + +Object + + +org.apache.spark.Accumulable + + + + + + + +All Implemented Interfaces: +java.io.Serializable + + +Direct Known Subclasses: +Accumulator + + +Deprecated. +use AccumulatorV2. Since 2.0.0. + + +public class Accumulable +extends Object +implements java.io.Serializable +A data type that can be accumulated, i.e. has a commutative and associative "add" operation, + but where the result type, R, may be different from the element type being added, T. + + You must define how to add data, and how to merge two of these together. For some data types, + such as a counter, these might be the same operation. In that case, you can use the simpler + Accumulator. They won't always be the same, though -- e.g., imagine you are + accumulating a set. You will add items to the set, and you will union two sets together. + + Operations are not thread-safe. + + param: id ID of this accumulator; for internal use only. + param: initialValue initial value of accumulator + param: param helper object defining how to add elements of type R and T + param: name human-readable name for use in Spark's web UI + param: countFailedValues whether to accumulate values from failed tasks. This is set to true + for system and time metrics like serialization time or bytes spilled, + and false for things with absolute values like number of input rows. + This should be used for internal metrics only. + +See Also: +Serialized Form + + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +Accumulable(R initialValue, + AccumulableParam param) +Deprecated. + + + + + + + + + + +Method Summary + +All Methods Instance Methods Concrete Methods Deprecated Methods + +Modifier and Type +Method and Description + + +void +add(T term) +Deprecated. +Add more data to this accumulator / accumulable + + + +long +id() +Deprecated. + + + +R +localValue() +Deprecated. +Get the current value of this accumulator from within a task. + + + +void +merge(R term) +Deprecated. +Merge two accumulable objects together + + + +scala.Option +name() +Deprecated. + + + +void +setValue(R newValue) +Deprecated. +Set the accumulator's value. + + + +String +toString() +Deprecated. + + + +R +value() +Deprecated. +Access the accumulator's current value; only allowed on driver. + + + +R +zero() +Deprecated. 
+ + + + + + + +Methods inherited from class Object +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait + + + + + + + + + + + + + + +Constructor Detail + + + + + + + +Accumulable +public Accumulable(R initialValue, + AccumulableParam param) +Deprecated. + + + + + + + + + +Method Detail + + + + + +id +public long id() +Deprecated. + + + + + + + +name +public scala.Option name() +Deprecated. + + + + + + + +zero +public R zero() +Deprecated. + + + + + + + + + +add +public void add(T term) +Deprecated. +Add more data to this accumulator / accumulable + +Parameters: +term - the data to add + + + + + + + + + + +merge +public void merge(R term) +Deprecated. +Merge two accumulable objects together + + Normally, a user will not want to use this version, but will instead ca
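The Accumulable page above (and the Accumulator subclass it mentions) is deprecated in favor of AccumulatorV2 since 2.0.0. A short Scala sketch of the replacement API using the built-in long accumulator; it assumes an already-running SparkContext named sc:

    import org.apache.spark.SparkContext

    // Assumes `sc` is an active SparkContext.
    def countEvens(sc: SparkContext): Long = {
      // AccumulatorV2-based replacement for the deprecated Accumulable/Accumulator classes.
      val evens = sc.longAccumulator("even-count")

      sc.parallelize(1 to 1000).foreach { n =>
        if (n % 2 == 0) evens.add(1L)   // add() runs on the executors
      }

      // Read the value on the driver only, after an action has completed.
      evens.value
    }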
[24/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/FetchFailed.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/FetchFailed.html b/site/docs/2.3.2/api/java/org/apache/spark/FetchFailed.html new file mode 100644 index 000..263284c --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/FetchFailed.html @@ -0,0 +1,483 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +FetchFailed (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":10,"i1":5,"i2":10,"i3":5,"i4":10,"i5":10,"i6":5,"i7":5,"i8":9,"i9":9,"i10":10,"i11":10,"i12":10}; +var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],4:["t3","Abstract Methods"],8:["t4","Concrete Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark +Class FetchFailed + + + +Object + + +org.apache.spark.FetchFailed + + + + + + + +All Implemented Interfaces: +java.io.Serializable, TaskEndReason, TaskFailedReason, scala.Equals, scala.Product + + + +public class FetchFailed +extends Object +implements TaskFailedReason, scala.Product, scala.Serializable +:: DeveloperApi :: + Task failed to fetch shuffle data from a remote node. Probably means we have lost the remote + executors the task is trying to fetch from, and thus need to rerun the previous stage. + +See Also: +Serialized Form + + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +FetchFailed(BlockManagerId bmAddress, + int shuffleId, + int mapId, + int reduceId, + String message) + + + + + + + + + +Method Summary + +All Methods Static Methods Instance Methods Abstract Methods Concrete Methods + +Modifier and Type +Method and Description + + +BlockManagerId +bmAddress() + + +abstract static boolean +canEqual(Object that) + + +boolean +countTowardsTaskFailures() +Fetch failures lead to a different failure handling path: (1) we don't abort the stage after + 4 task failures, instead we immediately go back to the stage which generated the map output, + and regenerate the missing data. + + + +abstract static boolean +equals(Object that) + + +int +mapId() + + +String +message() + + +abstract static int +productArity() + + +abstract static Object +productElement(int n) + + +static scala.collection.Iterator+productIterator() + + +static String +productPrefix() + + +int +reduceId() + + +int +shuffleId() + + +String +toErrorString() +Error message displayed in the web UI. 
+ + + + + + + +Methods inherited from class Object +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + +Methods inherited from interface scala.Product +productArity, productElement, productIterator, productPrefix + + + + + +Methods inherited from interface scala.Equals +canEqual, equals + + + + + + + + + + + + + + +Constructor Detail + + + + + +FetchFailed +public FetchFailed(BlockManagerId bmAddress, + int shuffleId, + int mapId, + int reduceId, + String message) + + + + + + + + + +Method Detail + + + + + +canEqual +public abstract static boolean canEqual(Object that) + + + + + + + +equals +public abstract static boolean equals(Object that) + + + + + + + +productElement +public abstract static Object productElement(int n) + + + + + + + +productArity +public abstract static int productArity() + + + + + + + +productIterator +public static scala.collection.Iterator productIterator() + + + + + + + +productPrefix +public static String productPrefix() + + + + + + + +bmAddress +public BlockManagerId bmAddress() + + + + + + + +shuffleId +public int shuffleId() + + + + + + + +mapId +public int mapId() + + + + + + + +reduceId +public int reduceId() + + + + + + + +message +public String message() + + + + + + + +toErrorString +public String toErrorString() +Description copied from interface: TaskFailedReason +Error message displayed in the web UI. + +Specified by: +toErrorString in interface TaskFailedReason + + + + + + + +
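FetchFailed above and ExceptionFailure earlier in this commit are TaskEndReason values that Spark reports when a task fails. A hedged Scala sketch of observing them through a SparkListener; it assumes an active SparkContext named sc, and the println logging is illustrative:

    import org.apache.spark.{ExceptionFailure, FetchFailed, SparkContext, Success}
    import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}

    // Assumes `sc` is an active SparkContext.
    def watchTaskFailures(sc: SparkContext): Unit = {
      sc.addSparkListener(new SparkListener {
        override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = taskEnd.reason match {
          case f: FetchFailed =>
            // Lost shuffle output: Spark goes back to the stage that produced it and regenerates the data.
            println(s"fetch failed for shuffle ${f.shuffleId} at ${f.bmAddress}: ${f.message}")
          case e: ExceptionFailure =>
            println(s"task threw ${e.className}: ${e.description}")
          case Success =>
            () // normal completion
          case other =>
            println(s"task ended with: $other")
        }
      })
    }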
[23/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/InternalAccumulator.output$.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/InternalAccumulator.output$.html b/site/docs/2.3.2/api/java/org/apache/spark/InternalAccumulator.output$.html new file mode 100644 index 000..3837c3b --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/InternalAccumulator.output$.html @@ -0,0 +1,325 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +InternalAccumulator.output$ (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":10,"i1":10}; +var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark +Class InternalAccumulator.output$ + + + +Object + + +org.apache.spark.InternalAccumulator.output$ + + + + + + + +Enclosing class: +InternalAccumulator + + + +public static class InternalAccumulator.output$ +extends Object + + + + + + + + + + + +Field Summary + +Fields + +Modifier and Type +Field and Description + + +static InternalAccumulator.output$ +MODULE$ +Static reference to the singleton instance of this Scala object. + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +output$() + + + + + + + + + +Method Summary + +All Methods Instance Methods Concrete Methods + +Modifier and Type +Method and Description + + +String +BYTES_WRITTEN() + + +String +RECORDS_WRITTEN() + + + + + + +Methods inherited from class Object +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + + + + + + + + + + +Field Detail + + + + + +MODULE$ +public static final InternalAccumulator.output$ MODULE$ +Static reference to the singleton instance of this Scala object. 
+ + + + + + + + + +Constructor Detail + + + + + +output$ +public output$() + + + + + + + + + +Method Detail + + + + + +BYTES_WRITTEN +public String BYTES_WRITTEN() + + + + + + + +RECORDS_WRITTEN +public String RECORDS_WRITTEN() + + + + + + + + + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/InternalAccumulator.shuffleRead$.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/InternalAccumulator.shuffleRead$.html b/site/docs/2.3.2/api/java/org/apache/spark/InternalAccumulator.shuffleRead$.html new file mode 100644 index 000..0861499 --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/InternalAccumulator.shuffleRead$.html @@ -0,0 +1,390 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +InternalAccumulator.shuffleRead$ (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":10,"i1":10,"i2":10,"i3":10,"i4":10,"i5":10,"i6":10}; +var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + +var methods = {"i0":5,"i1":9,"i2":5,"i3":5,"i4":5,"i5":9,"i6":9,"i7":9}; +var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],4:["t3","Abstract Methods"],8:["t4","Concrete Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark +Class Resubmitted + + + +Object + + +org.apache.spark.Resubmitted + + + + + + + + +public class Resubmitted +extends Object +:: DeveloperApi :: + A org.apache.spark.scheduler.ShuffleMapTask that completed successfully earlier, but we + lost the executor before the stage completed. This means Spark needs to reschedule the task + to be re-executed on a different executor. 
+ + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +Resubmitted() + + + + + + + + + +Method Summary + +All Methods Static Methods Abstract Methods Concrete Methods + +Modifier and Type +Method and Description + + +abstract static boolean +canEqual(Object that) + + +static boolean +countTowardsTaskFailures() + + +abstract static boolean +equals(Object that) + + +abstract static int +productArity() + + +abstract static Object +productElement(int n) + + +static scala.collection.Iterator+productIterator() + + +static String +productPrefix() + + +static String +toErrorString() + + + + + + +Methods inherited from class Object +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + + + + + + + + + + +Constructor Detail + + + + + +Resubmitted +public Resubmitted() + + + + + + + + + +Method Detail + + + + + +toErrorString +public static String toErrorString() + + + + + + + +countTowardsTaskFailures +public static boolean countTowardsTaskFailures() + + + + + + + +canEqual +public abstract static boolean canEqual(Object that) + + + + + + + +equals +public abstract static boolean equals(Object that) + + + + + + + +productElement +public abstract static Object productElement(int n) + + + + + + + +productArity +public abstract static int productArity() + + + + + + + +productIterator +public static scala.collection.Iterator productIterator() + + + + + + + +productPrefix +public static String productPrefix() + + + + + + + + + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/SerializableWritable.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/SerializableWritable.html b/site/docs/2.3.2/api/java/org/apache/spark/SerializableWritable.html new file mode 100644 index 000..7639406 --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/SerializableWritable.html @@ -0,0 +1,310 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +SerializableWritable (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":10,"i1":10,"i2":10}; +var tabs = {65535:["t0","All Methods"],2:["t2","
[39/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/spark.gbt.html -- diff --git a/site/docs/2.3.2/api/R/spark.gbt.html b/site/docs/2.3.2/api/R/spark.gbt.html new file mode 100644 index 000..db3b126 --- /dev/null +++ b/site/docs/2.3.2/api/R/spark.gbt.html @@ -0,0 +1,257 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";>http://www.w3.org/1999/xhtml";>R: Gradient Boosted Tree Model for Regression and Classification + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js";> +hljs.initHighlightingOnLoad(); + + +spark.gbt {SparkR}R Documentation + +Gradient Boosted Tree Model for Regression and Classification + +Description + +spark.gbt fits a Gradient Boosted Tree Regression model or Classification model on a +SparkDataFrame. Users can call summary to get a summary of the fitted +Gradient Boosted Tree model, predict to make predictions on new data, and +write.ml/read.ml to save/load fitted models. +For more details, see +http://spark.apache.org/docs/latest/ml-classification-regression.html#gradient-boosted-tree-regression";> +GBT Regression and +http://spark.apache.org/docs/latest/ml-classification-regression.html#gradient-boosted-tree-classifier";> +GBT Classification + + + +Usage + + +spark.gbt(data, formula, ...) + +## S4 method for signature 'SparkDataFrame,formula' +spark.gbt(data, formula, + type = c("regression", "classification"), maxDepth = 5, + maxBins = 32, maxIter = 20, stepSize = 0.1, lossType = NULL, + seed = NULL, subsamplingRate = 1, minInstancesPerNode = 1, + minInfoGain = 0, checkpointInterval = 10, maxMemoryInMB = 256, + cacheNodeIds = FALSE, handleInvalid = c("error", "keep", "skip")) + +## S4 method for signature 'GBTRegressionModel' +summary(object) + +## S3 method for class 'summary.GBTRegressionModel' +print(x, ...) + +## S4 method for signature 'GBTClassificationModel' +summary(object) + +## S3 method for class 'summary.GBTClassificationModel' +print(x, ...) + +## S4 method for signature 'GBTRegressionModel' +predict(object, newData) + +## S4 method for signature 'GBTClassificationModel' +predict(object, newData) + +## S4 method for signature 'GBTRegressionModel,character' +write.ml(object, path, + overwrite = FALSE) + +## S4 method for signature 'GBTClassificationModel,character' +write.ml(object, path, + overwrite = FALSE) + + + +Arguments + + +data + +a SparkDataFrame for training. + +formula + +a symbolic description of the model to be fitted. Currently only a few formula +operators are supported, including '~', ':', '+', and '-'. + +... + +additional arguments passed to the method. + +type + +type of model, one of "regression" or "classification", to fit + +maxDepth + +Maximum depth of the tree (>= 0). + +maxBins + +Maximum number of bins used for discretizing continuous features and for choosing +how to split on features at each node. More bins give higher granularity. Must be +>= 2 and >= number of categories in any categorical feature. + +maxIter + +Param for maximum number of iterations (>= 0). + +stepSize + +Param for Step size to be used for each iteration of optimization. + +lossType + +Loss function which GBT tries to minimize. +For classification, must be "logistic". For regression, must be one of +"squared" (L2) and "absolute" (L1), default is "squared". + +seed + +integer seed for random number generation. 
+ +subsamplingRate + +Fraction of the training data used for learning each decision tree, in +range (0, 1]. + +minInstancesPerNode + +Minimum number of instances each child must have after a split. If a +split causes the left or right child to have fewer than +minInstancesPerNode instances, the split is discarded as invalid. Should be +>= 1. + +minInfoGain + +Minimum information gain for a split to be considered at a tree node. + +checkpointInterval + +Checkpoint interval (>= 1), or -1 to disable checkpointing. +Note: this setting is ignored if the checkpoint directory is not +set. + +maxMemoryInMB + +Maximum memory in MB allocated to histogram aggregation. + +cacheNodeIds + +If FALSE, the algorithm passes trees to executors to match instances with +nodes. If TRUE, the algorithm caches node IDs for each instance. Caching +can speed up training of deeper trees. Users can control how often the +cache is checkpointed, or disable it, via checkpointInterval. + +handleInvalid + +How to handle invalid data (unseen labels or NULL values) in the features and +label columns of string type in a classification model. +Supported options: "skip" (filter out rows with invalid data), +"error" (throw an error), "keep" (put invalid data in +a special additional bucket, at index numLabels). Default +is "error". + +object + +A fitted Gradient Boosted T
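The same GBT estimator documented above for SparkR is also exposed through the JVM API. A minimal Java sketch, assuming an existing SparkSession with a DataFrame `training` that already has "features" (vector) and "label" (double) columns — the DataFrame and column names are assumptions, not part of the diff:

import org.apache.spark.ml.regression.GBTRegressionModel;
import org.apache.spark.ml.regression.GBTRegressor;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

GBTRegressor gbt = new GBTRegressor()
    .setLabelCol("label")          // assumed column name
    .setFeaturesCol("features")    // assumed column name
    .setMaxDepth(5)                // same defaults as the R wrapper above
    .setMaxIter(20)
    .setStepSize(0.1);
GBTRegressionModel model = gbt.fit(training);
Dataset<Row> predictions = model.transform(training);
predictions.show(5);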
[13/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaDoubleRDD.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaDoubleRDD.html b/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaDoubleRDD.html new file mode 100644 index 000..a65773f --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaDoubleRDD.html @@ -0,0 +1,2216 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +JavaDoubleRDD (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":9,"i1":10,"i2":9,"i3":9,"i4":10,"i5":10,"i6":10,"i7":9,"i8":9,"i9":9,"i10":9,"i11":9,"i12":9,"i13":9,"i14":9,"i15":9,"i16":9,"i17":9,"i18":9,"i19":10,"i20":10,"i21":10,"i22":10,"i23":9,"i24":9,"i25":9,"i26":9,"i27":9,"i28":9,"i29":9,"i30":9,"i31":9,"i32":9,"i33":9,"i34":9,"i35":9,"i36":9,"i37":9,"i38":10,"i39":10,"i40":10,"i41":9,"i42":10,"i43":9,"i44":9,"i45":9,"i46":9,"i47":9,"i48":9,"i49":9,"i50":9,"i51":9,"i52":9,"i53":9,"i54":9,"i55":9,"i56":9,"i57":9,"i58":10,"i59":10,"i60":10,"i61":10,"i62":10,"i63":9,"i64":9,"i65":9,"i66":10,"i67":9,"i68":9,"i69":9,"i70":9,"i71":9,"i72":10,"i73":10,"i74":10,"i75":9,"i76":10,"i77":10,"i78":10,"i79":10,"i80":10,"i81":9,"i82":9,"i83":9,"i84":10,"i85":10,"i86":10,"i87":10,"i88":10,"i89":10,"i90":10,"i91":10,"i92":10,"i93":10,"i94":9,"i95":9,"i96":9,"i97":9,"i98":9,"i99":9,"i100":9,"i101":9,"i102":9,"i103":9,"i104":9,"i105":9,"i106":9,"i107":9,"i108":9,"i109":10,"i110":10,"i111":10,"i112":10,"i113":10,"i114":9,"i115":9,"i116": 9,"i117":9}; +var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark.api.java +Class JavaDoubleRDD + + + +Object + + +org.apache.spark.api.java.JavaDoubleRDD + + + + + + + +All Implemented Interfaces: +java.io.Serializable, JavaRDDLike+ + + +public class JavaDoubleRDD +extends Object + +See Also: +Serialized Form + + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +JavaDoubleRDD(RDD srdd) + + + + + + + + + +Method Summary + +All Methods Static Methods Instance Methods Concrete Methods + +Modifier and Type +Method and Description + + +static U +aggregate(U zeroValue, + Function2 seqOp, + Function2 combOp) + + +JavaDoubleRDD +cache() +Persist this RDD with the default storage level (MEMORY_ONLY). + + + +static JavaPairRDD +cartesian(JavaRDDLike other) + + +static void +checkpoint() + + +scala.reflect.ClassTag +classTag() + + +JavaDoubleRDD +coalesce(int numPartitions) +Return a new RDD that is reduced into numPartitions partitions. + + + +JavaDoubleRDD +coalesce(int numPartitions, +boolean shuffle) +Return a new RDD that is reduced into numPartitions partitions. 
+ + + +static java.util.List +collect() + + +static JavaFutureAction > +collectAsync() + + +static java.util.List [] +collectPartitions(int[] partitionIds) + + +static SparkContext +context() + + +static long +count() + + +static PartialResult +countApprox(long timeout) + + +static PartialResult +countApprox(long timeout, + double confidence) + + +static long +countApproxDistinct(double relativeSD) + + +static JavaFutureAction +countAsync() + + +static java.util.Map +countByValue() + + +static PartialResult > +countByValueApprox(long timeout) + + +static PartialResult > +countByValueApprox(long timeout, + double confidence) + + +JavaDoubleRDD +distinct() +Return a new RDD containing the distinct elements in this RDD. + + + +JavaDoubleRDD +distinct(int numPartitions) +Return a new RDD containing the distinct elements in this RDD. + + + +JavaDoubleRDD +filter(Function f) +Return a new RDD containing only the elements that satisfy a predicate. + + + +Double +first() +Return the first element i
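A minimal sketch of the JavaDoubleRDD operations listed above, assuming an existing JavaSparkContext `jsc` inside a driver program; the input values are placeholders:

import java.util.Arrays;
import org.apache.spark.api.java.JavaDoubleRDD;

JavaDoubleRDD values = jsc.parallelizeDoubles(Arrays.asList(1.0, 2.0, 3.0, 4.0));
JavaDoubleRDD big = values.filter(v -> v > 1.5);   // keep only elements above 1.5
System.out.println(big.count());                   // 3
System.out.println(values.mean());                 // 2.5
System.out.println(values.stats());                // count, mean, stdev, min, max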
[03/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/MapGroupsWithStateFunction.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/MapGroupsWithStateFunction.html b/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/MapGroupsWithStateFunction.html new file mode 100644 index 000..c93487b --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/MapGroupsWithStateFunction.html @@ -0,0 +1,244 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +MapGroupsWithStateFunction (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":6}; +var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],4:["t3","Abstract Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark.api.java.function +Interface MapGroupsWithStateFunction+ + + + + + +All Superinterfaces: +java.io.Serializable + + + +@InterfaceStability.Evolving +public interface MapGroupsWithStateFunction +extends java.io.Serializable +::Experimental:: + Base interface for a map function used in + KeyValueGroupedDataset.mapGroupsWithState( + MapGroupsWithStateFunction, org.apache.spark.sql.Encoder, org.apache.spark.sql.Encoder) + +Since: +2.1.1 + + + + + + + + + + + + +Method Summary + +All Methods Instance Methods Abstract Methods + +Modifier and Type +Method and Description + + +R +call(K key, +java.util.Iterator values, +GroupState state) + + + + + + + + + + + + + + + +Method Detail + + + + + + + +call +R call(K key, + java.util.Iteratorvalues, + GroupState state) +throws Exception + +Throws: +Exception + + + + + + + + + + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/MapPartitionsFunction.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/MapPartitionsFunction.html b/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/MapPartitionsFunction.html new file mode 100644 index 000..ec17a18 --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/MapPartitionsFunction.html @@ -0,0 +1,235 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +MapPartitionsFunction (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":6}; +var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],4:["t3","Abstract Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. 
+ + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark.api.java.function +Interface MapPartitionsFunction+ + + + + + +All Superinterfaces: +java.io.Serializable + + +Functional Inte
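A hedged sketch of the MapGroupsWithStateFunction interface described above, used with KeyValueGroupedDataset.mapGroupsWithState to keep a running count per key; the Dataset<String> `events` and the first-letter grouping key are assumptions made for illustration:

import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsWithStateFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;

MapGroupsWithStateFunction<String, String, Long, String> runningCount =
    (key, values, state) -> {
      long count = state.exists() ? state.get() : 0L;
      while (values.hasNext()) { values.next(); count++; }
      state.update(count);                 // persist the running count per key
      return key + ": " + count;
    };

Dataset<String> counts = events
    .groupByKey((MapFunction<String, String>) e -> e.substring(0, 1), Encoders.STRING())
    .mapGroupsWithState(runningCount, Encoders.LONG(), Encoders.STRING());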
[05/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/CoGroupFunction.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/CoGroupFunction.html b/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/CoGroupFunction.html new file mode 100644 index 000..27c2d1e --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/CoGroupFunction.html @@ -0,0 +1,242 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +CoGroupFunction (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":6}; +var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],4:["t3","Abstract Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark.api.java.function +Interface CoGroupFunction+ + + + + + +All Superinterfaces: +java.io.Serializable + + +Functional Interface: +This is a functional interface and can therefore be used as the assignment target for a lambda expression or method reference. + + + +@FunctionalInterface +public interface CoGroupFunction +extends java.io.Serializable +A function that returns zero or more output records from each grouping key and its values from 2 + Datasets. + + + + + + + + + + + +Method Summary + +All Methods Instance Methods Abstract Methods + +Modifier and Type +Method and Description + + +java.util.Iterator +call(K key, +java.util.Iterator left, +java.util.Iterator right) + + + + + + + + + + + + + + + +Method Detail + + + + + + + +call +java.util.Iterator call(K key, + java.util.Iterator left, + java.util.Iterator right) +throws Exception + +Throws: +Exception + + + + + + + + + + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/DoubleFlatMapFunction.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/DoubleFlatMapFunction.html b/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/DoubleFlatMapFunction.html new file mode 100644 index 000..fa64168 --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/DoubleFlatMapFunction.html @@ -0,0 +1,237 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +DoubleFlatMapFunction (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":6}; +var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],4:["t3","Abstract Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. 
+ + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark.api.java.function +Interface DoubleFlatMapFunction + + +
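A hedged sketch of the CoGroupFunction interface described above ("zero or more output records from each grouping key and its values from 2 Datasets"), assuming an existing SparkSession `spark`; the two input Datasets and the first-letter key are made up for illustration:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.spark.api.java.function.CoGroupFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;

Dataset<String> left = spark.createDataset(Arrays.asList("apple", "avocado", "banana"), Encoders.STRING());
Dataset<String> right = spark.createDataset(Arrays.asList("apricot", "blueberry"), Encoders.STRING());
MapFunction<String, String> firstLetter = s -> s.substring(0, 1);

CoGroupFunction<String, String, String, String> concatGroups = (key, l, r) -> {
  List<String> out = new ArrayList<>();   // zero or more output records per key
  l.forEachRemaining(out::add);
  r.forEachRemaining(out::add);
  return out.iterator();
};

Dataset<String> merged = left.groupByKey(firstLetter, Encoders.STRING())
    .cogroup(right.groupByKey(firstLetter, Encoders.STRING()), concatGroups, Encoders.STRING());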
[17/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/SparkFirehoseListener.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/SparkFirehoseListener.html b/site/docs/2.3.2/api/java/org/apache/spark/SparkFirehoseListener.html new file mode 100644 index 000..59eadc2 --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/SparkFirehoseListener.html @@ -0,0 +1,575 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +SparkFirehoseListener (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":10,"i1":10,"i2":10,"i3":10,"i4":10,"i5":10,"i6":10,"i7":10,"i8":10,"i9":10,"i10":10,"i11":10,"i12":10,"i13":10,"i14":10,"i15":10,"i16":10,"i17":10,"i18":10,"i19":10,"i20":10,"i21":10,"i22":10,"i23":10}; +var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark +Class SparkFirehoseListener + + + +Object + + +org.apache.spark.SparkFirehoseListener + + + + + + + + +public class SparkFirehoseListener +extends Object +Class that allows users to receive all SparkListener events. + Users should override the onEvent method. + + This is a concrete Java class in order to ensure that we don't forget to update it when adding + new methods to SparkListener: forgetting to add a method will result in a compilation error (if + this was a concrete Scala class, default implementations of new event handlers would be inherited + from the SparkListener trait). 
+ + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +SparkFirehoseListener() + + + + + + + + + +Method Summary + +All Methods Instance Methods Concrete Methods + +Modifier and Type +Method and Description + + +void +onApplicationEnd(SparkListenerApplicationEnd applicationEnd) + + +void +onApplicationStart(SparkListenerApplicationStart applicationStart) + + +void +onBlockManagerAdded(SparkListenerBlockManagerAdded blockManagerAdded) + + +void +onBlockManagerRemoved(SparkListenerBlockManagerRemoved blockManagerRemoved) + + +void +onBlockUpdated(SparkListenerBlockUpdated blockUpdated) + + +void +onEnvironmentUpdate(SparkListenerEnvironmentUpdate environmentUpdate) + + +void +onEvent(SparkListenerEvent event) + + +void +onExecutorAdded(SparkListenerExecutorAdded executorAdded) + + +void +onExecutorBlacklisted(SparkListenerExecutorBlacklisted executorBlacklisted) + + +void +onExecutorMetricsUpdate(SparkListenerExecutorMetricsUpdate executorMetricsUpdate) + + +void +onExecutorRemoved(SparkListenerExecutorRemoved executorRemoved) + + +void +onExecutorUnblacklisted(SparkListenerExecutorUnblacklisted executorUnblacklisted) + + +void +onJobEnd(SparkListenerJobEnd jobEnd) + + +void +onJobStart(SparkListenerJobStart jobStart) + + +void +onNodeBlacklisted(SparkListenerNodeBlacklisted nodeBlacklisted) + + +void +onNodeUnblacklisted(SparkListenerNodeUnblacklisted nodeUnblacklisted) + + +void +onOtherEvent(SparkListenerEvent event) + + +void +onSpeculativeTaskSubmitted(SparkListenerSpeculativeTaskSubmitted speculativeTask) + + +void +onStageCompleted(SparkListenerStageCompleted stageCompleted) + + +void +onStageSubmitted(SparkListenerStageSubmitted stageSubmitted) + + +void +onTaskEnd(SparkListenerTaskEnd taskEnd) + + +void +onTaskGettingResult(SparkListenerTaskGettingResult taskGettingResult) + + +void +onTaskStart(SparkListenerTaskStart taskStart) + + +void +onUnpersistRDD(SparkListenerUnpersistRDD unpersistRDD) + + + + + + +Methods inherited from class Object +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + + + + + + + + + + +Constructor Detail + + + + + +SparkFirehoseListener +public SparkFirehoseListener() + + + + + + + + + +Method Detail + + + + + +onEvent +public void onEvent(SparkListenerEvent event) + + + + + + + +onStageCompleted +public final void onStageCompleted(SparkListenerStageCompleted stageCompleted) + + + + + + + +onStageSubmitted +public final void onStageSubmitted(SparkListenerStageSubmitted stageSubm
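A minimal sketch of using SparkFirehoseListener as described above — subclass it and override onEvent to receive every SparkListener event. The counter and the println body are assumptions; registration via SparkContext#addSparkListener is one option (spark.extraListeners is another):

import org.apache.spark.SparkFirehoseListener;
import org.apache.spark.scheduler.SparkListenerEvent;

public class EventCountingListener extends SparkFirehoseListener {
  private long seen = 0;

  @Override
  public void onEvent(SparkListenerEvent event) {
    seen++;
    System.out.println("Event #" + seen + ": " + event.getClass().getSimpleName());
  }
}

// In the driver, assuming an existing JavaSparkContext `jsc`:
// jsc.sc().addSparkListener(new EventCountingListener());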
[18/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/SparkEnv.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/SparkEnv.html b/site/docs/2.3.2/api/java/org/apache/spark/SparkEnv.html new file mode 100644 index 000..152b422 --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/SparkEnv.html @@ -0,0 +1,504 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +SparkEnv (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":10,"i1":10,"i2":10,"i3":10,"i4":10,"i5":9,"i6":10,"i7":10,"i8":10,"i9":10,"i10":10,"i11":10,"i12":10,"i13":9,"i14":10}; +var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark +Class SparkEnv + + + +Object + + +org.apache.spark.SparkEnv + + + + + + + +All Implemented Interfaces: +Logging + + + +public class SparkEnv +extends Object +implements Logging +:: DeveloperApi :: + Holds all the runtime environment objects for a running Spark instance (either master or worker), + including the serializer, RpcEnv, block manager, map output tracker, etc. Currently + Spark code finds the SparkEnv through a global variable, so all the threads can access the same + SparkEnv. It can be accessed by SparkEnv.get (e.g. after creating a SparkContext). + + NOTE: This is not intended for external use. This is exposed for Shark and may be made private + in a future release. + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +SparkEnv(String executorId, +org.apache.spark.rpc.RpcEnv rpcEnv, +Serializer serializer, +Serializer closureSerializer, +org.apache.spark.serializer.SerializerManager serializerManager, +org.apache.spark.MapOutputTracker mapOutputTracker, +org.apache.spark.shuffle.ShuffleManager shuffleManager, +org.apache.spark.broadcast.BroadcastManager broadcastManager, +org.apache.spark.storage.BlockManager blockManager, +org.apache.spark.SecurityManager securityManager, +org.apache.spark.metrics.MetricsSystem metricsSystem, +org.apache.spark.memory.MemoryManager memoryManager, + org.apache.spark.scheduler.OutputCommitCoordinator outputCommitCoordinator, +SparkConf conf) + + + + + + + + + +Method Summary + +All Methods Static Methods Instance Methods Concrete Methods + +Modifier and Type +Method and Description + + +org.apache.spark.storage.BlockManager +blockManager() + + +org.apache.spark.broadcast.BroadcastManager +broadcastManager() + + +Serializer +closureSerializer() + + +SparkConf +conf() + + +String +executorId() + + +static SparkEnv +get() +Returns the SparkEnv. 
+ + + +org.apache.spark.MapOutputTracker +mapOutputTracker() + + +org.apache.spark.memory.MemoryManager +memoryManager() + + +org.apache.spark.metrics.MetricsSystem +metricsSystem() + + +org.apache.spark.scheduler.OutputCommitCoordinator +outputCommitCoordinator() + + +org.apache.spark.SecurityManager +securityManager() + + +Serializer +serializer() + + +org.apache.spark.serializer.SerializerManager +serializerManager() + + +static void +set(SparkEnv e) + + +org.apache.spark.shuffle.ShuffleManager +shuffleManager() + + + + + + +Methods inherited from class Object +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + +Methods inherited from interface org.apache.spark.internal.Logging +initializeLogging, initializeLogIfNecessary, initializeLogIfNecessary, isTraceEnabled, log_, log, logDebug, logDebug, logError, logError, logInfo, logInfo, logName, logTrace, logTrace, logWarning, logWarning + + + + + + + + + + + + + + +Constructor Detail + + + + + +SparkEnv +public SparkEnv(String executorId, +org.apache.spark.rpc.RpcEnv rpcEnv, +Serializer serializer, +Serializer closureSerializer, + org.apache.spark.serializer.SerializerManager serializerManager, +org.a
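A hedged sketch of the SparkEnv accessors listed above. As the class documentation itself notes, SparkEnv is a DeveloperApi not intended for external use; this only shows how the getters are reached once a SparkContext exists:

import org.apache.spark.SparkConf;
import org.apache.spark.SparkEnv;

SparkEnv env = SparkEnv.get();          // valid after a SparkContext has been created
SparkConf conf = env.conf();
String executorId = env.executorId();   // "driver" when called on the driver
System.out.println(executorId + " -> " + conf.get("spark.app.name"));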
[33/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/constant-values.html -- diff --git a/site/docs/2.3.2/api/java/constant-values.html b/site/docs/2.3.2/api/java/constant-values.html new file mode 100644 index 000..0882fdc --- /dev/null +++ b/site/docs/2.3.2/api/java/constant-values.html @@ -0,0 +1,263 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +Constant Field Values (Spark 2.3.2 JavaDoc) + + + + + + + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev +Next + + +Frames +No Frames + + +All Classes + + + + + + + + + + +Constant Field Values +Contents + +org.apache.* + + + + + +org.apache.* + + + +org.apache.spark.launcher.SparkLauncher + +Modifier and Type +Constant Field +Value + + + + + +public static final String +CHILD_CONNECTION_TIMEOUT +"spark.launcher.childConectionTimeout" + + + + +public static final String +CHILD_PROCESS_LOGGER_NAME +"spark.launcher.childProcLoggerName" + + + + +public static final String +DEPLOY_MODE +"spark.submit.deployMode" + + + + +public static final String +DRIVER_EXTRA_CLASSPATH +"spark.driver.extraClassPath" + + + + +public static final String +DRIVER_EXTRA_JAVA_OPTIONS +"spark.driver.extraJavaOptions" + + + + +public static final String +DRIVER_EXTRA_LIBRARY_PATH +"spark.driver.extraLibraryPath" + + + + +public static final String +DRIVER_MEMORY +"spark.driver.memory" + + + + +public static final String +EXECUTOR_CORES +"spark.executor.cores" + + + + +public static final String +EXECUTOR_EXTRA_CLASSPATH +"spark.executor.extraClassPath" + + + + +public static final String +EXECUTOR_EXTRA_JAVA_OPTIONS +"spark.executor.extraJavaOptions" + + + + +public static final String +EXECUTOR_EXTRA_LIBRARY_PATH +"spark.executor.extraLibraryPath" + + + + +public static final String +EXECUTOR_MEMORY +"spark.executor.memory" + + + + +public static final String +NO_RESOURCE +"spark-internal" + + + + +public static final String +SPARK_MASTER +"spark.master" + + + + + + + + +org.apache.spark.util.kvstore.KVIndex + +Modifier and Type +Constant Field +Value + + + + + +public static final String +NATURAL_INDEX_NAME +"__main__" + + + + + + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev +Next + + +Frames +No Frames + + +All Classes + + + + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/deprecated-list.html -- diff --git a/site/docs/2.3.2/api/java/deprecated-list.html b/site/docs/2.3.2/api/java/deprecated-list.html new file mode 100644 index 000..9557476 --- /dev/null +++ b/site/docs/2.3.2/api/java/deprecated-list.html @@ -0,0 +1,827 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +Deprecated List (Spark 2.3.2 JavaDoc) + + + + + + + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev +Next + + +Frames +No Frames + + +All Classes + + + + + + + + + + +Deprecated API +Contents + +Deprecated Interfaces +Deprecated Classes +Deprecated Methods +Deprecated Constructors + + + + + + + + +Deprecated Interfaces + +Interface and Description + + + +org.apache.spark.AccumulableParam +use AccumulatorV2. Since 2.0.0. + + + +org.apache.spark.AccumulatorParam +use AccumulatorV2. Since 2.0.0. + + + + + + + + + + + + +Deprecated Classes + +Class and Description + + + +org.apache.spark.Accumulable +use AccumulatorV2. 
Since 2.0.0. + + + +org.apache.spark.Accumulator +use AccumulatorV2. Since 2.0.0. + + + +org.apache.spark.AccumulatorParam.DoubleAccumulatorParam$ +use AccumulatorV2. Since 2.0.0. + + + +org.apache.spark.AccumulatorParam.FloatAccumulatorP
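A minimal sketch showing how the SparkLauncher constants tabulated above are typically used; the jar path, main class, and master URL are placeholders, not values from the diff (startApplication/launch declare IOException):

import org.apache.spark.launcher.SparkAppHandle;
import org.apache.spark.launcher.SparkLauncher;

SparkAppHandle handle = new SparkLauncher()
    .setAppResource("/path/to/app.jar")            // placeholder
    .setMainClass("com.example.MyApp")             // placeholder
    .setMaster("local[2]")                         // placeholder
    .setConf(SparkLauncher.DRIVER_MEMORY, "2g")
    .setConf(SparkLauncher.EXECUTOR_MEMORY, "2g")
    .startApplication();                           // or launch() for a raw Process; throws IOException
System.out.println(handle.getState());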
[08/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaRDDLike.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaRDDLike.html b/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaRDDLike.html new file mode 100644 index 000..415bc32 --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaRDDLike.html @@ -0,0 +1,2086 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +JavaRDDLike (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":6,"i1":6,"i2":6,"i3":6,"i4":6,"i5":6,"i6":6,"i7":6,"i8":6,"i9":6,"i10":6,"i11":6,"i12":6,"i13":6,"i14":6,"i15":6,"i16":6,"i17":6,"i18":6,"i19":6,"i20":6,"i21":6,"i22":6,"i23":6,"i24":6,"i25":6,"i26":6,"i27":6,"i28":6,"i29":6,"i30":6,"i31":6,"i32":6,"i33":6,"i34":6,"i35":6,"i36":6,"i37":6,"i38":6,"i39":6,"i40":6,"i41":6,"i42":6,"i43":6,"i44":6,"i45":6,"i46":6,"i47":6,"i48":6,"i49":6,"i50":6,"i51":6,"i52":6,"i53":6,"i54":6,"i55":6,"i56":6,"i57":6,"i58":6,"i59":6,"i60":6,"i61":6,"i62":6,"i63":6,"i64":6,"i65":6,"i66":6,"i67":6,"i68":6,"i69":6,"i70":6,"i71":6,"i72":6,"i73":6,"i74":6,"i75":6,"i76":6,"i77":6,"i78":6,"i79":6}; +var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],4:["t3","Abstract Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark.api.java +Interface JavaRDDLike> + + + + + + +All Superinterfaces: +java.io.Serializable + + +All Known Implementing Classes: +JavaDoubleRDD, JavaHadoopRDD, JavaNewHadoopRDD, JavaPairRDD, JavaRDD + + + +public interface JavaRDDLike > +extends scala.Serializable +Defines operations common to several Java RDD implementations. + + +Note: +This trait is not intended to be implemented by user code. + + + + + + + + + + + + +Method Summary + +All Methods Instance Methods Abstract Methods + +Modifier and Type +Method and Description + + + U +aggregate(U zeroValue, + Function2 seqOp, + Function2 combOp) +Aggregate the elements of each partition, and then the results for all the partitions, using + given combine functions and a neutral "zero value". + + + + JavaPairRDD +cartesian(JavaRDDLike other) +Return the Cartesian product of this RDD and another one, that is, the RDD of all pairs of + elements (a, b) where a is in this and b is in other. + + + +void +checkpoint() +Mark this RDD for checkpointing. + + + +scala.reflect.ClassTag +classTag() + + +java.util.List +collect() +Return an array that contains all of the elements in this RDD. + + + +JavaFutureAction > +collectAsync() +The asynchronous version of collect, which returns a future for + retrieving an array containing all of the elements in this RDD. + + + +java.util.List [] +collectPartitions(int[] partitionIds) +Return an array that contains all of the elements in a specific partition of this RDD. + + + +SparkContext +context() +The SparkContext that this RDD was created on. + + + +long +count() +Return the number of elements in the RDD. + + + +PartialResult +countApprox(long timeout) +Approximate version of count() that returns a potentially incomplete result + within a timeout, even if not all tasks have finished. 
+ + + +PartialResult +countApprox(long timeout, + double confidence) +Approximate version of count() that returns a potentially incomplete result + within a timeout, even if not all tasks have finished. + + + +long +countApproxDistinct(double relativeSD) +Return approximate number of distinct elements in the RDD. + + + +JavaFutureAction +countAsync() +The asynchronous version of count, which returns a + future for counting the number of elements in this RDD. + + + +java.util.Map +countByValue() +Return the count of each unique value in this RDD as a map of (value, count) pairs. + + + +PartialResult > +countByValueApprox(long timeout) +Approximate version of countByValue(). + + + +PartialResult
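A minimal sketch of a few of the JavaRDDLike operations summarized above, assuming an existing JavaSparkContext `jsc`; the input values are placeholders:

import java.util.Arrays;
import java.util.List;
import org.apache.spark.api.java.JavaRDD;

JavaRDD<Integer> rdd = jsc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
long n = rdd.count();                // 5
List<Integer> first3 = rdd.take(3);  // [1, 2, 3]
// aggregate: per-partition fold (seqOp) followed by a merge of partial results (combOp)
Integer sum = rdd.aggregate(0, (acc, x) -> acc + x, (a, b) -> a + b);   // 15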
[15/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/TaskCommitDenied.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/TaskCommitDenied.html b/site/docs/2.3.2/api/java/org/apache/spark/TaskCommitDenied.html new file mode 100644 index 000..c1fc564 --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/TaskCommitDenied.html @@ -0,0 +1,448 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +TaskCommitDenied (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":10,"i1":5,"i2":10,"i3":5,"i4":10,"i5":10,"i6":5,"i7":5,"i8":9,"i9":9,"i10":10}; +var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],4:["t3","Abstract Methods"],8:["t4","Concrete Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark +Class TaskCommitDenied + + + +Object + + +org.apache.spark.TaskCommitDenied + + + + + + + +All Implemented Interfaces: +java.io.Serializable, TaskEndReason, TaskFailedReason, scala.Equals, scala.Product + + + +public class TaskCommitDenied +extends Object +implements TaskFailedReason, scala.Product, scala.Serializable +:: DeveloperApi :: + Task requested the driver to commit, but was denied. + +See Also: +Serialized Form + + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +TaskCommitDenied(int jobID, +int partitionID, +int attemptNumber) + + + + + + + + + +Method Summary + +All Methods Static Methods Instance Methods Abstract Methods Concrete Methods + +Modifier and Type +Method and Description + + +int +attemptNumber() + + +abstract static boolean +canEqual(Object that) + + +boolean +countTowardsTaskFailures() +If a task failed because its attempt to commit was denied, do not count this failure + towards failing the stage. + + + +abstract static boolean +equals(Object that) + + +int +jobID() + + +int +partitionID() + + +abstract static int +productArity() + + +abstract static Object +productElement(int n) + + +static scala.collection.Iterator+productIterator() + + +static String +productPrefix() + + +String +toErrorString() +Error message displayed in the web UI. 
+ + + + + + + +Methods inherited from class Object +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + +Methods inherited from interface scala.Product +productArity, productElement, productIterator, productPrefix + + + + + +Methods inherited from interface scala.Equals +canEqual, equals + + + + + + + + + + + + + + +Constructor Detail + + + + + +TaskCommitDenied +public TaskCommitDenied(int jobID, +int partitionID, +int attemptNumber) + + + + + + + + + +Method Detail + + + + + +canEqual +public abstract static boolean canEqual(Object that) + + + + + + + +equals +public abstract static boolean equals(Object that) + + + + + + + +productElement +public abstract static Object productElement(int n) + + + + + + + +productArity +public abstract static int productArity() + + + + + + + +productIterator +public static scala.collection.Iterator productIterator() + + + + + + + +productPrefix +public static String productPrefix() + + + + + + + +jobID +public int jobID() + + + + + + + +partitionID +public int partitionID() + + + + + + + +attemptNumber +public int attemptNumber() + + + + + + + +toErrorString +public String toErrorString() +Description copied from interface: TaskFailedReason +Error message displayed in the web UI. + +Specified by: +toErrorString in interface TaskFailedReason + + + + + + + + +countTowardsTaskFailures +public boolean countTowardsTaskFailures() +If a task failed because its attempt to commit was denied, do not count this failure + towards failing the stage. This is intended to prevent spurious stage failures in cases + where many speculative tasks are launched and denied to commit. + +Specified by: +countTowardsTaskFailures in interface TaskFailedReason +Returns: +(undocumented) + + + + + + + + + + + + + + +Skip navigation
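TaskCommitDenied is a TaskFailedReason, so it can be observed from a listener's onTaskEnd callback. A hedged sketch under that assumption — the logging body is illustrative, not part of the class above, and note countTowardsTaskFailures() is false, so such failures do not fail the stage:

import org.apache.spark.TaskCommitDenied;
import org.apache.spark.scheduler.SparkListener;
import org.apache.spark.scheduler.SparkListenerTaskEnd;

public class CommitDeniedLogger extends SparkListener {
  @Override
  public void onTaskEnd(SparkListenerTaskEnd taskEnd) {
    if (taskEnd.reason() instanceof TaskCommitDenied) {
      TaskCommitDenied denied = (TaskCommitDenied) taskEnd.reason();
      // does not count towards task failures; log it for diagnostics only
      System.out.println("Commit denied: " + denied.toErrorString());
    }
  }
}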
[37/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/summary.html -- diff --git a/site/docs/2.3.2/api/R/summary.html b/site/docs/2.3.2/api/R/summary.html new file mode 100644 index 000..0cd3241 --- /dev/null +++ b/site/docs/2.3.2/api/R/summary.html @@ -0,0 +1,150 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";>http://www.w3.org/1999/xhtml";>R: summary + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js";> +hljs.initHighlightingOnLoad(); + + +summary {SparkR}R Documentation + +summary + +Description + +Computes specified statistics for numeric and string columns. Available statistics are: + + + + count + + + mean + + + stddev + + + min + + + max + + + arbitrary approximate percentiles specified as a percentage (e.g., "75%") + + + +If no statistics are given, this function computes count, mean, stddev, min, +approximate quartiles (percentiles at 25%, 50%, and 75%), and max. +This function is meant for exploratory data analysis, as we make no guarantee about the +backward compatibility of the schema of the resulting Dataset. If you want to +programmatically compute summary statistics, use the agg function instead. + + + +Usage + + +summary(object, ...) + +## S4 method for signature 'SparkDataFrame' +summary(object, ...) + + + +Arguments + + +object + +a SparkDataFrame to be summarized. + +... + +(optional) statistics to be computed for all columns. + + + + +Value + +A SparkDataFrame. + + + +Note + +summary(SparkDataFrame) since 1.5.0 + +The statistics provided by summary were changed in 2.3.0. Use describe for +the previous defaults.
+ + + +See Also + +describe + +Other SparkDataFrame functions: SparkDataFrame-class, +agg, alias, +arrange, as.data.frame, +attach,SparkDataFrame-method, +broadcast, cache, +checkpoint, coalesce, +collect, colnames, +coltypes, +createOrReplaceTempView, +crossJoin, cube, +dapplyCollect, dapply, +describe, dim, +distinct, dropDuplicates, +dropna, drop, +dtypes, except, +explain, filter, +first, gapplyCollect, +gapply, getNumPartitions, +group_by, head, +hint, histogram, +insertInto, intersect, +isLocal, isStreaming, +join, limit, +localCheckpoint, merge, +mutate, ncol, +nrow, persist, +printSchema, randomSplit, +rbind, registerTempTable, +rename, repartition, +rollup, sample, +saveAsTable, schema, +selectExpr, select, +showDF, show, +storageLevel, str, +subset, take, +toJSON, unionByName, +union, unpersist, +withColumn, withWatermark, +with, write.df, +write.jdbc, write.json, +write.orc, write.parquet, +write.stream, write.text + + + +Examples + +## Not run: +##D sparkR.session() +##D path <- "path/to/file.json" +##D df <- read.json(path) +##D summary(df) +##D summary(df, "min", "25%", "75%", "max") +##D summary(select(df, "age", "height")) +## End(Not run) + + + +[Package SparkR version 2.3.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/tableNames.html -- diff --git a/site/docs/2.3.2/api/R/tableNames.html b/site/docs/2.3.2/api/R/tableNames.html new file mode 100644 index 000..bc07dad --- /dev/null +++ b/site/docs/2.3.2/api/R/tableNames.html @@ -0,0 +1,61 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";>http://www.w3.org/1999/xhtml";>R: Table Names + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js";> +hljs.initHighlightingOnLoad(); + + +tableNames {SparkR}R Documentation + +Table Names + +Description + +Returns the names of tables in the given database as an array. + + + +Usage + + +## Default S3 method: +tableNames(databaseName = NULL) + + + +Arguments + + +databaseName + +(optional) name of the database + + + + +Value + +a list of table names + + + +Note + +tableNames since 1.4.0 + + + +Examples + +## Not run: +##D sparkR.session() +##D tableNames("hive") +## End(Not run) + + + +[Package SparkR version 2.3.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/tableToDF.html -- diff --git a/site/docs/2.3.2/api/R/tableToDF.html b/site/docs/2.3.2/api/R/tableToDF.html new file mode 100644 index 000..803a0d0 --- /dev/null +++ b/site/docs/2.3.2/api/R/tableToDF.html @@ -0,0 +1,67 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";>http://www.w3.org/1999/xhtml";>R: Create a SparkDataFrame from a SparkSQL table or view + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css";> +https://cdnjs.cloudfla
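The summary/describe behaviour documented above for SparkR is also available on the Java Dataset API (Dataset.summary was added in 2.3.0). A minimal sketch assuming an existing SparkSession `spark`; the JSON path is a placeholder and the column names follow the R example:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

Dataset<Row> df = spark.read().json("path/to/file.json");   // placeholder path
df.summary().show();                                        // count, mean, stddev, min, quartiles, max
df.summary("min", "25%", "75%", "max").show();              // only the requested statistics
df.describe("age", "height").show();                        // the pre-2.3.0 defaults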
[12/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaFutureAction.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaFutureAction.html b/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaFutureAction.html new file mode 100644 index 000..a43e61c --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaFutureAction.html @@ -0,0 +1,237 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +JavaFutureAction (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":6}; +var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],4:["t3","Abstract Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark.api.java +Interface JavaFutureAction+ + + + + + +All Superinterfaces: +java.util.concurrent.Future + + + +public interface JavaFutureAction +extends java.util.concurrent.Future + + + + + + + + + + + +Method Summary + +All Methods Instance Methods Abstract Methods + +Modifier and Type +Method and Description + + +java.util.List +jobIds() +Returns the job IDs run by the underlying async operation. + + + + + + + +Methods inherited from interface java.util.concurrent.Future +cancel, get, get, isCancelled, isDone + + + + + + + + + + + + + + +Method Detail + + + + + +jobIds +java.util.List jobIds() +Returns the job IDs run by the underlying async operation. + + This returns the current snapshot of the job list. Certain operations may run multiple + jobs, so multiple calls to this method may return different lists. + + + + + + + + + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaHadoopRDD.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaHadoopRDD.html b/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaHadoopRDD.html new file mode 100644 index 000..4d2f3bd --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaHadoopRDD.html @@ -0,0 +1,339 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +JavaHadoopRDD (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":10,"i1":10,"i2":10}; +var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. 
+ + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark.api.java +Class JavaHadoopRDD + + + +Object + + +org.apache.spark.api.java.JavaPairRDD + + +org.apache.spark.api.java.JavaHadoopRDD + + + + + + + + + +All Implemented Interfaces: +java.io.Serializable, JavaRDDLike ,JavaPairRDD > + + + +public class JavaHadoopRDD +exte
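A minimal sketch of JavaFutureAction as documented above: an asynchronous action returns a Future plus the IDs of the jobs it ran. Assumes an existing JavaSparkContext `jsc`; the input values are placeholders (get() blocks and declares InterruptedException/ExecutionException):

import java.util.Arrays;
import java.util.List;
import org.apache.spark.api.java.JavaFutureAction;
import org.apache.spark.api.java.JavaRDD;

JavaRDD<Integer> rdd = jsc.parallelize(Arrays.asList(1, 2, 3));
JavaFutureAction<Long> pending = rdd.countAsync();
List<Integer> jobIds = pending.jobIds();   // snapshot of the jobs run so far
Long total = pending.get();                // standard java.util.concurrent.Future blocking get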
[48/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/00frame_toc.html -- diff --git a/site/docs/2.3.2/api/R/00frame_toc.html b/site/docs/2.3.2/api/R/00frame_toc.html new file mode 100644 index 000..f627d53 --- /dev/null +++ b/site/docs/2.3.2/api/R/00frame_toc.html @@ -0,0 +1,323 @@ + + + + + +R Documentation of SparkR + + +window.onload = function() { + var imgs = document.getElementsByTagName('img'), i, img; + for (i = 0; i < imgs.length; i++) { +img = imgs[i]; +// center an image if it is the only element of its parent +if (img.parentElement.childElementCount === 1) + img.parentElement.style.textAlign = 'center'; + } +}; + + + + + + + +* { + font-family: "Trebuchet MS", "Lucida Grande", "Lucida Sans Unicode", "Lucida Sans", Arial, sans-serif; + font-size: 14px; +} +body { + padding: 0 5px; + margin: 0 auto; + width: 80%; + max-width: 60em; /* 960px */ +} + +h1, h2, h3, h4, h5, h6 { + color: #666; +} +h1, h2 { + text-align: center; +} +h1 { + font-size: x-large; +} +h2, h3 { + font-size: large; +} +h4, h6 { + font-style: italic; +} +h3 { + border-left: solid 5px #ddd; + padding-left: 5px; + font-variant: small-caps; +} + +p img { + display: block; + margin: auto; +} + +span, code, pre { + font-family: Monaco, "Lucida Console", "Courier New", Courier, monospace; +} +span.acronym {} +span.env { + font-style: italic; +} +span.file {} +span.option {} +span.pkg { + font-weight: bold; +} +span.samp{} + +dt, p code { + background-color: #F7F7F7; +} + + + + + + + + +SparkR + + +AFTSurvivalRegressionModel-class +ALSModel-class +BisectingKMeansModel-class +DecisionTreeClassificationModel-class +DecisionTreeRegressionModel-class +FPGrowthModel-class +GBTClassificationModel-class +GBTRegressionModel-class +GaussianMixtureModel-class +GeneralizedLinearRegressionModel-class +GroupedData +IsotonicRegressionModel-class +KMeansModel-class +KSTest-class +LDAModel-class +LinearSVCModel-class +LogisticRegressionModel-class +MultilayerPerceptronClassificationModel-class +NaiveBayesModel-class +RandomForestClassificationModel-class +RandomForestRegressionModel-class +SparkDataFrame +StreamingQuery +WindowSpec +alias +approxQuantile +arrange +as.data.frame +attach +avg +awaitTermination +between +broadcast +cache +cacheTable +cancelJobGroup +cast +checkpoint +clearCache +clearJobGroup +coalesce +collect +coltypes +column +columnaggregatefunctions +columncollectionfunctions +columndatetimediff_functions +columndatetimefunctions +columnmathfunctions +columnmiscfunctions +columnnonaggregatefunctions +columnstringfunctions +columnwindowfunctions +columnfunctions +columns +corr +count +cov +createDataFrame +createExternalTable-deprecated +createOrReplaceTempView +createTable +crossJoin +crosstab +cube +currentDatabase +dapply +dapplyCollect +describe +dim +distinct +drop +dropDuplicates +dropTempTable-deprecated +dropTempView +dtypes +endsWith +eqnullsafe +except +explain +filter +first +fitted +freqItems +gapply +gapplyCollect +getLocalProperty +getNumPartitions +glm +groupBy +hashCode +head +hint +histogram +insertInto +install.spark +intersect +isActive +isLocal +isStreaming +join +last +lastProgress +limit +listColumns +listDatabases +listFunctions +listTables +localCheckpoint +match +merge +mutate +nafunctions +ncol +not +nrow +orderBy +otherwise +over +partitionBy +persist +pivot +predict +print.jobj +print.structField +print.structType +printSchema +queryName +randomSplit +rangeBetween +rbind +read.df +read.jdbc +read.json +read.ml +read.orc +read.parquet +read.stream 
+read.text +recoverPartitions +refreshByPath +refreshTable +registerTempTable-deprecated +rename +repartition +rollup +rowsBetween +sample +sampleBy +saveAsTable +schema +select +selectExpr +setCheckpointDir +setCurrentDatabase +setJobDescription +setJobGroup +setLocalProperty +setLogLevel +show +showDF +spark.addFile +spark.als +spark.bisectingKmeans +spark.decisionTree +spark.fpGrowth +spark.gaussianMixture +spark.gbt +spark.getSparkFiles +spark.getSparkFilesRootDirectory +spark.glm +spark.isoreg +spark.kmeans +spark.kstest +spark.lapply +spark.lda +spark.logit +spark.mlp +spark.naiveBayes +spark.randomForest +spark.survreg +spark.svmLinear +sparkR.callJMethod +sparkR.callJStatic +sparkR.conf +sparkR.init-deprecated +sparkR.newJObject +sparkR.session +sparkR.session.stop +sparkR.uiWebUrl +sparkR.version +sparkRHive.init-deprecated +sparkRSQL.init-deprecated +sql +startsWith +status +stopQuery +storageLevel +str +structField +structType +subset +substr +summarize +summary +tableNames +tableToDF +tables +take +toJSON +uncacheTable +union +unionByName +unpersist +windowOrderBy +windowPartitionBy +with +withColumn +withWatermark +write.df +write.jdbc +write.json +write.ml +write.orc +write.parquet +write.stream +write.text + + +Generat
[28/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/AccumulatorParam.IntAccumulatorParam$.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/AccumulatorParam.IntAccumulatorParam$.html b/site/docs/2.3.2/api/java/org/apache/spark/AccumulatorParam.IntAccumulatorParam$.html new file mode 100644 index 000..f6890a4 --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/AccumulatorParam.IntAccumulatorParam$.html @@ -0,0 +1,379 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +AccumulatorParam.IntAccumulatorParam$ (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":42,"i1":42}; +var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"],32:["t6","Deprecated Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark +Class AccumulatorParam.IntAccumulatorParam$ + + + +Object + + +org.apache.spark.AccumulatorParam.IntAccumulatorParam$ + + + + + + + +All Implemented Interfaces: +java.io.Serializable, AccumulableParam, AccumulatorParam + + +Enclosing interface: +AccumulatorParam + + +Deprecated. +use AccumulatorV2. Since 2.0.0. + + +public static class AccumulatorParam.IntAccumulatorParam$ +extends Object +implements AccumulatorParam + +See Also: +Serialized Form + + + + + + + + + + + + +Nested Class Summary + + + + +Nested classes/interfaces inherited from interface org.apache.spark.AccumulatorParam +AccumulatorParam.DoubleAccumulatorParam$, AccumulatorParam.FloatAccumulatorParam$, AccumulatorParam.IntAccumulatorParam$, AccumulatorParam.LongAccumulatorParam$, AccumulatorParam.StringAccumulatorParam$ + + + + + + + + +Field Summary + +Fields + +Modifier and Type +Field and Description + + +static AccumulatorParam.IntAccumulatorParam$ +MODULE$ +Deprecated. +Static reference to the singleton instance of this Scala object. + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +IntAccumulatorParam$() +Deprecated. + + + + + + + + + + +Method Summary + +All Methods Instance Methods Concrete Methods Deprecated Methods + +Modifier and Type +Method and Description + + +int +addInPlace(int t1, + int t2) +Deprecated. + + + +int +zero(int initialValue) +Deprecated. + + + + + + + +Methods inherited from class Object +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + +Methods inherited from interface org.apache.spark.AccumulatorParam +addAccumulator + + + + + +Methods inherited from interface org.apache.spark.AccumulableParam +addInPlace, zero + + + + + + + + + + + + + + +Field Detail + + + + + +MODULE$ +public static final AccumulatorParam.IntAccumulatorParam$ MODULE$ +Deprecated. +Static reference to the singleton instance of this Scala object. + + + + + + + + + +Constructor Detail + + + + + +IntAccumulatorParam$ +public IntAccumulatorParam$() +Deprecated. + + + + + + + + + +Method Detail + + + + + +addInPlace +public int addInPlace(int t1, + int t2) +Deprecated. + + + + + + + +zero +public int zero(int initialValue) +Deprecated. 
+ + + + + + + + + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/AccumulatorParam.LongAccumulatorParam$.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/AccumulatorParam.LongAccumulatorParam$.html b/site/docs/2.3.2/api/java/org/apache/spark/AccumulatorPa
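The deprecation notes above point at AccumulatorV2. A minimal sketch of the modern replacement for Accumulator<Integer> with IntAccumulatorParam, assuming an existing JavaSparkContext `jsc`; the data and accumulator name are placeholders:

import java.util.Arrays;
import org.apache.spark.util.LongAccumulator;

LongAccumulator hits = jsc.sc().longAccumulator("hits");   // AccumulatorV2-based replacement
jsc.parallelize(Arrays.asList(1, 2, 3, 4)).foreach(x -> {
  if (x % 2 == 0) {
    hits.add(1);                                           // accumulate on the executors
  }
});
System.out.println(hits.value());                          // 2, read back on the driver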
[50/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/_layouts/global.html -- diff --git a/_layouts/global.html b/_layouts/global.html index 874138f..03a34db 100644 --- a/_layouts/global.html +++ b/_layouts/global.html @@ -121,7 +121,7 @@ Documentation - Latest Release (Spark 2.3.1) + Latest Release (Spark 2.3.2) Older Versions and Other Resources Frequently Asked Questions http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/documentation.md -- diff --git a/documentation.md b/documentation.md index 8a9b62d..197d4e4 100644 --- a/documentation.md +++ b/documentation.md @@ -12,6 +12,7 @@ navigation: Setup instructions, programming guides, and other documentation are available for each stable version of Spark below: + Spark 2.3.2 Spark 2.3.1 Spark 2.3.0 Spark 2.2.2 http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/js/downloads.js -- diff --git a/js/downloads.js b/js/downloads.js index d5ab599..6de20f0 100644 --- a/js/downloads.js +++ b/js/downloads.js @@ -29,6 +29,7 @@ var packagesV7 = [hadoop2p7, hadoop2p6, hadoop2p4, hadoop2p3, hadoopFree, source // 2.2.0+ var packagesV8 = [hadoop2p7, hadoop2p6, hadoopFree, sources]; +addRelease("2.3.2", new Date("09/24/2018"), packagesV8, true, true); addRelease("2.3.1", new Date("06/08/2018"), packagesV8, true, true); addRelease("2.3.0", new Date("02/28/2018"), packagesV8, true, false); addRelease("2.2.2", new Date("07/02/2018"), packagesV8, true, true); http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/releases/_posts/2018-09-25-spark-release-2-3-2.md -- diff --git a/releases/_posts/2018-09-25-spark-release-2-3-2.md b/releases/_posts/2018-09-25-spark-release-2-3-2.md new file mode 100644 index 000..fa1f1af --- /dev/null +++ b/releases/_posts/2018-09-25-spark-release-2-3-2.md @@ -0,0 +1,23 @@ +--- +layout: post +title: Spark Release 2.3.2 +categories: [] +tags: [] +status: publish +type: post +published: true +meta: + _edit_last: '4' + _wpas_done_all: '1' +--- + +Spark 2.3.2 is a maintenance release containing stability fixes. This release is based on the branch-2.3 maintenance branch of Spark. We strongly recommend that all 2.3.x users upgrade to this stable release. + +You can consult JIRA for the [detailed changes](https://s.apache.org/spark-2.3.2). + +### Known issues + + - **SQL** + - SPARK-25206: wrong records are returned when the Hive metastore schema and the Parquet schema are in different letter cases + +We would like to acknowledge all community members for contributing patches to this release.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/committers.html -- diff --git a/site/committers.html b/site/committers.html index 3ec90dd..003a303 100644 --- a/site/committers.html +++ b/site/committers.html @@ -106,7 +106,7 @@ Documentation - Latest Release (Spark 2.3.1) + Latest Release (Spark 2.3.2) Older Versions and Other Resources Frequently Asked Questions http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/community.html -- diff --git a/site/community.html b/site/community.html index da7e47e..349a8d4 100644 --- a/site/community.html +++ b/site/community.html @@ -106,7 +106,7 @@ Documentation - Latest Release (Spark 2.3.1) + Latest Release (Spark 2.3.2) Older Versions and Other Resources Frequently Asked Questions http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/contributing.html -- diff --git a/site/contributing.html b/site/contributing.html index 1dd4823..2dac454 100644 --- a/site/contributing.html +++ b/site/contributing.html @@ -106,7 +106,7 @@ Documentation - Latest Release (Spark 2.3.1) + Latest Release (Spark 2.3.2) Older Versions and Other Resources Frequently Asked Questions http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/developer-tools.html -- diff --git a/site/developer-tools.html b/site/developer-tools.html index e2b9217..54d848c 100644 --- a/site/developer-tools.html +++ b/site/developer-tools.html @@ -106,7 +106,7 @@ Documentation - Latest Release (Spark 2.3.1) +
[40/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/schema.html -- diff --git a/site/docs/2.3.2/api/R/schema.html b/site/docs/2.3.2/api/R/schema.html new file mode 100644 index 000..9cbcd1a --- /dev/null +++ b/site/docs/2.3.2/api/R/schema.html @@ -0,0 +1,106 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";>http://www.w3.org/1999/xhtml";>R: Get schema object + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js";> +hljs.initHighlightingOnLoad(); + + +schema {SparkR}R Documentation + +Get schema object + +Description + +Returns the schema of this SparkDataFrame as a structType object. + + + +Usage + + +schema(x) + +## S4 method for signature 'SparkDataFrame' +schema(x) + + + +Arguments + + +x + +A SparkDataFrame + + + + +Note + +schema since 1.4.0 + + + +See Also + +Other SparkDataFrame functions: SparkDataFrame-class, +agg, alias, +arrange, as.data.frame, +attach,SparkDataFrame-method, +broadcast, cache, +checkpoint, coalesce, +collect, colnames, +coltypes, +createOrReplaceTempView, +crossJoin, cube, +dapplyCollect, dapply, +describe, dim, +distinct, dropDuplicates, +dropna, drop, +dtypes, except, +explain, filter, +first, gapplyCollect, +gapply, getNumPartitions, +group_by, head, +hint, histogram, +insertInto, intersect, +isLocal, isStreaming, +join, limit, +localCheckpoint, merge, +mutate, ncol, +nrow, persist, +printSchema, randomSplit, +rbind, registerTempTable, +rename, repartition, +rollup, sample, +saveAsTable, selectExpr, +select, showDF, +show, storageLevel, +str, subset, +summary, take, +toJSON, unionByName, +union, unpersist, +withColumn, withWatermark, +with, write.df, +write.jdbc, write.json, +write.orc, write.parquet, +write.stream, write.text + + + +Examples + +## Not run: +##D sparkR.session() +##D path <- "path/to/file.json" +##D df <- read.json(path) +##D dfSchema <- schema(df) +## End(Not run) + + + +[Package SparkR version 2.3.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/select.html -- diff --git a/site/docs/2.3.2/api/R/select.html b/site/docs/2.3.2/api/R/select.html new file mode 100644 index 000..ffc36fc --- /dev/null +++ b/site/docs/2.3.2/api/R/select.html @@ -0,0 +1,157 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";>http://www.w3.org/1999/xhtml";>R: Select + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js";> +hljs.initHighlightingOnLoad(); + + +select {SparkR}R Documentation + +Select + +Description + +Selects a set of columns with names or Column expressions. + + + +Usage + + +select(x, col, ...) + +## S4 method for signature 'SparkDataFrame' +x$name + +## S4 replacement method for signature 'SparkDataFrame' +x$name <- value + +## S4 method for signature 'SparkDataFrame,character' +select(x, col, ...) + +## S4 method for signature 'SparkDataFrame,Column' +select(x, col, ...) + +## S4 method for signature 'SparkDataFrame,list' +select(x, col) + + + +Arguments + + +x + +a SparkDataFrame. + +col + +a list of columns or single Column or name. + +... + +additional column(s) if only one column is specified in col. +If more than one column is assigned in col, ... +should be left empty. 
+ +name + +name of a Column (without being wrapped by ""). + +value + +a Column or an atomic vector in the length of 1 as literal value, or NULL. +If NULL, the specified Column is dropped. + + + + +Value + +A new SparkDataFrame with selected columns. + + + +Note + +$ since 1.4.0 + +$<- since 1.4.0 + +select(SparkDataFrame, character) since 1.4.0 + +select(SparkDataFrame, Column) since 1.4.0 + +select(SparkDataFrame, list) since 1.4.0 + + + +See Also + +Other SparkDataFrame functions: SparkDataFrame-class, +agg, alias, +arrange, as.data.frame, +attach,SparkDataFrame-method, +broadcast, cache, +checkpoint, coalesce, +collect, colnames, +coltypes, +createOrReplaceTempView, +crossJoin, cube, +dapplyCollect, dapply, +describe, dim, +distinct, dropDuplicates, +dropna, drop, +dtypes, except, +explain, filter, +first, gapplyCollect, +gapply, getNumPartitions, +group_by, head, +hint, histogram, +insertInto, intersect, +isLocal, isStreaming, +join, limit, +localCheckpoint, merge, +mutate, ncol, +nrow, persist, +printSchema, randomSplit, +rbind, registerTempTable, +rename, repartition, +rollup, sample, +saveAsTable, schema, +selectExpr, showDF, +show, storageLevel, +str, subset, +summary, take, +toJSON, unionByName, +union, unpersist, +withColumn, withWatermark, +with, write.df
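For comparison, the schema() and select() operations documented above for SparkR are also available on Dataset<Row> in the Java API. A minimal sketch, assuming a local session and a hypothetical JSON input with name and age columns (the path and column names are placeholders, not taken from the pages above):

import static org.apache.spark.sql.functions.col;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.StructType;

public class SchemaSelectSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .appName("schema/select sketch").master("local[*]").getOrCreate();

    // Hypothetical input, mirroring the read.json(path) call in the R examples.
    Dataset<Row> df = spark.read().json("path/to/file.json");

    // schema() returns the StructType describing the columns.
    StructType schema = df.schema();
    System.out.println(schema.treeString());

    // select by column name or by Column expression.
    df.select("name", "age").show();
    df.select(col("age").plus(1).alias("age_next_year")).show();

    spark.stop();
  }
}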
[31/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/index.html -- diff --git a/site/docs/2.3.2/api/java/index.html b/site/docs/2.3.2/api/java/index.html new file mode 100644 index 000..b14a2d0 --- /dev/null +++ b/site/docs/2.3.2/api/java/index.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd";> + + + + +Spark 2.3.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +} else { +return false; +} +} +return true; +} +function loadFrames() { +if (targetPage != "" && targetPage != "undefined") + top.classFrame.location = top.targetPage; +} + + + + + + + + + + +JavaScript is disabled on your browser. + +Frame Alert +This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to Non-frame version. + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/lib/api-javadocs.js -- diff --git a/site/docs/2.3.2/api/java/lib/api-javadocs.js b/site/docs/2.3.2/api/java/lib/api-javadocs.js new file mode 100644 index 000..ead13d6 --- /dev/null +++ b/site/docs/2.3.2/api/java/lib/api-javadocs.js @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* Dynamically injected post-processing code for the API docs */ + +$(document).ready(function() { + addBadges(":: AlphaComponent ::", 'Alpha Component'); + addBadges(":: DeveloperApi ::", 'Developer API'); + addBadges(":: Experimental ::", 'Experimental'); +}); + +function addBadges(tag, html) { + var tags = $(".block:contains(" + tag + ")") + + // Remove identifier tags + tags.each(function(index) { +var oldHTML = $(this).html(); +var newHTML = oldHTML.replace(tag, ""); +$(this).html(newHTML); + }); + + // Add html badge tags + tags.each(function(index) { +if ($(this).parent().is('td.colLast')) { + $(this).parent().prepend(html); +} else if ($(this).parent('li.blockList') + .parent('ul.blockList') + .parent('div.description') + .parent().is('div.contentContainer')) { + var contentContainer = $(this).parent('li.blockList') +.parent('ul.blockList') +.parent('div.description') +.parent('div.contentContainer') + var header = contentContainer.prev('div.header'); + if (header.length > 0) { +hea
[11/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaNewHadoopRDD.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaNewHadoopRDD.html b/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaNewHadoopRDD.html new file mode 100644 index 000..f0715cd --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaNewHadoopRDD.html @@ -0,0 +1,339 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +JavaNewHadoopRDD (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":10,"i1":10,"i2":10}; +var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark.api.java +Class JavaNewHadoopRDD+ + + +Object + + +org.apache.spark.api.java.JavaPairRDD + + +org.apache.spark.api.java.JavaNewHadoopRDD + + + + + + + + + +All Implemented Interfaces: +java.io.Serializable, JavaRDDLike ,JavaPairRDD > + + + +public class JavaNewHadoopRDD +extends JavaPairRDD + +See Also: +Serialized Form + + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +JavaNewHadoopRDD(NewHadoopRDD rdd, +scala.reflect.ClassTag kClassTag, +scala.reflect.ClassTag vClassTag) + + + + + + + + + +Method Summary + +All Methods Instance Methods Concrete Methods + +Modifier and Type +Method and Description + + +scala.reflect.ClassTag +kClassTag() + + + JavaRDD +mapPartitionsWithInputSplit(Function2 >,java.util.Iterator > f, + boolean preservesPartitioning) +Maps over a partition, providing the InputSplit that was used as the base of the partition. 
+ + + +scala.reflect.ClassTag +vClassTag() + + + + + + +Methods inherited from class org.apache.spark.api.java.JavaPairRDD +aggregate, aggregateByKey, aggregateByKey, aggregateByKey, cache, cartesian, checkpoint, classTag, coalesce, coalesce, cogroup, cogroup, cogroup, co group, cogroup, cogroup, cogroup, cogroup, cogroup, collect, collectAsMap, collectAsync, collectPartitions, combineByKey, combineByKey, combineByKey, combineByKey, context, count, countApprox, countApprox, countApproxDistinct, countApproxDistinctByKey, countApproxDistinctByKey, countApproxDistinctByKey, countAsync, countByKey, countByKeyApprox, countByKeyApprox, countByValue, countByValueApprox, countByValueApprox, distinct, distinct, filter, first, flatMap, flatMapToDouble, flatMapToPair, flatMapValues, fold, foldByKey, foldByKey, foldByKey, foreach, foreachAsync, foreachPartition, foreachPartitionAsync, fromJavaRDD, fromRDD, fullOuterJoin, fullOuterJoin, fullOuterJoin, getCheckpointFile, getNumPartitions, getStorageLevel, glom, groupBy, groupBy, groupByKey, groupByKey, groupByKey, groupWith, groupWith, groupWith, id, intersection, isCheckpointed< /a>, isEmpty, iterator, join, join, join, keyBy, keys, leftOuterJoin, leftOuterJoin, leftOuterJoin, lookup, map, mapPartitions, mapPartitions, mapPa rtitionsToDouble, mapPartitionsToDouble, mapPartitionsToPair, mapPartitionsToPair, mapPartitionsWithIndex, mapPartitionsWithIndex$default$2, mapToDouble, href="../../../../../org/apache/spark/api/java/JavaPairRDD.html#mapToPair-org.apache.spark.api.java.function.PairFunction-">mapToPair, > href="../../../../../org/apache/spark/api/java/JavaPairRDD.html#mapValues-org.apache.spark.api.java.function.Function-">mapValues, > href="../../../../../org/apache/spark/api/java/JavaPairRDD.html#max-java.util.Comparator-">max, > href="../../../../../org/apache/spark/api/java/JavaPairRDD.html#min-java.util.Comparator-">min, > href="../../../../../org/apache/spark/api/java/JavaPairRDD.html#name--">name, > href="../../../../../org/apache/spark/api/java/JavaPairRDD.html#partitionBy-org.apache.spark.Partitioner-">partitionBy, > href="../../
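JavaNewHadoopRDD is rarely constructed by hand; newAPIHadoopFile on JavaSparkContext hands back a JavaPairRDD backed by a NewHadoopRDD, and the cast below is the usual way to reach mapPartitionsWithInputSplit from Java. The input path is hypothetical and the cast reflects common usage rather than anything stated on the page above, so treat this as a sketch only:

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaNewHadoopRDD;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public class NewHadoopRDDSketch {
  public static void main(String[] args) {
    JavaSparkContext sc = new JavaSparkContext(
        new SparkConf().setAppName("JavaNewHadoopRDD sketch").setMaster("local[*]"));

    JavaPairRDD<LongWritable, Text> pairs = sc.newAPIHadoopFile(
        "hdfs:///tmp/input",                       // placeholder path
        TextInputFormat.class, LongWritable.class, Text.class,
        new Configuration());

    @SuppressWarnings("unchecked")
    JavaNewHadoopRDD<LongWritable, Text> hadoopRdd =
        (JavaNewHadoopRDD<LongWritable, Text>) pairs;

    // Maps over each partition with access to the InputSplit that produced it,
    // here tagging every line with the name of the file it came from.
    JavaRDD<String> tagged = hadoopRdd.mapPartitionsWithInputSplit(
        (InputSplit split, Iterator<Tuple2<LongWritable, Text>> it) -> {
          String file = ((FileSplit) split).getPath().getName();
          List<String> out = new ArrayList<>();
          while (it.hasNext()) {
            out.add(file + ": " + it.next()._2().toString());
          }
          return out.iterator();
        },
        true);                                     // preservesPartitioning

    tagged.take(5).forEach(System.out::println);
    sc.stop();
  }
}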
[07/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaSparkContext.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaSparkContext.html b/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaSparkContext.html new file mode 100644 index 000..7d037c3 --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaSparkContext.html @@ -0,0 +1,2389 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +JavaSparkContext (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":42,"i1":42,"i2":42,"i3":42,"i4":42,"i5":42,"i6":42,"i7":42,"i8":10,"i9":10,"i10":10,"i11":10,"i12":10,"i13":10,"i14":10,"i15":10,"i16":10,"i17":10,"i18":10,"i19":10,"i20":10,"i21":10,"i22":10,"i23":42,"i24":42,"i25":10,"i26":9,"i27":10,"i28":10,"i29":10,"i30":10,"i31":10,"i32":10,"i33":10,"i34":10,"i35":10,"i36":10,"i37":42,"i38":42,"i39":10,"i40":9,"i41":9,"i42":10,"i43":10,"i44":10,"i45":10,"i46":10,"i47":10,"i48":10,"i49":10,"i50":10,"i51":10,"i52":10,"i53":10,"i54":10,"i55":10,"i56":10,"i57":10,"i58":10,"i59":10,"i60":10,"i61":10,"i62":10,"i63":10,"i64":10,"i65":10,"i66":10,"i67":10,"i68":10,"i69":10,"i70":9,"i71":10,"i72":10,"i73":10,"i74":10,"i75":10,"i76":10,"i77":10,"i78":10,"i79":10}; +var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"],32:["t6","Deprecated Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark.api.java +Class JavaSparkContext + + + +Object + + +org.apache.spark.api.java.JavaSparkContext + + + + + + + +All Implemented Interfaces: +java.io.Closeable, AutoCloseable + + + +public class JavaSparkContext +extends Object +implements java.io.Closeable +A Java-friendly version of SparkContext that returns + JavaRDDs and works with Java collections instead of Scala ones. + + Only one SparkContext may be active per JVM. You must stop() the active SparkContext before + creating a new one. This limitation may eventually be removed; see SPARK-2243 for more details. + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +JavaSparkContext() +Create a JavaSparkContext that loads settings from system properties (for instance, when + launching with ./bin/spark-submit). + + + +JavaSparkContext(SparkConf conf) + + +JavaSparkContext(SparkContext sc) + + +JavaSparkContext(String master, +String appName) + + +JavaSparkContext(String master, +String appName, +SparkConf conf) + + +JavaSparkContext(String master, +String appName, +String sparkHome, +String jarFile) + + +JavaSparkContext(String master, +String appName, +String sparkHome, +String[] jars) + + +JavaSparkContext(String master, +String appName, +String sparkHome, +String[] jars, + java.util.Mapenvironment) + + + + + + + + + +Method Summary + +All Methods Static Methods Instance Methods Concrete Methods Deprecated Methods + +Modifier and Type +Method and Description + + + Accumulable +accumulable(T initialValue, + AccumulableParam param) +Deprecated. +use AccumulatorV2. Since 2.0.0. 
+ + + + + Accumulable +accumulable(T initialValue, + String name, + AccumulableParam param) +Deprecated. +use AccumulatorV2. Since 2.0.0. + + + + +Accumulator +accumulator(double initialValue) +Deprecated. +use sc().doubleAccumulator(). Since 2.0.0. + + + + +Accumulator +accumulator(double initialValue, + String name) +Deprecated. +use sc().doubleAccumulator(String). Since 2.0.0. + + + + +Accumulator +accumulator(int initialValue) +Deprecated. +use sc().longAccumulator(). Since 2.0.0. + + + + +Accumulator +accumulator(int initialValue, + String name) +Deprecated. +use sc().longAccumulator(String). Since 2.0.0. + + + + +<
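The deprecation notes above all point to AccumulatorV2. A minimal sketch of the replacement calls reached through sc(), with made-up data and accumulator names:

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.util.DoubleAccumulator;
import org.apache.spark.util.LongAccumulator;

public class AccumulatorV2Sketch {
  public static void main(String[] args) {
    // Only one SparkContext may be active per JVM.
    JavaSparkContext sc = new JavaSparkContext(
        new SparkConf().setAppName("AccumulatorV2 sketch").setMaster("local[2]"));

    // Replacements for the deprecated accumulator(int) / accumulator(double) methods.
    LongAccumulator evens = sc.sc().longAccumulator("evens");
    DoubleAccumulator sum = sc.sc().doubleAccumulator("sum");

    sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)).foreach(x -> {
      if (x % 2 == 0) {
        evens.add(1L);
      }
      sum.add(x.doubleValue());
    });

    System.out.println("evens = " + evens.value() + ", sum = " + sum.value());
    sc.stop();
  }
}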
[09/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaRDD.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaRDD.html b/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaRDD.html new file mode 100644 index 000..6901011 --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaRDD.html @@ -0,0 +1,1957 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +JavaRDD (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":9,"i1":10,"i2":9,"i3":9,"i4":10,"i5":10,"i6":10,"i7":9,"i8":9,"i9":9,"i10":9,"i11":9,"i12":9,"i13":9,"i14":9,"i15":9,"i16":9,"i17":9,"i18":9,"i19":10,"i20":10,"i21":10,"i22":9,"i23":9,"i24":9,"i25":9,"i26":9,"i27":9,"i28":9,"i29":9,"i30":9,"i31":9,"i32":9,"i33":9,"i34":9,"i35":9,"i36":9,"i37":9,"i38":9,"i39":10,"i40":9,"i41":9,"i42":9,"i43":9,"i44":9,"i45":9,"i46":9,"i47":9,"i48":9,"i49":9,"i50":9,"i51":9,"i52":9,"i53":9,"i54":9,"i55":9,"i56":9,"i57":9,"i58":9,"i59":9,"i60":10,"i61":9,"i62":9,"i63":9,"i64":9,"i65":9,"i66":10,"i67":10,"i68":10,"i69":9,"i70":10,"i71":10,"i72":10,"i73":9,"i74":9,"i75":9,"i76":10,"i77":10,"i78":10,"i79":10,"i80":10,"i81":9,"i82":9,"i83":9,"i84":9,"i85":9,"i86":9,"i87":9,"i88":9,"i89":9,"i90":9,"i91":9,"i92":10,"i93":9,"i94":9,"i95":9,"i96":9,"i97":10,"i98":10,"i99":10,"i100":10,"i101":9,"i102":9,"i103":9,"i104":9}; +var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark.api.java +Class JavaRDD+ + + +Object + + +org.apache.spark.api.java.JavaRDD + + + + + + + +All Implemented Interfaces: +java.io.Serializable, JavaRDDLike > + + + +public class JavaRDD +extends Object + +See Also: +Serialized Form + + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +JavaRDD(RDD rdd, + scala.reflect.ClassTag classTag) + + + + + + + + + +Method Summary + +All Methods Static Methods Instance Methods Concrete Methods + +Modifier and Type +Method and Description + + +static U +aggregate(U zeroValue, + Function2 seqOp, + Function2 combOp) + + +JavaRDD +cache() +Persist this RDD with the default storage level (MEMORY_ONLY). + + + +static JavaPairRDD +cartesian(JavaRDDLike other) + + +static void +checkpoint() + + +scala.reflect.ClassTag +classTag() + + +JavaRDD +coalesce(int numPartitions) +Return a new RDD that is reduced into numPartitions partitions. + + + +JavaRDD +coalesce(int numPartitions, +boolean shuffle) +Return a new RDD that is reduced into numPartitions partitions. 
+ + + +static java.util.List +collect() + + +static JavaFutureAction > +collectAsync() + + +static java.util.List [] +collectPartitions(int[] partitionIds) + + +static SparkContext +context() + + +static long +count() + + +static PartialResult +countApprox(long timeout) + + +static PartialResult +countApprox(long timeout, + double confidence) + + +static long +countApproxDistinct(double relativeSD) + + +static JavaFutureAction +countAsync() + + +static java.util.Map +countByValue() + + +static PartialResult > +countByValueApprox(long timeout) + + +static PartialResult > +countByValueApprox(long timeout, + double confidence) + + +JavaRDD +distinct() +Return a new RDD containing the distinct elements in this RDD. + + + +JavaRDD +distinct(int numPartitions) +Return a new RDD containing the distinct elements in this RDD. + + + +JavaRDD +filter(Function f) +Return a new RDD containing only the elements that satisfy a predicate. + + + +static T +first() + + +static JavaRDD +flatMap(FlatMapFunction f) + + +static JavaDoubleRDD +flatMapToDouble(DoubleFlatMapFunction f) + + +static JavaPairRDD +flatMapToPair(PairFlatMapFunction
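Several of the JavaRDD methods listed above (filter, distinct, coalesce, cache) chain naturally. A small self-contained sketch with made-up data:

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class JavaRDDBasicsSketch {
  public static void main(String[] args) {
    JavaSparkContext sc = new JavaSparkContext(
        new SparkConf().setAppName("JavaRDD sketch").setMaster("local[*]"));

    JavaRDD<Integer> nums = sc.parallelize(Arrays.asList(1, 2, 2, 3, 4, 4, 5), 4);

    // filter keeps elements satisfying the predicate, distinct removes duplicates,
    // coalesce reduces the partition count, cache persists at MEMORY_ONLY.
    JavaRDD<Integer> evens = nums.filter(x -> x % 2 == 0)
        .distinct()
        .coalesce(1)
        .cache();

    System.out.println(evens.collect());   // e.g. [2, 4]
    System.out.println(evens.count());
    sc.stop();
  }
}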
[47/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/attach.html -- diff --git a/site/docs/2.3.2/api/R/attach.html b/site/docs/2.3.2/api/R/attach.html new file mode 100644 index 000..3d0058b --- /dev/null +++ b/site/docs/2.3.2/api/R/attach.html @@ -0,0 +1,122 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";>http://www.w3.org/1999/xhtml";>R: Attach SparkDataFrame to R search path + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js";> +hljs.initHighlightingOnLoad(); + + +attach,SparkDataFrame-method {SparkR}R Documentation + +Attach SparkDataFrame to R search path + +Description + +The specified SparkDataFrame is attached to the R search path. This means that +the SparkDataFrame is searched by R when evaluating a variable, so columns in +the SparkDataFrame can be accessed by simply giving their names. + + + +Usage + + +## S4 method for signature 'SparkDataFrame' +attach(what, pos = 2L, + name = deparse(substitute(what), backtick = FALSE), + warn.conflicts = TRUE) + + + +Arguments + + +what + +(SparkDataFrame) The SparkDataFrame to attach + +pos + +(integer) Specify position in search() where to attach. + +name + +(character) Name to use for the attached SparkDataFrame. Names +starting with package: are reserved for library. + +warn.conflicts + +(logical) If TRUE, warnings are printed about conflicts +from attaching the database, unless that SparkDataFrame contains an object + + + + +Note + +attach since 1.6.0 + + + +See Also + +detach + +Other SparkDataFrame functions: SparkDataFrame-class, +agg, alias, +arrange, as.data.frame, +broadcast, cache, +checkpoint, coalesce, +collect, colnames, +coltypes, +createOrReplaceTempView, +crossJoin, cube, +dapplyCollect, dapply, +describe, dim, +distinct, dropDuplicates, +dropna, drop, +dtypes, except, +explain, filter, +first, gapplyCollect, +gapply, getNumPartitions, +group_by, head, +hint, histogram, +insertInto, intersect, +isLocal, isStreaming, +join, limit, +localCheckpoint, merge, +mutate, ncol, +nrow, persist, +printSchema, randomSplit, +rbind, registerTempTable, +rename, repartition, +rollup, sample, +saveAsTable, schema, +selectExpr, select, +showDF, show, +storageLevel, str, +subset, summary, +take, toJSON, +unionByName, union, +unpersist, withColumn, +withWatermark, with, +write.df, write.jdbc, +write.json, write.orc, +write.parquet, write.stream, +write.text + + + +Examples + +## Not run: +##D attach(irisDf) +##D summary(Sepal_Width) +## End(Not run) + + + +[Package SparkR version 2.3.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/avg.html -- diff --git a/site/docs/2.3.2/api/R/avg.html b/site/docs/2.3.2/api/R/avg.html new file mode 100644 index 000..1306740 --- /dev/null +++ b/site/docs/2.3.2/api/R/avg.html @@ -0,0 +1,67 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";>http://www.w3.org/1999/xhtml";>R: avg + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js";> +hljs.initHighlightingOnLoad(); + + +avg {SparkR}R Documentation + +avg + +Description + +Aggregate function: returns the average of the values in a group. + + + +Usage + + +avg(x, ...) 
+ +## S4 method for signature 'Column' +avg(x) + + + +Arguments + + +x + +Column to compute on or a GroupedData object. + +... + +additional argument(s) when x is a GroupedData object. + + + + +Note + +avg since 1.4.0 + + + +See Also + +Other aggregate functions: column_aggregate_functions, +corr, count, +cov, first, +last + + + +Examples + +## Not run: avg(df$c) + + + +[Package SparkR version 2.3.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/awaitTermination.html -- diff --git a/site/docs/2.3.2/api/R/awaitTermination.html b/site/docs/2.3.2/api/R/awaitTermination.html new file mode 100644 index 000..b8a65a2 --- /dev/null +++ b/site/docs/2.3.2/api/R/awaitTermination.html @@ -0,0 +1,84 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";>http://www.w3.org/1999/xhtml";>R: awaitTermination + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js";> +hljs.initHighlightingOnLoad(); + + +awaitTermination {SparkR}R Documentation + +awaitTermination + +Description + +Waits
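The avg page above documents the SparkR Column form; the equivalent aggregate in the Java Dataset API goes through org.apache.spark.sql.functions. The input path and the dept/salary column names are placeholders:

import static org.apache.spark.sql.functions.avg;
import static org.apache.spark.sql.functions.col;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class AvgSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .appName("avg sketch").master("local[*]").getOrCreate();

    // Hypothetical input with "dept" and "salary" columns.
    Dataset<Row> df = spark.read().json("/tmp/employees.json");

    // Aggregate function: average of the values in each group.
    df.groupBy(col("dept"))
      .agg(avg(col("salary")).alias("avg_salary"))
      .show();

    spark.stop();
  }
}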
[42/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/match.html -- diff --git a/site/docs/2.3.2/api/R/match.html b/site/docs/2.3.2/api/R/match.html new file mode 100644 index 000..d405b90 --- /dev/null +++ b/site/docs/2.3.2/api/R/match.html @@ -0,0 +1,65 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";>http://www.w3.org/1999/xhtml";>R: Match a column with given values. + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js";> +hljs.initHighlightingOnLoad(); + + +%in% {SparkR}R Documentation + +Match a column with given values. + +Description + +Match a column with given values. + + + +Usage + + +## S4 method for signature 'Column' +x %in% table + + + +Arguments + + +x + +a Column. + +table + +a collection of values (coercible to list) to compare with. + + + + +Value + +A Column of matched values resulting from the comparison with the given values. + + + +Note + +%in% since 1.5.0 + + + +Examples + +## Not run: +##D filter(df, "age in (10, 30)") +##D where(df, df$age %in% c(10, 30)) +## End(Not run) + + + +[Package SparkR version 2.3.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/merge.html -- diff --git a/site/docs/2.3.2/api/R/merge.html b/site/docs/2.3.2/api/R/merge.html new file mode 100644 index 000..3eb2a86 --- /dev/null +++ b/site/docs/2.3.2/api/R/merge.html @@ -0,0 +1,177 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";>http://www.w3.org/1999/xhtml";>R: Merges two data frames + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js";> +hljs.initHighlightingOnLoad(); + + +merge {SparkR}R Documentation + +Merges two data frames + +Description + +Merges two data frames + + + +Usage + + +merge(x, y, ...) + +## S4 method for signature 'SparkDataFrame,SparkDataFrame' +merge(x, y, + by = intersect(names(x), names(y)), by.x = by, by.y = by, + all = FALSE, all.x = all, all.y = all, sort = TRUE, + suffixes = c("_x", "_y"), ...) + + + +Arguments + + +x + +the first data frame to be joined. + +y + +the second data frame to be joined. + +... + +additional argument(s) passed to the method. + +by + +a character vector specifying the join columns. If by is not +specified, the common column names in x and y will be used. +If by or both by.x and by.y are explicitly set to NULL or of length 0, the Cartesian +Product of x and y will be returned. + +by.x + +a character vector specifying the joining columns for x. + +by.y + +a character vector specifying the joining columns for y. + +all + +a boolean value setting all.x and all.y +if any of them are unset. + +all.x + +a boolean value indicating whether all the rows in x should +be included in the join. + +all.y + +a boolean value indicating whether all the rows in y should +be included in the join. + +sort + +a logical argument indicating whether the resulting columns should be sorted. + +suffixes + +a string vector of length 2 used to make colnames of +x and y unique. +The first element is appended to each colname of x. +The second element is appended to each colname of y. + + + + +Details + +If all.x and all.y are set to FALSE, a natural join will be returned. 
If +all.x is set to TRUE and all.y is set to FALSE, a left outer join will +be returned. If all.x is set to FALSE and all.y is set to TRUE, a right +outer join will be returned. If all.x and all.y are set to TRUE, a full +outer join will be returned. + + + +Note + +merge since 1.5.0 + + + +See Also + +join crossJoin + +Other SparkDataFrame functions: SparkDataFrame-class, +agg, alias, +arrange, as.data.frame, +attach,SparkDataFrame-method, +broadcast, cache, +checkpoint, coalesce, +collect, colnames, +coltypes, +createOrReplaceTempView, +crossJoin, cube, +dapplyCollect, dapply, +describe, dim, +distinct, dropDuplicates, +dropna, drop, +dtypes, except, +explain, filter, +first, gapplyCollect, +gapply, getNumPartitions, +group_by, head, +hint, histogram, +insertInto, intersect, +isLocal, isStreaming, +join, limit, +localCheckpoint, mutate, +ncol, nrow, +persist, printSchema, +randomSplit, rbind, +registerTempTable, rename, +repartition, rollup, +sample, saveAsTable, +schema, selectExpr, +select, showDF, +show, storageLevel, +str, subset, +summary, take, +toJSON, unionByName, +union, unpersist, +withColumn, withWatermark, +with, write.df, +write.jdbc, write.json, +write.orc, write.parquet, +write.stream, write.text + + + +Examples + +## Not run: +##D sparkR.session() +##D df1 <- read.json(path) +##D
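The %in% and merge pages above correspond to Column.isin and Dataset.join in the Java API; as the Details section notes, all.x = TRUE behaves like a left outer join. A sketch assuming two hypothetical JSON inputs that share an id column:

import static org.apache.spark.sql.functions.col;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class FilterAndJoinSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .appName("isin/join sketch").master("local[*]").getOrCreate();

    // Placeholder inputs with an "id" column in both data sets.
    Dataset<Row> people = spark.read().json("/tmp/people.json");
    Dataset<Row> scores = spark.read().json("/tmp/scores.json");

    // Java counterpart of SparkR's df$age %in% c(10, 30).
    people.filter(col("age").isin(10, 30)).show();

    // Java counterpart of merge(x, y, all.x = TRUE): a left outer join on the key.
    Dataset<Row> merged = people.join(
        scores, people.col("id").equalTo(scores.col("id")), "left_outer");
    merged.show();

    spark.stop();
  }
}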
[20/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/SimpleFutureAction.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/SimpleFutureAction.html b/site/docs/2.3.2/api/java/org/apache/spark/SimpleFutureAction.html new file mode 100644 index 000..075265c --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/SimpleFutureAction.html @@ -0,0 +1,517 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +SimpleFutureAction (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":10,"i1":10,"i2":10,"i3":10,"i4":10,"i5":10,"i6":10,"i7":10,"i8":10,"i9":10}; +var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark +Class SimpleFutureAction+ + + +Object + + +org.apache.spark.SimpleFutureAction + + + + + + + +All Implemented Interfaces: +FutureAction , scala.concurrent.Awaitable , scala.concurrent.Future + + + +public class SimpleFutureAction +extends Object +implements FutureAction +A FutureAction holding the result of an action that triggers a single job. Examples include + count, collect, reduce. + + + + + + + + + + + +Nested Class Summary + + + + +Nested classes/interfaces inherited from interface scala.concurrent.Future +scala.concurrent.Future.InternalCallbackExecutor$ + + + + + + + + +Method Summary + +All Methods Instance Methods Concrete Methods + +Modifier and Type +Method and Description + + +void +cancel() +Cancels the execution of this action. + + + +boolean +isCancelled() +Returns whether the action has been cancelled. + + + +boolean +isCompleted() +Returns whether the action has already been completed with a value or an exception. + + + +scala.collection.Seq +jobIds() +Returns the job IDs run by the underlying async operation. + + + + void +onComplete(scala.Function1 ,U> func, + scala.concurrent.ExecutionContext executor) +When this action is completed, either through an exception, or a value, applies the provided + function. + + + +SimpleFutureAction +ready(scala.concurrent.duration.Duration atMost, + scala.concurrent.CanAwait permit) +Blocks until this action completes. + + + +T +result(scala.concurrent.duration.Duration atMost, + scala.concurrent.CanAwait permit) +Awaits and returns the result (of type T) of this action. + + + + scala.concurrent.Future+transform(scala.Function1,scala.util.Try > f, + scala.concurrent.ExecutionContext e) + + +scala.concurrent.Future+transformWith(scala.Function1,scala.concurrent.Future > f, + scala.concurrent.ExecutionContext e) + + +scala.Option> +value() +The value of this Future. 
+ + + + + + + +Methods inherited from class Object +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + +Methods inherited from interface org.apache.spark.FutureAction +get + + + + + +Methods inherited from interface scala.concurrent.Future +andThen, collect, failed, fallbackTo, filter, flatMap, foreach, map, mapTo, onFailure, onSuccess, recover, recoverWith, transform, withFilter, zip + + + + + + + + + + + + + + +Method Detail + + + + + +cancel +public void cancel() +Description copied from interface: FutureAction +Cancels the execution of this action. + +Specified by: +cancel in interface FutureAction + + + + + + + + +ready +public SimpleFutureAction ready(scala.concurrent.duration.Duration atMost, + scala.concurrent.CanAwait permit) +Description copied from interface: FutureAction +Blocks until this action completes. + + +Specified by: +ready in interface FutureAction +Specified by: +ready in interface scala.concurrent.Awaitable +Parameters: +atMost - maximum wait time, which may be negative (no waiting is done), Duration.Inf + for unbounded waiting, or a finite positive duration +permit - (undocumented) +Returns: +this FutureAction + + + + + + + +
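SimpleFutureAction is what single-job actions hand back when run asynchronously; from Java it is normally consumed through the JavaFutureAction interface rather than constructed directly. A minimal sketch using countAsync:

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaFutureAction;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class AsyncActionSketch {
  public static void main(String[] args) throws Exception {
    JavaSparkContext sc = new JavaSparkContext(
        new SparkConf().setAppName("FutureAction sketch").setMaster("local[*]"));

    JavaRDD<Integer> nums = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));

    // countAsync submits the job and returns immediately with a future.
    JavaFutureAction<Long> pending = nums.countAsync();

    // The future exposes the ids of the jobs it triggered and supports cancel().
    System.out.println("job ids: " + pending.jobIds());
    System.out.println("count:   " + pending.get());   // blocks until the job finishes

    sc.stop();
  }
}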
[01/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
Repository: spark-website Updated Branches: refs/heads/asf-site 806a1bd52 -> 04a27dbf1 http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/api/r/PairwiseRRDD.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/api/r/PairwiseRRDD.html b/site/docs/2.3.2/api/java/org/apache/spark/api/r/PairwiseRRDD.html new file mode 100644 index 000..fb6f330 --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/api/r/PairwiseRRDD.html @@ -0,0 +1,322 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +PairwiseRRDD (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":10}; +var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark.api.r +Class PairwiseRRDD+ + + +Object + + +org.apache.spark.rdd.RDD + + +org.apache.spark.api.r.BaseRRDD > + + +org.apache.spark.api.r.PairwiseRRDD + + + + + + + + + + + +All Implemented Interfaces: +java.io.Serializable, Logging + + + +public class PairwiseRRDD +extends BaseRRDD > +Form an RDD[(Int, Array[Byte])] from key-value pairs returned from R. + This is used by SparkR's shuffle operations. + +See Also: +Serialized Form + + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +PairwiseRRDD(RDD parent, +int numPartitions, +byte[] hashFunc, +String deserializer, +byte[] packageNames, +Object[] broadcastVars, +scala.reflect.ClassTag evidence$3) + + + + + + + + + +Method Summary + +All Methods Instance Methods Concrete Methods + +Modifier and Type +Method and Description + + +JavaPairRDD +asJavaPairRDD() + + + + + + +Methods inherited from class org.apache.spark.api.r.BaseRRDD +compute, getPartitions + + + + + +Methods inherited from class org.apache.spark.rdd.RDD +aggregate, cache, cartesian, checkpoint, coalesce, collect, collect, context, count, countAppro x, countApproxDistinct, countApproxDistinct, countByValue, countByValueApprox, dependencies, distinct, distinct, doubleRDDToDoubleRDDFunctions, filter, first, flatMap, fold, foreach, foreachPartition, getCheckpointFile, getNumPartitions, getStorageLevel, glom, groupBy, groupBy, groupBy, id, intersection, intersection, intersection, isCheckpointed, isEmpty, iterator, keyBy, localCheckpoint, map, mapPartitions, mapPartitionsWithIndex, max, min, name, numericRDDToDoubleRDDFunctions, partitioner, partitions, persist, persist, pipe, pipe, pipe, preferredLocations, randomSplit, rddToAsyncRDDActions, rddToOrderedRDDFunctions, rddToPairRDDFunctions, rddToSequenceFileRDDFunctions, reduce, repartition, sample, saveAsObjectFile, saveAsTextFile, saveAsTextFile, setName, sortBy, sparkContext, subtract, subtract, subtract, take, takeOrdered, takeSample, toDebugString, toJavaRDD, toLocalIterator, top, toString, treeAggregate, treeRedu ce, union, unpersist, zip, zipPartitions, zipPartitions, zipPartitions, zipPartitions, zipPartitions, zipPartitions, zipWithIndex, zipWithUniqueId + + + + + +Methods inherited from class Object +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait + + + + + +Methods inherited from 
interface org.apache.spark.internal.Logging +initializeLogging, initializeLogIfNecessary, initializeLogIfNecessary, isTraceEnabled, log_, log, logDebug, logDebug, logError, logError, logInfo, logInfo, logName, logTrace, logTrace, logWarning, logWarning + + + + + + + + + + + + + + +Constructor Detail + + + + + +PairwiseRRDD +public PairwiseRRDD(RDD parent, +int numPartitions, +byte[] hashFunc, +String deserializer, +byte[] packageNames, +Object[] broadcastVars, +scala.reflect.ClassTag evidence$3) + + + + + + + + + +Method Detail + + + + + +asJava
[04/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/ForeachFunction.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/ForeachFunction.html b/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/ForeachFunction.html new file mode 100644 index 000..89baac3 --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/ForeachFunction.html @@ -0,0 +1,239 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +ForeachFunction (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":6}; +var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],4:["t3","Abstract Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark.api.java.function +Interface ForeachFunction+ + + + + + +All Superinterfaces: +java.io.Serializable + + +Functional Interface: +This is a functional interface and can therefore be used as the assignment target for a lambda expression or method reference. + + + +@FunctionalInterface +public interface ForeachFunction +extends java.io.Serializable +Base interface for a function used in Dataset's foreach function. + + Spark will invoke the call function on each element in the input Dataset. + + + + + + + + + + + +Method Summary + +All Methods Instance Methods Abstract Methods + +Modifier and Type +Method and Description + + +void +call(T t) + + + + + + + + + + + + + + + +Method Detail + + + + + + + +call +void call(T t) + throws Exception + +Throws: +Exception + + + + + + + + + + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/ForeachPartitionFunction.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/ForeachPartitionFunction.html b/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/ForeachPartitionFunction.html new file mode 100644 index 000..09d4281 --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/ForeachPartitionFunction.html @@ -0,0 +1,235 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +ForeachPartitionFunction (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":6}; +var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],4:["t3","Abstract Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. 
+ + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark.api.java.function +Interface ForeachPartitionFunction + + + + + + +All Superinterfaces: +java.io.Serializable + + +Functional Interface: +This is a functional interface and can therefore be used as the assignment target for a lambda expression or method re
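Both interfaces above are intended as lambda targets for Dataset.foreach and Dataset.foreachPartition; the casts below only pick the Java overloads over the Scala ones. The input path is a placeholder:

import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.api.java.function.ForeachPartitionFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class ForeachSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .appName("foreach sketch").master("local[*]").getOrCreate();

    Dataset<Row> df = spark.read().json("/tmp/events.json");   // placeholder input

    // ForeachFunction: call(T) is invoked once per element.
    df.foreach((ForeachFunction<Row>) row -> System.out.println(row));

    // ForeachPartitionFunction: call(Iterator<T>) is invoked once per partition,
    // the right place to set up one connection or buffer per partition.
    df.foreachPartition((ForeachPartitionFunction<Row>) rows -> {
      while (rows.hasNext()) {
        System.out.println(rows.next());
      }
    });

    spark.stop();
  }
}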
[06/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaSparkStatusTracker.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaSparkStatusTracker.html b/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaSparkStatusTracker.html new file mode 100644 index 000..1a9cef2 --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaSparkStatusTracker.html @@ -0,0 +1,354 @@ +http://www.w3.org/TR/html4/loose.dtd";> + + + + +JavaSparkStatusTracker (Spark 2.3.2 JavaDoc) + + + + + + +var methods = {"i0":10,"i1":10,"i2":10,"i3":10,"i4":10}; +var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested | +Field | +Constr | +Method + + +Detail: +Field | +Constr | +Method + + + + + + + + +org.apache.spark.api.java +Class JavaSparkStatusTracker + + + +Object + + +org.apache.spark.api.java.JavaSparkStatusTracker + + + + + + + + +public class JavaSparkStatusTracker +extends Object +Low-level status reporting APIs for monitoring job and stage progress. + + These APIs intentionally provide very weak consistency semantics; consumers of these APIs should + be prepared to handle empty / missing information. For example, a job's stage ids may be known + but the status API may not have any information about the details of those stages, so + getStageInfo could potentially return null for a valid stage id. + + To limit memory usage, these APIs only provide information on recent jobs / stages. These APIs + will provide information for the last spark.ui.retainedStages stages and + spark.ui.retainedJobs jobs. + + +Note: +This class's constructor should be considered private and may be subject to change. + + + + + + + + + + + + +Method Summary + +All Methods Instance Methods Concrete Methods + +Modifier and Type +Method and Description + + +int[] +getActiveJobIds() +Returns an array containing the ids of all active jobs. + + + +int[] +getActiveStageIds() +Returns an array containing the ids of all active stages. + + + +int[] +getJobIdsForGroup(String jobGroup) +Return a list of all known jobs in a particular job group. + + + +SparkJobInfo +getJobInfo(int jobId) +Returns job information, or null if the job info could not be found or was garbage collected. + + + +SparkStageInfo +getStageInfo(int stageId) +Returns stage information, or null if the stage info could not be found or was + garbage collected. + + + + + + + +Methods inherited from class Object +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + + + + + + + + + + +Method Detail + + + + + +getJobIdsForGroup +public int[] getJobIdsForGroup(String jobGroup) +Return a list of all known jobs in a particular job group. If jobGroup is null, then + returns all known jobs that are not associated with a job group. + + The returned list may contain running, failed, and completed jobs, and may vary across + invocations of this method. This method does not guarantee the order of the elements in + its result. 
+ +Parameters: +jobGroup - (undocumented) +Returns: +(undocumented) + + + + + + + + +getActiveStageIds +public int[] getActiveStageIds() +Returns an array containing the ids of all active stages. + + This method does not guarantee the order of the elements in its result. + +Returns: +(undocumented) + + + + + + + + +getActiveJobIds +public int[] getActiveJobIds() +Returns an array containing the ids of all active jobs. + + This method does not guarantee the order of the elements in its result. + +Returns: +(undocumented) + + + + + + + + +getJobInfo +public SparkJobInfo getJobInfo(int jobId) +Returns job information, or null if the job info could not be found or was garbage collected. + +Parameters: +jobId - (undocumented) +Returns: +(undocumented) + + + + + + + + +getStageInfo +public SparkStageInfo getStageInfo(int stageId) +Returns stage information, or null if the stage info could not be found or was + garbage collected. + +Parameters: +stageId - (undocumented) +Returns: +(undocumented) + + + + + + + + + + + + + + +Sk
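A sketch of how the status tracker is typically polled while an asynchronous job runs. As the class description warns, the API is weakly consistent, so the loop tolerates null job info and a possibly empty id array; the sleep in the mapper only keeps the job alive long enough to observe:

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.SparkJobInfo;
import org.apache.spark.api.java.JavaFutureAction;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.JavaSparkStatusTracker;

public class StatusTrackerSketch {
  public static void main(String[] args) throws Exception {
    JavaSparkContext sc = new JavaSparkContext(
        new SparkConf().setAppName("status tracker sketch").setMaster("local[2]"));
    JavaSparkStatusTracker tracker = sc.statusTracker();

    // Kick off a job asynchronously so there is something to observe.
    JavaFutureAction<List<Integer>> job =
        sc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 4)
          .map(x -> { Thread.sleep(200); return x; })
          .collectAsync();

    for (int jobId : tracker.getActiveJobIds()) {
      SparkJobInfo info = tracker.getJobInfo(jobId);
      if (info != null) {                 // may be null even for a valid id
        System.out.println("job " + jobId + " status: " + info.status());
      }
    }

    job.get();
    sc.stop();
  }
}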
[36/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/write.jdbc.html -- diff --git a/site/docs/2.3.2/api/R/write.jdbc.html b/site/docs/2.3.2/api/R/write.jdbc.html new file mode 100644 index 000..8906d4d --- /dev/null +++ b/site/docs/2.3.2/api/R/write.jdbc.html @@ -0,0 +1,148 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";>http://www.w3.org/1999/xhtml";>R: Save the content of SparkDataFrame to an external database... + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js";> +hljs.initHighlightingOnLoad(); + + +write.jdbc {SparkR}R Documentation + +Save the content of SparkDataFrame to an external database table via JDBC. + +Description + +Save the content of the SparkDataFrame to an external database table via JDBC. Additional JDBC +database connection properties can be set (...) + + + +Usage + + +write.jdbc(x, url, tableName, mode = "error", ...) + +## S4 method for signature 'SparkDataFrame,character,character' +write.jdbc(x, url, + tableName, mode = "error", ...) + + + +Arguments + + +x + +a SparkDataFrame. + +url + +JDBC database url of the form jdbc:subprotocol:subname. + +tableName + +the name of the table in the external database. + +mode + +one of 'append', 'overwrite', 'error', 'errorifexists', 'ignore' +save mode (it is 'error' by default) + +... + +additional JDBC database connection properties. + + + + +Details + +Also, mode is used to specify the behavior of the save operation when +data already exists in the data source. There are four modes: + + + + 'append': Contents of this SparkDataFrame are expected to be appended to existing data. + + + 'overwrite': Existing data is expected to be overwritten by the contents of this +SparkDataFrame. + + + 'error' or 'errorifexists': An exception is expected to be thrown. + + + 'ignore': The save operation is expected to not save the contents of the SparkDataFrame +and to not change the existing data. 
+ + + + + +Note + +write.jdbc since 2.0.0 + + + +See Also + +Other SparkDataFrame functions: SparkDataFrame-class, +agg, alias, +arrange, as.data.frame, +attach,SparkDataFrame-method, +broadcast, cache, +checkpoint, coalesce, +collect, colnames, +coltypes, +createOrReplaceTempView, +crossJoin, cube, +dapplyCollect, dapply, +describe, dim, +distinct, dropDuplicates, +dropna, drop, +dtypes, except, +explain, filter, +first, gapplyCollect, +gapply, getNumPartitions, +group_by, head, +hint, histogram, +insertInto, intersect, +isLocal, isStreaming, +join, limit, +localCheckpoint, merge, +mutate, ncol, +nrow, persist, +printSchema, randomSplit, +rbind, registerTempTable, +rename, repartition, +rollup, sample, +saveAsTable, schema, +selectExpr, select, +showDF, show, +storageLevel, str, +subset, summary, +take, toJSON, +unionByName, union, +unpersist, withColumn, +withWatermark, with, +write.df, write.json, +write.orc, write.parquet, +write.stream, write.text + + + +Examples + +## Not run: +##D sparkR.session() +##D jdbcUrl <- "jdbc:mysql://localhost:3306/databasename" +##D write.jdbc(df, jdbcUrl, "table", user = "username", password = "password") +## End(Not run) + + + +[Package SparkR version 2.3.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/write.json.html -- diff --git a/site/docs/2.3.2/api/R/write.json.html b/site/docs/2.3.2/api/R/write.json.html new file mode 100644 index 000..23eb21b --- /dev/null +++ b/site/docs/2.3.2/api/R/write.json.html @@ -0,0 +1,122 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";>http://www.w3.org/1999/xhtml";>R: Save the contents of SparkDataFrame as a JSON file + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js";> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js";> +hljs.initHighlightingOnLoad(); + + +write.json {SparkR}R Documentation + +Save the contents of SparkDataFrame as a JSON file + +Description + +Save the contents of a SparkDataFrame as a JSON file (http://jsonlines.org/";> +JSON Lines text format or newline-delimited JSON). Files written out +with this method can be read back in as a SparkDataFrame using read.json(). + + + +Usage + + +write.json(x, path, ...) + +## S4 method for signature 'SparkDataFrame,character' +write.json(x, path, mode = "error", + ...) + + + +Arguments + + +x + +A SparkDataFrame + +path + +The directory where the file is saved + +... + +additional argument(s) passed to the method. + +mode + +one of 'append', 'overwrite', 'error', 'errorifexists', 'ignore' +save mode (it is 'error' by default) + + + + +Not
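Both writers above exist on DataFrameWriter in the Java API as well. A sketch with placeholder paths, JDBC URL, and credentials, mirroring the R examples rather than copying any real configuration:

import java.util.Properties;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class WriteSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .appName("write sketch").master("local[*]").getOrCreate();

    Dataset<Row> df = spark.read().json("/tmp/input.json");   // placeholder input

    // JSON Lines output; "overwrite" replaces existing data, the default mode errors out.
    df.write().mode(SaveMode.Overwrite).json("/tmp/output_json");

    // JDBC output; extra connection properties travel in a Properties object.
    Properties connProps = new Properties();
    connProps.setProperty("user", "username");        // placeholder credentials
    connProps.setProperty("password", "password");
    df.write()
      .mode(SaveMode.Append)
      .jdbc("jdbc:mysql://localhost:3306/databasename", "table", connProps);

    spark.stop();
  }
}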
[30/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/lib/jquery.js -- diff --git a/site/docs/2.3.2/api/java/lib/jquery.js b/site/docs/2.3.2/api/java/lib/jquery.js new file mode 100644 index 000..bc3fbc8 --- /dev/null +++ b/site/docs/2.3.2/api/java/lib/jquery.js @@ -0,0 +1,2 @@ +/*! jQuery v1.8.2 jquery.com | jquery.org/license */ +(function(a,b){function G(a){var b=F[a]={};return p.each(a.split(s),function(a,c){b[c]=!0}),b}function J(a,c,d){if(d===b&&a.nodeType===1){var e="data-"+c.replace(I,"-$1").toLowerCase();d=a.getAttribute(e);if(typeof d=="string"){try{d=d==="true"?!0:d==="false"?!1:d==="null"?null:+d+""===d?+d:H.test(d)?p.parseJSON(d):d}catch(f){}p.data(a,c,d)}else d=b}return d}function K(a){var b;for(b in a){if(b==="data"&&p.isEmptyObject(a[b]))continue;if(b!=="toJSON")return!1}return!0}function ba(){return!1}function bb(){return!0}function bh(a){return!a||!a.parentNode||a.parentNode.nodeType===11}function bi(a,b){do a=a[b];while(a&&a.nodeType!==1);return a}function bj(a,b,c){b=b||0;if(p.isFunction(b))return p.grep(a,function(a,d){var e=!!b.call(a,d,a);return e===c});if(b.nodeType)return p.grep(a,function(a,d){return a===b===c});if(typeof b=="string"){var d=p.grep(a,function(a){return a.nodeType===1});if(be.test(b))return p.filter(b,d,!c);b=p.filter(b,d)}return p.grep(a,function(a,d){return p.inArray( a,b)>=0===c})}function bk(a){var b=bl.split("|"),c=a.createDocumentFragment();if(c.createElement)while(b.length)c.createElement(b.pop());return c}function bC(a,b){return a.getElementsByTagName(b)[0]||a.appendChild(a.ownerDocument.createElement(b))}function bD(a,b){if(b.nodeType!==1||!p.hasData(a))return;var c,d,e,f=p._data(a),g=p._data(b,f),h=f.events;if(h){delete g.handle,g.events={};for(c in h)for(d=0,e=h[c].length;d").appendTo(e.body),c=b.css("display");b.remove();if(c==="none"||c===""){bI=e.body.appendChild(bI||p.extend(e.createElement("iframe"),{frameBorder:0,width:0,height:0}));if(!bJ||!bI. 
createElement)bJ=(bI.contentWindow||bI.contentDocument).document,bJ.write(""),bJ.close();b=bJ.body.appendChild(bJ.createElement(a)),c=bH(b,"display"),e.body.removeChild(bI)}return bS[a]=c,c}function ci(a,b,c,d){var e;if(p.isArray(b))p.each(b,function(b,e){c||ce.test(a)?d(a,e):ci(a+"["+(typeof e=="object"?b:"")+"]",e,c,d)});else if(!c&&p.type(b)==="object")for(e in b)ci(a+"["+e+"]",b[e],c,d);else d(a,b)}function cz(a){return function(b,c){typeof b!="string"&&(c=b,b="*");var d,e,f,g=b.toLowerCase().split(s),h=0,i=g.length;if(p.isFunction(c))for(;h)[^>]*$|#([\w\-]*)$)/,v=/^<(\w+)\s*\/?>(?:<\/\1>|)$/,w=/^[\],:{}\s]*$/,x=/(?:^|:|,)(?:\s*\[)+/g,y=/\\(?:["\\\/bfnrt]|u[\da-fA-F]{4})/g,z=/"[^"\\\r\n]*"|true|false|null|-?(?:\d\d*\.|)\d+(?:[eE][\-+]?\d+|)/g,A=/^-ms-/,B=/-([\da-z])/gi,C=function(a,b){return(b+"").toUpperCase()},D=function(){e.addEventListener?(e.removeEventListener("DOMContentLoaded",D,!1),p.ready()):e.readyState==="complete"&&(e.detachEvent("onreadystatechange",D),p.ready())},E={};p.fn=p.prototype={constructor:p,init :function(a,c,d){var f,g,h,i;if(!a)return this;if(a.nodeType)return this.context=this[0]=a,this.length=1,this;if(typeof a=="string"){a.charAt(0)==="<"&&a.charAt(a.length-1)===">"&&a.length>=3?f=[null,a,null]:f=u.exec(a);if(f&&(f[1]||!c)){if(f[1])return c=c instanceof p?c[0]:c,i=c&&c.nodeType?c.ownerDocument||c:e,a=p.parseHTML(f[1],i,!0),v.test(f[1])&&p.isPlainObject(c)&&this.attr.call(a,c,!0),p.merge(this,a);g=e.getElementById(f[2]);if(g&&g.parentNode){if(g.id!==f[2])return d.find(a);this.length=1,this[0]=g}return this.context=e,this.selector=a,this}return!c||c.jquery?(c||d).find(a):this.constructor(c).find(a)}return p.isFunction(a)?d.ready(a):(a.selector!==b&&(this.selector=a.selector,this.context=a.context),p.makeArray(a,this))},selector:"",jquery:"1.8.2",length:0,size:function(){return this.length},toArray:function(){return k.call(this)},get:function(a){return a==null?this.toArray():a<0?this[this.length+a]:this[a]},pushStack:function(a,b,c){var d=p.merge(this.constructor(),a);ret urn d.prevObject=this,d.context=this.context,b==="find"?d.selector=this.selector+(this.selector?" ":"")+c:b&&(d.selector=this.selector+"."+b+"("+c+")"),d},each:function(a,b){return p.each(this,a,b)},ready:function(a){return p.ready.promise().done(a),this},eq:function(a){return a=+a,a===-1?this.slice(a):this.slice(a,a+1)},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},slice:function(){return this.pushStack(k.apply(this,arguments),"slice",k.call(arguments).join(","))},map:function(a){return this.pushStack(p.map(this,function(b,c){return a.call(b,c,b)}))},end:function(){return this.prevObject||this.constructor(null)},push:j,sort:[].sort,splice:[].splice},p.fn.init.prototype=p.fn,p.extend=p.fn.extend=function(){var a,c,d,e,f,g,h=arguments[0]||{},i=1,j=arguments.length,k=!1;typeof h=="boolean"&&(k=h,h=arguments[1]||{},i=2),type
[26/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/CleanShuffle.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/CleanShuffle.html b/site/docs/2.3.2/api/java/org/apache/spark/CleanShuffle.html new file mode 100644 index 000..0bf6dfe --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/CleanShuffle.html (generated JavaDoc page: public class CleanShuffle extends Object implements CleanupTask, scala.Product, scala.Serializable; constructor CleanShuffle(int shuffleId); method shuffleId(), plus the canEqual/equals/productArity/productElement/productIterator/productPrefix members from scala.Product and scala.Equals.)
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/CleanupTask.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/CleanupTask.html b/site/docs/2.3.2/api/java/org/apache/spark/CleanupTask.html new file mode 100644 index 000..9634781 --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/CleanupTask.html (generated JavaDoc page for the CleanupTask interface; notification truncated.)
[43/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/head.html -- diff --git a/site/docs/2.3.2/api/R/head.html b/site/docs/2.3.2/api/R/head.html new file mode 100644 index 000..30bee19 --- /dev/null +++ b/site/docs/2.3.2/api/R/head.html (generated SparkR help page for head: returns the first num rows of a SparkDataFrame as an R data.frame, defaulting to 6 rows as with base R; usage head(x, num = 6L); head since 1.4.0.)
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/hint.html -- diff --git a/site/docs/2.3.2/api/R/hint.html b/site/docs/2.3.2/api/R/hint.html new file mode 100644 index 000..11e12d6 --- /dev/null +++ b/site/docs/2.3.2/api/R/hint.html (generated SparkR help page for hint: specifies an execution plan hint and returns a new SparkDataFrame; usage hint(x, name, ...); hint since 2.2.0; the example shows a broadcast join hint.)
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/histogram.html -- diff --git a/site/docs/2.3.2/api/R/histogram.html b/site/docs/2.3.2/api/R/histogram.html new file mode 100644 index 000..375f (generated SparkR help page for histogram; notification truncated.)
[02/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/package-summary.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/package-summary.html b/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/package-summary.html new file mode 100644 index 000..eb84b4f --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/api/java/function/package-summary.html (generated JavaDoc package summary for org.apache.spark.api.java.function: the set of interfaces used to represent functions in Spark's Java API, including CoGroupFunction, DoubleFlatMapFunction, DoubleFunction, FilterFunction, FlatMapFunction, FlatMapFunction2, FlatMapGroupsFunction, FlatMapGroupsWithStateFunction, ForeachFunction, ForeachPartitionFunction, Function, Function0, Function2, Function3, Function4, MapFunction, MapGroupsFunction, MapGroupsWithStateFunction, MapPartitionsFunction, PairFlatMapFunction, PairFunction, ReduceFunction, VoidFunction and VoidFunction2; users implement these interfaces to pass functions to Java API methods.)
(The same notification also adds the generated JavaDoc page for org.apache.spark.SparkStageInfoImpl: public class SparkStageInfoImpl extends Object implements SparkStageInfo, with constructor SparkStageInfoImpl(int stageId, int currentAttemptId, long submissionTime, String name, int numTasks, int numActiveTasks, int numCompletedTasks, int numFailedTasks) and the corresponding accessor methods.)
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/SparkStatusTracker.html -- diff --git a/site/docs/2.3.2/api/java/org (generated JavaDoc page for SparkStatusTracker; notification truncated.)
[10/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaPairRDD.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaPairRDD.html b/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaPairRDD.html new file mode 100644 index 000..726bcd5 --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaPairRDD.html (generated JavaDoc page for org.apache.spark.api.java.JavaPairRDD, the Java API wrapper for RDDs of key-value pairs; direct known subclasses JavaHadoopRDD and JavaNewHadoopRDD; constructor JavaPairRDD(RDD rdd, ClassTag kClassTag, ClassTag vClassTag); the method summary lists roughly 180 members, beginning with aggregate, the aggregateByKey overloads that combine the values of each key using given combine functions and a neutral "zero value", and cache, which persists the RDD with the default MEMORY_ONLY storage level; notification truncated.)
[44/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/dropTempView.html -- diff --git a/site/docs/2.3.2/api/R/dropTempView.html b/site/docs/2.3.2/api/R/dropTempView.html new file mode 100644 index 000..5ee2883 --- /dev/null +++ b/site/docs/2.3.2/api/R/dropTempView.html (generated SparkR help page for dropTempView: drops the temporary view with the given name from the catalog, uncaching it if it was cached; usage dropTempView(viewName); returns TRUE if the view is dropped successfully, FALSE otherwise; since 2.0.0.)
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/dtypes.html -- diff --git a/site/docs/2.3.2/api/R/dtypes.html b/site/docs/2.3.2/api/R/dtypes.html new file mode 100644 index 000..b19e0fd --- /dev/null +++ b/site/docs/2.3.2/api/R/dtypes.html (generated SparkR help page for dtypes: returns all column names and their data types as a list; usage dtypes(x) on a SparkDataFrame; dtypes since 1.4.0.)
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/endsWith.html -- diff --git a/site/docs/2.3.2/api/R/endsWith.html b/site/docs/2.3.2/api/R/endsWith.html new file mode 100644 index 000..24bea1f --- /dev/null +++ b/site/docs/2.3.2/api/R/endsWith.html (generated SparkR help page for endsWith: determines whether entries of x end with the given suffix, with strings recycled to common lengths; usage endsWith(x, suffix) on a Column; endsWith since 1.4.0.)
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/ (generated SparkR help page for the null-safe equality operator eq_null_safe; notification truncated.)
[41/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/rbind.html -- diff --git a/site/docs/2.3.2/api/R/rbind.html b/site/docs/2.3.2/api/R/rbind.html new file mode 100644 index 000..890ab98 --- /dev/null +++ b/site/docs/2.3.2/api/R/rbind.html (generated SparkR help page for rbind: unions two or more SparkDataFrames by row; as in R's rbind, the inputs must have the same column names, and duplicate rows are not removed; usage rbind(..., deparse.level = 1); rbind since 1.5.0; see also union and unionByName.)
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/R/read.df.html -- diff --git a/site/docs/2.3.2/api/R/read.df.html b/site/docs/2.3.2/api/R/read.df.html new file mode 100644 index 000..2bd9c43 --- /dev/null +++ b/site/docs/2.3.2/api/R/read.df.html (generated SparkR help page for read.df/loadDF: loads a dataset from an external data source as a SparkDataFrame; usage read.df(path = NULL, source = NULL, schema = NULL, na.strings = "NA", ...); if source is not specified, the default configured by "spark.sql.sources.default" is used, and for the "csv" source the string "NA" is interpreted as NA by default; read.df since 1.4.0, loadDF since 1.6.0; notification truncated.)
[14/51] [partial] spark-website git commit: Add docs for Spark 2.3.2
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/TaskKilledException.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/TaskKilledException.html b/site/docs/2.3.2/api/java/org/apache/spark/TaskKilledException.html new file mode 100644 index 000..d92a973 --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/TaskKilledException.html (generated JavaDoc page: public class TaskKilledException extends RuntimeException, a DeveloperApi exception thrown when a task is explicitly killed, i.e. when task failure is expected; constructors TaskKilledException() and TaskKilledException(String reason); method reason().)
http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/TaskResultLost.html -- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/TaskResultLost.html b/site/docs/2.3.2/api/java/org/apache/spark/TaskResultLost.html new file mode 100644 index 000..67e2fc4 --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/TaskResultLost.html (generated JavaDoc page for the TaskResultLost task end reason.)
(The same notification also adds the generated JavaDoc page for org.apache.spark.Aggregator: a DeveloperApi set of functions used to aggregate data, built from a createCombiner function that creates the initial value of the aggregation, a mergeValue function that merges a new value into the aggregation result, and a mergeCombiners function that merges outputs from multiple mergeValue functions; constructor Aggregator(Function1 createCombiner, Function2 mergeValue, Function2 mergeCombiners); methods combineValuesByKey and combineCombinersByKey plus the scala.Product members; notification truncated.)
spark git commit: [SPARK-25425][SQL][BACKPORT-2.3] Extra options should override session options in DataSource V2
Repository: spark Updated Branches: refs/heads/branch-2.3 9674d083e -> cbb228e48 [SPARK-25425][SQL][BACKPORT-2.3] Extra options should override session options in DataSource V2 ## What changes were proposed in this pull request? In the PR, I propose overriding session options by extra options in DataSource V2. Extra options are more specific and set via `.option()`, and should overwrite more generic session options. ## How was this patch tested? Added tests for read and write paths. Closes #22489 from MaxGekk/session-options-2.3. Authored-by: Maxim Gekk Signed-off-by: Dongjoon Hyun Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cbb228e4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cbb228e4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cbb228e4 Branch: refs/heads/branch-2.3 Commit: cbb228e48bb046e7d88d6bf1c9b9e3b252241552 Parents: 9674d08 Author: Maxim Gekk Authored: Tue Sep 25 23:35:57 2018 -0700 Committer: Dongjoon Hyun Committed: Tue Sep 25 23:35:57 2018 -0700 -- .../org/apache/spark/sql/DataFrameReader.scala | 8 ++-- .../org/apache/spark/sql/DataFrameWriter.scala | 8 ++-- .../sql/sources/v2/DataSourceV2Suite.scala | 50 .../sources/v2/SimpleWritableDataSource.scala | 7 ++- 4 files changed, 56 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/cbb228e4/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index 395e1c9..1d74b35 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -190,10 +190,10 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { val cls = DataSource.lookupDataSource(source, sparkSession.sessionState.conf) if (classOf[DataSourceV2].isAssignableFrom(cls)) { val ds = cls.newInstance() - val options = new DataSourceOptions((extraOptions ++ -DataSourceV2Utils.extractSessionConfigs( - ds = ds.asInstanceOf[DataSourceV2], - conf = sparkSession.sessionState.conf)).asJava) + val sessionOptions = DataSourceV2Utils.extractSessionConfigs( +ds = ds.asInstanceOf[DataSourceV2], +conf = sparkSession.sessionState.conf) + val options = new DataSourceOptions((sessionOptions ++ extraOptions).asJava) // Streaming also uses the data source V2 API. So it may be that the data source implements // v2, but has no v2 implementation for batch reads. 
In that case, we fall back to loading http://git-wip-us.apache.org/repos/asf/spark/blob/cbb228e4/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index 6c9fb52..3fcefb1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -243,10 +243,10 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { val ds = cls.newInstance() ds match { case ws: WriteSupport => - val options = new DataSourceOptions((extraOptions ++ -DataSourceV2Utils.extractSessionConfigs( - ds = ds.asInstanceOf[DataSourceV2], - conf = df.sparkSession.sessionState.conf)).asJava) + val sessionOptions = DataSourceV2Utils.extractSessionConfigs( +ds = ds.asInstanceOf[DataSourceV2], +conf = df.sparkSession.sessionState.conf) + val options = new DataSourceOptions((sessionOptions ++ extraOptions).asJava) // Using a timestamp and a random UUID to distinguish different writing jobs. This is good // enough as there won't be tons of writing jobs created at the same second. val jobId = new SimpleDateFormat("MMddHHmmss", Locale.US) http://git-wip-us.apache.org/repos/asf/spark/blob/cbb228e4/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2Suite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2Suite.scala index 6ad0e5f..ec81e89 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2Suite.scala +++ b/sql/core/src/test/scal
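To make the new precedence concrete, below is a minimal Scala sketch that is not part of the patch. It assumes a hypothetical DataSource V2 implementation registered under the short name "mysource" that implements SessionConfigSupport, so its session-level options are collected by DataSourceV2Utils.extractSessionConfigs from spark.datasource.mysource.* keys; after this change the more specific .option() value overrides the session configuration.

```scala
import org.apache.spark.sql.SparkSession

object ExtraOptionsPrecedenceSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("options-precedence").getOrCreate()

    // Session-level option for the hypothetical "mysource" source (assumes it implements
    // SessionConfigSupport with key prefix "mysource").
    spark.conf.set("spark.datasource.mysource.path", "/data/from-session-conf")

    // Reader-level option: because the options are now built as (sessionOptions ++ extraOptions),
    // this more specific value wins over the session-level one above.
    val df = spark.read
      .format("mysource")                          // hypothetical DataSource V2 short name
      .option("path", "/data/from-reader-option")  // overrides spark.datasource.mysource.path
      .load()

    df.show()
    spark.stop()
  }
}
```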
spark git commit: [SPARK-25534][SQL] Make `SQLHelper` trait
Repository: spark Updated Branches: refs/heads/master 473d0d862 -> 81cbcca60 [SPARK-25534][SQL] Make `SQLHelper` trait ## What changes were proposed in this pull request? Currently, Spark has 7 `withTempPath` and 6 `withSQLConf` functions. This PR aims to remove duplicated and inconsistent code and reduce them to the following meaningful implementations. **withTempPath** - `SQLHelper.withTempPath`: The one which was used in `SQLTestUtils`. **withSQLConf** - `SQLHelper.withSQLConf`: The one which was used in `PlanTest`. - `ExecutorSideSQLConfSuite.withSQLConf`: The one which doesn't throw `AnalysisException` on StaticConf changes. - `SQLTestUtils.withSQLConf`: The one which overrides intentionally to change the active session. ```scala protected override def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = { SparkSession.setActiveSession(spark) super.withSQLConf(pairs: _*)(f) } ``` ## How was this patch tested? Pass the Jenkins with the existing tests. Closes #22548 from dongjoon-hyun/SPARK-25534. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/81cbcca6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/81cbcca6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/81cbcca6 Branch: refs/heads/master Commit: 81cbcca60099fd267492769b465d01e90d7deeac Parents: 473d0d8 Author: Dongjoon Hyun Authored: Tue Sep 25 23:03:54 2018 -0700 Committer: Dongjoon Hyun Committed: Tue Sep 25 23:03:54 2018 -0700 -- .../spark/sql/catalyst/plans/PlanTest.scala | 31 +- .../spark/sql/catalyst/plans/SQLHelper.scala| 64 .../benchmark/DataSourceReadBenchmark.scala | 23 +-- .../benchmark/FilterPushdownBenchmark.scala | 24 +--- .../datasources/csv/CSVBenchmarks.scala | 12 +--- .../datasources/json/JsonBenchmarks.scala | 11 +--- .../streaming/CheckpointFileManagerSuite.scala | 10 +-- .../apache/spark/sql/test/SQLTestUtils.scala| 13 .../spark/sql/hive/orc/OrcReadBenchmark.scala | 25 ++-- 9 files changed, 81 insertions(+), 132 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/81cbcca6/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala index 67740c3..3081ff9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala @@ -22,7 +22,6 @@ import org.scalatest.Suite import org.scalatest.Tag import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode @@ -57,7 +56,7 @@ trait CodegenInterpretedPlanTest extends PlanTest { * Provides helper methods for comparing plans, but without the overhead of * mandating a FunSuite. 
*/ -trait PlanTestBase extends PredicateHelper { self: Suite => +trait PlanTestBase extends PredicateHelper with SQLHelper { self: Suite => // TODO(gatorsmile): remove this from PlanTest and all the analyzer rules protected def conf = SQLConf.get @@ -174,32 +173,4 @@ trait PlanTestBase extends PredicateHelper { self: Suite => plan1 == plan2 } } - - /** - * Sets all SQL configurations specified in `pairs`, calls `f`, and then restores all SQL - * configurations. - */ - protected def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = { -val conf = SQLConf.get -val (keys, values) = pairs.unzip -val currentValues = keys.map { key => - if (conf.contains(key)) { -Some(conf.getConfString(key)) - } else { -None - } -} -(keys, values).zipped.foreach { (k, v) => - if (SQLConf.staticConfKeys.contains(k)) { -throw new AnalysisException(s"Cannot modify the value of a static config: $k") - } - conf.setConfString(k, v) -} -try f finally { - keys.zip(currentValues).foreach { -case (key, Some(value)) => conf.setConfString(key, value) -case (key, None) => conf.unsetConf(key) - } -} - } } http://git-wip-us.apache.org/repos/asf/spark/blob/81cbcca6/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SQLHelper.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SQL
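As a usage illustration, here is a minimal sketch of a suite that mixes in the consolidated helper. The suite name and the chosen config values are made up; the withSQLConf(pairs: (String, String)*)(f: => Unit) signature is the one moved into SQLHelper above, and the trait lives on the catalyst test classpath.

```scala
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.plans.SQLHelper
import org.apache.spark.sql.internal.SQLConf

// Hypothetical suite: mixing in SQLHelper is all that is needed to get withSQLConf.
class MyOptimizerSuite extends SparkFunSuite with SQLHelper {

  test("configs are set for the block and restored afterwards") {
    withSQLConf("spark.sql.shuffle.partitions" -> "5", "spark.sql.cbo.enabled" -> "true") {
      // Inside the block the overridden values are visible through SQLConf.get ...
      assert(SQLConf.get.getConfString("spark.sql.shuffle.partitions") === "5")
    }
    // ... and afterwards the previous values (or unset state) are restored,
    // exactly as the removed per-suite copies of withSQLConf used to do.
  }
}
```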
svn commit: r29697 - in /dev/spark/2.4.1-SNAPSHOT-2018_09_25_22_02-3f20305-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Wed Sep 26 05:17:17 2018 New Revision: 29697 Log: Apache Spark 2.4.1-SNAPSHOT-2018_09_25_22_02-3f20305 docs [This commit notification would consist of 1472 parts, which exceeds the limit of 50, so it was shortened to this summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[1/2] spark git commit: [MINOR][PYTHON] Use a helper in `PythonUtils` instead of direct accessing Scala package
Repository: spark Updated Branches: refs/heads/branch-2.2 4f10aff40 -> ef3616825 [MINOR][PYTHON] Use a helper in `PythonUtils` instead of direct accessing Scala package ## What changes were proposed in this pull request? This PR proposes to use add a helper in `PythonUtils` instead of direct accessing Scala package. ## How was this patch tested? Jenkins tests. Closes #22483 from HyukjinKwon/minor-refactoring. Authored-by: hyukjinkwon Signed-off-by: hyukjinkwon Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8ad6693b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8ad6693b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8ad6693b Branch: refs/heads/branch-2.2 Commit: 8ad6693bd27f3e130fbd51518de880997a1cdcc7 Parents: 4f10aff Author: hyukjinkwon Authored: Fri Sep 21 00:41:42 2018 +0800 Committer: hyukjinkwon Committed: Wed Sep 26 10:50:38 2018 +0800 -- .../src/main/scala/org/apache/spark/api/python/PythonUtils.scala | 4 python/pyspark/context.py| 4 +--- 2 files changed, 5 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8ad6693b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala -- diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index 27a5e19..cdce371 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -74,4 +74,8 @@ private[spark] object PythonUtils { def toScalaMap[K, V](jm: java.util.Map[K, V]): Map[K, V] = { jm.asScala.toMap } + + def getEncryptionEnabled(sc: JavaSparkContext): Boolean = { +sc.conf.get(org.apache.spark.internal.config.IO_ENCRYPTION_ENABLED) + } } http://git-wip-us.apache.org/repos/asf/spark/blob/8ad6693b/python/pyspark/context.py -- diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 68e4c17..171e143 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -192,9 +192,7 @@ class SparkContext(object): # If encryption is enabled, we need to setup a server in the jvm to read broadcast # data via a socket. # scala's mangled names w/ $ in them require special treatment. -encryption_conf = self._jvm.org.apache.spark.internal.config.__getattr__("package$")\ -.__getattr__("MODULE$").IO_ENCRYPTION_ENABLED() -self._encryption_enabled = self._jsc.sc().conf().get(encryption_conf) +self._encryption_enabled = self._jvm.PythonUtils.getEncryptionEnabled(self._jsc) self.pythonExec = os.environ.get("PYSPARK_PYTHON", 'python') self.pythonVer = "%d.%d" % sys.version_info[:2] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[2/2] spark git commit: [SPARKR] Match pyspark features in SparkR communication protocol
[SPARKR] Match pyspark features in SparkR communication protocol Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ef361682 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ef361682 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ef361682 Branch: refs/heads/branch-2.2 Commit: ef36168258b8ad15362312e0562794f4f07322d0 Parents: 8ad6693 Author: hyukjinkwon Authored: Mon Sep 24 19:25:02 2018 +0800 Committer: hyukjinkwon Committed: Wed Sep 26 10:50:46 2018 +0800 -- R/pkg/R/context.R | 43 ++-- R/pkg/tests/fulltests/test_Serde.R | 32 +++ R/pkg/tests/fulltests/test_sparkSQL.R | 12 -- .../scala/org/apache/spark/api/r/RRDD.scala | 33 ++- .../scala/org/apache/spark/api/r/RUtils.scala | 4 ++ 5 files changed, 98 insertions(+), 26 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ef361682/R/pkg/R/context.R -- diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index 50856e3..c1a12f5 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -168,18 +168,30 @@ parallelize <- function(sc, coll, numSlices = 1) { # 2-tuples of raws serializedSlices <- lapply(slices, serialize, connection = NULL) - # The PRC backend cannot handle arguments larger than 2GB (INT_MAX) + # The RPC backend cannot handle arguments larger than 2GB (INT_MAX) # If serialized data is safely less than that threshold we send it over the PRC channel. # Otherwise, we write it to a file and send the file name if (objectSize < sizeLimit) { jrdd <- callJStatic("org.apache.spark.api.r.RRDD", "createRDDFromArray", sc, serializedSlices) } else { -fileName <- writeToTempFile(serializedSlices) -jrdd <- tryCatch(callJStatic( -"org.apache.spark.api.r.RRDD", "createRDDFromFile", sc, fileName, as.integer(numSlices)), - finally = { -file.remove(fileName) -}) +if (callJStatic("org.apache.spark.api.r.RUtils", "getEncryptionEnabled", sc)) { + # the length of slices here is the parallelism to use in the jvm's sc.parallelize() + parallelism <- as.integer(numSlices) + jserver <- newJObject("org.apache.spark.api.r.RParallelizeServer", sc, parallelism) + authSecret <- callJMethod(jserver, "secret") + port <- callJMethod(jserver, "port") + conn <- socketConnection(port = port, blocking = TRUE, open = "wb", timeout = 1500) + doServerAuth(conn, authSecret) + writeToConnection(serializedSlices, conn) + jrdd <- callJMethod(jserver, "getResult") +} else { + fileName <- writeToTempFile(serializedSlices) + jrdd <- tryCatch(callJStatic( + "org.apache.spark.api.r.RRDD", "createRDDFromFile", sc, fileName, as.integer(numSlices)), +finally = { + file.remove(fileName) + }) +} } RDD(jrdd, "byte") @@ -195,14 +207,21 @@ getMaxAllocationLimit <- function(sc) { )) } +writeToConnection <- function(serializedSlices, conn) { + tryCatch({ +for (slice in serializedSlices) { + writeBin(as.integer(length(slice)), conn, endian = "big") + writeBin(slice, conn, endian = "big") +} + }, finally = { +close(conn) + }) +} + writeToTempFile <- function(serializedSlices) { fileName <- tempfile() conn <- file(fileName, "wb") - for (slice in serializedSlices) { -writeBin(as.integer(length(slice)), conn, endian = "big") -writeBin(slice, conn, endian = "big") - } - close(conn) + writeToConnection(serializedSlices, conn) fileName } http://git-wip-us.apache.org/repos/asf/spark/blob/ef361682/R/pkg/tests/fulltests/test_Serde.R -- diff --git a/R/pkg/tests/fulltests/test_Serde.R b/R/pkg/tests/fulltests/test_Serde.R index 6bbd201..092f9b8 100644 --- a/R/pkg/tests/fulltests/test_Serde.R +++ 
b/R/pkg/tests/fulltests/test_Serde.R @@ -77,3 +77,35 @@ test_that("SerDe of list of lists", { }) sparkR.session.stop() + +# Note that this test should be at the end of tests since the configruations used here are not +# specific to sessions, and the Spark context is restarted. +test_that("createDataFrame large objects", { + for (encryptionEnabled in list("true", "false")) { +# To simulate a large object scenario, we set spark.r.maxAllocationLimit to a smaller value +conf <- list(spark.r.maxAllocationLimit = "100", + spark.io.encryption.enabled = encryptionEnabled) + +suppressWarnings(sparkR.session(master = sparkRTestMaster, +sparkConfig = conf, +enableHiveSupport = FALSE)) + +sc <- getSparkContext() +actual <- callJSt
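The new R writeToConnection helper streams each serialized slice as a 4-byte big-endian length followed by the slice bytes. The Scala sketch below only illustrates reading that framing on the JVM side; it is not the actual org.apache.spark.api.r.RParallelizeServer implementation.

```scala
import java.io.{DataInputStream, EOFException, InputStream}

import scala.collection.mutable.ArrayBuffer

// Illustration only: consumes the length-prefixed frames that SparkR's writeToConnection
// produces (a 4-byte big-endian length, then that many bytes, per serialized slice).
object RFrameReaderSketch {
  def readSlices(in: InputStream): Seq[Array[Byte]] = {
    val din = new DataInputStream(in)
    val slices = ArrayBuffer.empty[Array[Byte]]
    try {
      while (true) {
        val length = din.readInt()      // big-endian, matching writeBin(..., endian = "big")
        val bytes = new Array[Byte](length)
        din.readFully(bytes)
        slices += bytes
      }
    } catch {
      case _: EOFException => // the R side closed the connection; all slices received
    }
    slices
  }
}
```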
svn commit: r29683 - in /dev/spark/2.5.0-SNAPSHOT-2018_09_25_20_02-473d0d8-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Wed Sep 26 03:17:32 2018 New Revision: 29683 Log: Apache Spark 2.5.0-SNAPSHOT-2018_09_25_20_02-473d0d8 docs [This commit notification would consist of 1485 parts, which exceeds the limit of 50, so it was shortened to this summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-25514][SQL] Generating pretty JSON by to_json
Repository: spark Updated Branches: refs/heads/master cb77a6689 -> 473d0d862 [SPARK-25514][SQL] Generating pretty JSON by to_json ## What changes were proposed in this pull request? The PR introduces new JSON option `pretty` which allows to turn on `DefaultPrettyPrinter` of `Jackson`'s Json generator. New option is useful in exploring of deep nested columns and in converting of JSON columns in more readable representation (look at the added test). ## How was this patch tested? Added rount trip test which convert an JSON string to pretty representation via `from_json()` and `to_json()`. Closes #22534 from MaxGekk/pretty-json. Lead-authored-by: Maxim Gekk Co-authored-by: Maxim Gekk Signed-off-by: hyukjinkwon Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/473d0d86 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/473d0d86 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/473d0d86 Branch: refs/heads/master Commit: 473d0d862de54ec1c7a8f0354fa5e06f3d66e455 Parents: cb77a66 Author: Maxim Gekk Authored: Wed Sep 26 09:52:15 2018 +0800 Committer: hyukjinkwon Committed: Wed Sep 26 09:52:15 2018 +0800 -- R/pkg/R/functions.R | 5 +++-- python/pyspark/sql/functions.py | 4 +++- .../spark/sql/catalyst/json/JSONOptions.scala | 5 + .../sql/catalyst/json/JacksonGenerator.scala| 5 - .../scala/org/apache/spark/sql/functions.scala | 4 .../apache/spark/sql/JsonFunctionsSuite.scala | 21 6 files changed, 40 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/473d0d86/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 572dee5..6425c9d 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -198,8 +198,9 @@ NULL #' } #' @param ... additional argument(s). In \code{to_json} and \code{from_json}, this contains #'additional named properties to control how it is converted, accepts the same -#'options as the JSON data source. In \code{arrays_zip}, this contains additional -#'Columns of arrays to be merged. +#'options as the JSON data source. Additionally \code{to_json} supports the "pretty" +#'option which enables pretty JSON generation. In \code{arrays_zip}, this contains +#'additional Columns of arrays to be merged. #' @name column_collection_functions #' @rdname column_collection_functions #' @family collection functions http://git-wip-us.apache.org/repos/asf/spark/blob/473d0d86/python/pyspark/sql/functions.py -- diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 6da5237..1c3d972 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2295,7 +2295,9 @@ def to_json(col, options={}): into a JSON string. Throws an exception, in the case of an unsupported type. :param col: name of column containing a struct, an array or a map. -:param options: options to control converting. accepts the same options as the JSON datasource +:param options: options to control converting. accepts the same options as the JSON datasource. +Additionally the function supports the `pretty` option which enables +pretty JSON generation. 
>>> from pyspark.sql import Row >>> from pyspark.sql.types import * http://git-wip-us.apache.org/repos/asf/spark/blob/473d0d86/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala index 47eeb70..64152e0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala @@ -113,6 +113,11 @@ private[sql] class JSONOptions( } val lineSeparatorInWrite: String = lineSeparator.getOrElse("\n") + /** + * Generating JSON strings in pretty representation if the parameter is enabled. + */ + val pretty: Boolean = parameters.get("pretty").map(_.toBoolean).getOrElse(false) + /** Sets config options on a Jackson [[JsonFactory]]. */ def setJacksonOptions(factory: JsonFactory): Unit = { factory.configure(JsonParser.Feature.ALLOW_COMMENTS, allowComments) http://git-wip-us.apache.org/repos/asf/spark/blob/473d0d86/sql/catalyst/src/main/scala/org/apache/spark/sql/catalys
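A short Scala usage sketch of the new option, assuming a local SparkSession: passing pretty=true through the to_json options map switches the generator to Jackson's DefaultPrettyPrinter, while the default remains compact single-line JSON.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{struct, to_json}

object PrettyJsonSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("pretty-json").getOrCreate()
    import spark.implicits._

    val df = Seq((1, "alice"), (2, "bob")).toDF("id", "name")

    // Without options, to_json produces compact one-line JSON; with pretty=true the
    // generated string is pretty-printed, which is easier to read for nested columns.
    df.select(to_json(struct($"id", $"name"), Map("pretty" -> "true")).as("pretty_json"))
      .show(truncate = false)

    spark.stop()
  }
}
```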
spark git commit: [SPARK-21291][R] add R partitionBy API in DataFrame
Repository: spark Updated Branches: refs/heads/master 8c2edf46d -> cb77a6689 [SPARK-21291][R] add R partitionBy API in DataFrame ## What changes were proposed in this pull request? add R partitionBy API in write.df I didn't add bucketBy in write.df. The last line of write.df is ``` write <- handledCallJMethod(write, "save") ``` save doesn't support bucketBy right now. ``` assertNotBucketed("save") ``` ## How was this patch tested? Add unit test in test_sparkSQL.R Closes #22537 from huaxingao/spark-21291. Authored-by: Huaxin Gao Signed-off-by: hyukjinkwon Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cb77a668 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cb77a668 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cb77a668 Branch: refs/heads/master Commit: cb77a6689137916e64bc5692b0c942e86ca1a0ea Parents: 8c2edf4 Author: Huaxin Gao Authored: Wed Sep 26 09:37:44 2018 +0800 Committer: hyukjinkwon Committed: Wed Sep 26 09:37:44 2018 +0800 -- R/pkg/R/DataFrame.R | 17 +++-- R/pkg/tests/fulltests/test_sparkSQL.R | 8 2 files changed, 23 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/cb77a668/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index a1cb478..3469188 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2954,6 +2954,9 @@ setMethod("exceptAll", #' @param source a name for external data source. #' @param mode one of 'append', 'overwrite', 'error', 'errorifexists', 'ignore' #' save mode (it is 'error' by default) +#' @param partitionBy a name or a list of names of columns to partition the output by on the file +#'system. If specified, the output is laid out on the file system similar +#'to Hive's partitioning scheme. #' @param ... additional argument(s) passed to the method. #' #' @family SparkDataFrame functions @@ -2965,13 +2968,13 @@ setMethod("exceptAll", #' sparkR.session() #' path <- "path/to/file.json" #' df <- read.json(path) -#' write.df(df, "myfile", "parquet", "overwrite") +#' write.df(df, "myfile", "parquet", "overwrite", partitionBy = c("col1", "col2")) #' saveDF(df, parquetPath2, "parquet", mode = "append", mergeSchema = TRUE) #' } #' @note write.df since 1.4.0 setMethod("write.df", signature(df = "SparkDataFrame"), - function(df, path = NULL, source = NULL, mode = "error", ...) { + function(df, path = NULL, source = NULL, mode = "error", partitionBy = NULL, ...) { if (!is.null(path) && !is.character(path)) { stop("path should be character, NULL or omitted.") } @@ -2985,8 +2988,18 @@ setMethod("write.df", if (is.null(source)) { source <- getDefaultSqlSource() } +cols <- NULL +if (!is.null(partitionBy)) { + if (!all(sapply(partitionBy, function(c) is.character(c { +stop("All partitionBy column names should be characters.") + } + cols <- as.list(partitionBy) +} write <- callJMethod(df@sdf, "write") write <- callJMethod(write, "format", source) +if (!is.null(cols)) { + write <- callJMethod(write, "partitionBy", cols) +} write <- setWriteOptions(write, path = path, mode = mode, ...) 
write <- handledCallJMethod(write, "save") }) http://git-wip-us.apache.org/repos/asf/spark/blob/cb77a668/R/pkg/tests/fulltests/test_sparkSQL.R -- diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index a874bfb..50eff37 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -2701,8 +2701,16 @@ test_that("read/write text files", { expect_equal(colnames(df2), c("value")) expect_equal(count(df2), count(df) * 2) + df3 <- createDataFrame(list(list(1L, "1"), list(2L, "2"), list(1L, "1"), list(2L, "2")), + schema = c("key", "value")) + textPath3 <- tempfile(pattern = "textPath3", fileext = ".txt") + write.df(df3, textPath3, "text", mode = "overwrite", partitionBy = "key") + df4 <- read.df(textPath3, "text") + expect_equal(count(df3), count(df4)) + unlink(textPath) unlink(textPath2) + unlink(textPath3) }) test_that("read/write text files - compression option", { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@
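For reference, the R `write.df(..., partitionBy = ...)` call above just forwards to the JVM `DataFrameWriter` via `callJMethod(write, "partitionBy", cols)`. A short Scala sketch of the equivalent write path (path and column names are placeholders):

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("partitionBy-sketch").getOrCreate()
import spark.implicits._

val df = Seq((1, "us", "2018"), (2, "de", "2018")).toDF("id", "country", "year")

// Hive-style layout: one sub-directory per distinct (country, year) combination,
// matching what the new R partitionBy argument asks the writer to do.
df.write
  .format("parquet")
  .partitionBy("country", "year")
  .mode("overwrite")
  .save("/tmp/partitionBy-sketch")
```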
spark git commit: [SPARK-24324][PYTHON][FOLLOW-UP] Rename the Conf to spark.sql.legacy.execution.pandas.groupedMap.assignColumnsByName
Repository: spark Updated Branches: refs/heads/branch-2.4 f91247f81 -> 3f203050a [SPARK-24324][PYTHON][FOLLOW-UP] Rename the Conf to spark.sql.legacy.execution.pandas.groupedMap.assignColumnsByName ## What changes were proposed in this pull request? Add the legacy prefix for spark.sql.execution.pandas.groupedMap.assignColumnsByPosition and rename it to spark.sql.legacy.execution.pandas.groupedMap.assignColumnsByName ## How was this patch tested? The existing tests. Closes #22540 from gatorsmile/renameAssignColumnsByPosition. Lead-authored-by: gatorsmile Co-authored-by: Hyukjin Kwon Signed-off-by: hyukjinkwon (cherry picked from commit 8c2edf46d0f89e5ec54968218d89f30a3f8190bc) Signed-off-by: hyukjinkwon Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3f203050 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3f203050 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3f203050 Branch: refs/heads/branch-2.4 Commit: 3f203050ac764516e68fb43628bba0df5963e44d Parents: f91247f Author: gatorsmile Authored: Wed Sep 26 09:32:51 2018 +0800 Committer: hyukjinkwon Committed: Wed Sep 26 09:33:13 2018 +0800 -- python/pyspark/sql/tests.py | 3 ++- python/pyspark/worker.py | 7 --- .../org/apache/spark/sql/internal/SQLConf.scala | 18 +- .../spark/sql/execution/arrow/ArrowUtils.scala| 9 +++-- 4 files changed, 18 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3f203050/python/pyspark/sql/tests.py -- diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 9fa1577..cb186de 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -5799,7 +5799,8 @@ class GroupedMapPandasUDFTests(ReusedSQLTestCase): import pandas as pd from pyspark.sql.functions import pandas_udf, PandasUDFType -with self.sql_conf({"spark.sql.execution.pandas.groupedMap.assignColumnsByPosition": True}): +with self.sql_conf({ + "spark.sql.legacy.execution.pandas.groupedMap.assignColumnsByName": False}): @pandas_udf("a string, b float", PandasUDFType.GROUPED_MAP) def foo(_): http://git-wip-us.apache.org/repos/asf/spark/blob/3f203050/python/pyspark/worker.py -- diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index 974344f..8c59f1f 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -97,8 +97,9 @@ def wrap_scalar_pandas_udf(f, return_type): def wrap_grouped_map_pandas_udf(f, return_type, argspec, runner_conf): -assign_cols_by_pos = runner_conf.get( -"spark.sql.execution.pandas.groupedMap.assignColumnsByPosition", False) +assign_cols_by_name = runner_conf.get( +"spark.sql.legacy.execution.pandas.groupedMap.assignColumnsByName", "true") +assign_cols_by_name = assign_cols_by_name.lower() == "true" def wrapped(key_series, value_series): import pandas as pd @@ -119,7 +120,7 @@ def wrap_grouped_map_pandas_udf(f, return_type, argspec, runner_conf): "Expected: {} Actual: {}".format(len(return_type), len(result.columns))) # Assign result columns by schema name if user labeled with strings, else use position -if not assign_cols_by_pos and any(isinstance(name, basestring) for name in result.columns): +if assign_cols_by_name and any(isinstance(name, basestring) for name in result.columns): return [(result[field.name], to_arrow_type(field.dataType)) for field in return_type] else: return [(result[result.columns[i]], to_arrow_type(field.dataType)) 
http://git-wip-us.apache.org/repos/asf/spark/blob/3f203050/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 2788402..68daf9d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1275,15 +1275,15 @@ object SQLConf { .booleanConf .createWithDefault(true) - val PANDAS_GROUPED_MAP_ASSIGN_COLUMNS_BY_POSITION = -buildConf("spark.sql.execution.pandas.groupedMap.assignColumnsByPosition") + val PANDAS_GROUPED_MAP_ASSIGN_COLUMNS_BY_NAME = + buildConf("spark.sql.legacy.execution.pandas.groupedMap.assignColumnsByName") .internal() - .doc("When true, a grouped map P
spark git commit: [SPARK-24324][PYTHON][FOLLOW-UP] Rename the Conf to spark.sql.legacy.execution.pandas.groupedMap.assignColumnsByName
Repository: spark Updated Branches: refs/heads/master 9bb3a0c67 -> 8c2edf46d [SPARK-24324][PYTHON][FOLLOW-UP] Rename the Conf to spark.sql.legacy.execution.pandas.groupedMap.assignColumnsByName ## What changes were proposed in this pull request? Add the legacy prefix for spark.sql.execution.pandas.groupedMap.assignColumnsByPosition and rename it to spark.sql.legacy.execution.pandas.groupedMap.assignColumnsByName ## How was this patch tested? The existing tests. Closes #22540 from gatorsmile/renameAssignColumnsByPosition. Lead-authored-by: gatorsmile Co-authored-by: Hyukjin Kwon Signed-off-by: hyukjinkwon Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8c2edf46 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8c2edf46 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8c2edf46 Branch: refs/heads/master Commit: 8c2edf46d0f89e5ec54968218d89f30a3f8190bc Parents: 9bb3a0c Author: gatorsmile Authored: Wed Sep 26 09:32:51 2018 +0800 Committer: hyukjinkwon Committed: Wed Sep 26 09:32:51 2018 +0800 -- python/pyspark/sql/tests.py | 3 ++- python/pyspark/worker.py | 7 --- .../org/apache/spark/sql/internal/SQLConf.scala | 18 +- .../spark/sql/execution/arrow/ArrowUtils.scala| 9 +++-- 4 files changed, 18 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8c2edf46/python/pyspark/sql/tests.py -- diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index b829bae..74642d4 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -5802,7 +5802,8 @@ class GroupedMapPandasUDFTests(ReusedSQLTestCase): import pandas as pd from pyspark.sql.functions import pandas_udf, PandasUDFType -with self.sql_conf({"spark.sql.execution.pandas.groupedMap.assignColumnsByPosition": True}): +with self.sql_conf({ + "spark.sql.legacy.execution.pandas.groupedMap.assignColumnsByName": False}): @pandas_udf("a string, b float", PandasUDFType.GROUPED_MAP) def foo(_): http://git-wip-us.apache.org/repos/asf/spark/blob/8c2edf46/python/pyspark/worker.py -- diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index 974344f..8c59f1f 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -97,8 +97,9 @@ def wrap_scalar_pandas_udf(f, return_type): def wrap_grouped_map_pandas_udf(f, return_type, argspec, runner_conf): -assign_cols_by_pos = runner_conf.get( -"spark.sql.execution.pandas.groupedMap.assignColumnsByPosition", False) +assign_cols_by_name = runner_conf.get( +"spark.sql.legacy.execution.pandas.groupedMap.assignColumnsByName", "true") +assign_cols_by_name = assign_cols_by_name.lower() == "true" def wrapped(key_series, value_series): import pandas as pd @@ -119,7 +120,7 @@ def wrap_grouped_map_pandas_udf(f, return_type, argspec, runner_conf): "Expected: {} Actual: {}".format(len(return_type), len(result.columns))) # Assign result columns by schema name if user labeled with strings, else use position -if not assign_cols_by_pos and any(isinstance(name, basestring) for name in result.columns): +if assign_cols_by_name and any(isinstance(name, basestring) for name in result.columns): return [(result[field.name], to_arrow_type(field.dataType)) for field in return_type] else: return [(result[result.columns[i]], to_arrow_type(field.dataType)) http://git-wip-us.apache.org/repos/asf/spark/blob/8c2edf46/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 0e0a01d..e7c9a83 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1295,15 +1295,15 @@ object SQLConf { .booleanConf .createWithDefault(true) - val PANDAS_GROUPED_MAP_ASSIGN_COLUMNS_BY_POSITION = -buildConf("spark.sql.execution.pandas.groupedMap.assignColumnsByPosition") + val PANDAS_GROUPED_MAP_ASSIGN_COLUMNS_BY_NAME = + buildConf("spark.sql.legacy.execution.pandas.groupedMap.assignColumnsByName") .internal() - .doc("When true, a grouped map Pandas UDF will assign columns from the returned " + -"Pandas DataFrame based on position, regard
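The rename also flips the flag's polarity: matching output columns by name is now the default, and the old positional behaviour becomes an opt-in through the new legacy key. A minimal sketch of toggling it from the Scala side (session setup shown only for completeness):

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("pandas-conf-sketch").getOrCreate()

// Default after this change: grouped map Pandas UDF results are assigned by column name.
// Setting the legacy key to "false" restores the old position-based assignment.
spark.conf.set("spark.sql.legacy.execution.pandas.groupedMap.assignColumnsByName", "false")

// The old key (spark.sql.execution.pandas.groupedMap.assignColumnsByPosition) is gone;
// the Python worker now reads the value of the legacy key shown here.
println(spark.conf.get("spark.sql.legacy.execution.pandas.groupedMap.assignColumnsByName"))
```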
svn commit: r29678 - in /dev/spark/2.4.1-SNAPSHOT-2018_09_25_18_02-f91247f-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Wed Sep 26 01:16:56 2018 New Revision: 29678 Log: Apache Spark 2.4.1-SNAPSHOT-2018_09_25_18_02-f91247f docs [This commit notification would consist of 1472 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
spark git commit: [SPARK-25422][CORE] Don't memory map blocks streamed to disk.
Repository: spark Updated Branches: refs/heads/branch-2.4 544f86a69 -> f91247f81 [SPARK-25422][CORE] Don't memory map blocks streamed to disk. After data has been streamed to disk, the buffers are inserted into the memory store in some cases (eg., with broadcast blocks). But broadcast code also disposes of those buffers when the data has been read, to ensure that we don't leave mapped buffers using up memory, which then leads to garbage data in the memory store. ## How was this patch tested? Ran the old failing test in a loop. Full tests on jenkins Closes #22546 from squito/SPARK-25422-master. Authored-by: Imran Rashid Signed-off-by: Wenchen Fan (cherry picked from commit 9bb3a0c67bd851b09ff4701ef1d280e2a77d791b) Signed-off-by: Wenchen Fan Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f91247f8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f91247f8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f91247f8 Branch: refs/heads/branch-2.4 Commit: f91247f812f87daa9fe4ec23b100f2310254df22 Parents: 544f86a Author: Imran Rashid Authored: Wed Sep 26 08:45:27 2018 +0800 Committer: Wenchen Fan Committed: Wed Sep 26 08:45:56 2018 +0800 -- .../org/apache/spark/storage/BlockManager.scala | 13 +++--- .../spark/util/io/ChunkedByteBuffer.scala | 47 +++- 2 files changed, 31 insertions(+), 29 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f91247f8/core/src/main/scala/org/apache/spark/storage/BlockManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 2234146..0fe82ac 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -438,10 +438,8 @@ private[spark] class BlockManager( // stream. channel.close() // TODO SPARK-25035 Even if we're only going to write the data to disk after this, we end up -// using a lot of memory here. With encryption, we'll read the whole file into a regular -// byte buffer and OOM. Without encryption, we'll memory map the file and won't get a jvm -// OOM, but might get killed by the OS / cluster manager. We could at least read the tmp -// file as a stream in both cases. +// using a lot of memory here. We'll read the whole file into a regular +// byte buffer and OOM. We could at least read the tmp file as a stream. val buffer = securityManager.getIOEncryptionKey() match { case Some(key) => // we need to pass in the size of the unencrypted block @@ -453,7 +451,7 @@ private[spark] class BlockManager( new EncryptedBlockData(tmpFile, blockSize, conf, key).toChunkedByteBuffer(allocator) case None => -ChunkedByteBuffer.map(tmpFile, conf.get(config.MEMORY_MAP_LIMIT_FOR_TESTS).toInt) +ChunkedByteBuffer.fromFile(tmpFile, conf.get(config.MEMORY_MAP_LIMIT_FOR_TESTS).toInt) } putBytes(blockId, buffer, level)(classTag) tmpFile.delete() @@ -726,10 +724,9 @@ private[spark] class BlockManager( */ def getRemoteBytes(blockId: BlockId): Option[ChunkedByteBuffer] = { // TODO if we change this method to return the ManagedBuffer, then getRemoteValues -// could just use the inputStream on the temp file, rather than memory-mapping the file. +// could just use the inputStream on the temp file, rather than reading the file into memory. 
// Until then, replication can cause the process to use too much memory and get killed -// by the OS / cluster manager (not a java OOM, since it's a memory-mapped file) even though -// we've read the data to disk. +// even though we've read the data to disk. logDebug(s"Getting remote block $blockId") require(blockId != null, "BlockId is null") var runningFailureCount = 0 http://git-wip-us.apache.org/repos/asf/spark/blob/f91247f8/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala b/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala index 39f050f..4aa8d45 100644 --- a/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala +++ b/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala @@ -19,17 +19,16 @@ package org.apache.spark.util.io import java.io.{File, FileInputStream, InputStream} import java.nio.ByteBuffer -import java.nio.channels.{FileChannel, WritableByteChannel} -impor
spark git commit: [SPARK-25422][CORE] Don't memory map blocks streamed to disk.
Repository: spark Updated Branches: refs/heads/master 66d29870c -> 9bb3a0c67 [SPARK-25422][CORE] Don't memory map blocks streamed to disk. After data has been streamed to disk, the buffers are inserted into the memory store in some cases (eg., with broadcast blocks). But broadcast code also disposes of those buffers when the data has been read, to ensure that we don't leave mapped buffers using up memory, which then leads to garbage data in the memory store. ## How was this patch tested? Ran the old failing test in a loop. Full tests on jenkins Closes #22546 from squito/SPARK-25422-master. Authored-by: Imran Rashid Signed-off-by: Wenchen Fan Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9bb3a0c6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9bb3a0c6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9bb3a0c6 Branch: refs/heads/master Commit: 9bb3a0c67bd851b09ff4701ef1d280e2a77d791b Parents: 66d2987 Author: Imran Rashid Authored: Wed Sep 26 08:45:27 2018 +0800 Committer: Wenchen Fan Committed: Wed Sep 26 08:45:27 2018 +0800 -- .../org/apache/spark/storage/BlockManager.scala | 13 +++--- .../spark/util/io/ChunkedByteBuffer.scala | 47 +++- 2 files changed, 31 insertions(+), 29 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9bb3a0c6/core/src/main/scala/org/apache/spark/storage/BlockManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 2234146..0fe82ac 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -438,10 +438,8 @@ private[spark] class BlockManager( // stream. channel.close() // TODO SPARK-25035 Even if we're only going to write the data to disk after this, we end up -// using a lot of memory here. With encryption, we'll read the whole file into a regular -// byte buffer and OOM. Without encryption, we'll memory map the file and won't get a jvm -// OOM, but might get killed by the OS / cluster manager. We could at least read the tmp -// file as a stream in both cases. +// using a lot of memory here. We'll read the whole file into a regular +// byte buffer and OOM. We could at least read the tmp file as a stream. val buffer = securityManager.getIOEncryptionKey() match { case Some(key) => // we need to pass in the size of the unencrypted block @@ -453,7 +451,7 @@ private[spark] class BlockManager( new EncryptedBlockData(tmpFile, blockSize, conf, key).toChunkedByteBuffer(allocator) case None => -ChunkedByteBuffer.map(tmpFile, conf.get(config.MEMORY_MAP_LIMIT_FOR_TESTS).toInt) +ChunkedByteBuffer.fromFile(tmpFile, conf.get(config.MEMORY_MAP_LIMIT_FOR_TESTS).toInt) } putBytes(blockId, buffer, level)(classTag) tmpFile.delete() @@ -726,10 +724,9 @@ private[spark] class BlockManager( */ def getRemoteBytes(blockId: BlockId): Option[ChunkedByteBuffer] = { // TODO if we change this method to return the ManagedBuffer, then getRemoteValues -// could just use the inputStream on the temp file, rather than memory-mapping the file. +// could just use the inputStream on the temp file, rather than reading the file into memory. // Until then, replication can cause the process to use too much memory and get killed -// by the OS / cluster manager (not a java OOM, since it's a memory-mapped file) even though -// we've read the data to disk. +// even though we've read the data to disk. 
logDebug(s"Getting remote block $blockId") require(blockId != null, "BlockId is null") var runningFailureCount = 0 http://git-wip-us.apache.org/repos/asf/spark/blob/9bb3a0c6/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala b/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala index 39f050f..4aa8d45 100644 --- a/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala +++ b/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala @@ -19,17 +19,16 @@ package org.apache.spark.util.io import java.io.{File, FileInputStream, InputStream} import java.nio.ByteBuffer -import java.nio.channels.{FileChannel, WritableByteChannel} -import java.nio.file.StandardOpenOption - -import scala.collection.mutable.ListBuffer +import java.nio.channel
svn commit: r29675 - in /dev/spark/2.4.1-SNAPSHOT-2018_09_25_14_02-544f86a-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Tue Sep 25 21:17:02 2018 New Revision: 29675 Log: Apache Spark 2.4.1-SNAPSHOT-2018_09_25_14_02-544f86a docs [This commit notification would consist of 1472 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
svn commit: r29672 - in /dev/spark/2.5.0-SNAPSHOT-2018_09_25_12_02-66d2987-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Tue Sep 25 19:17:33 2018 New Revision: 29672 Log: Apache Spark 2.5.0-SNAPSHOT-2018_09_25_12_02-66d2987 docs [This commit notification would consist of 1485 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
spark git commit: [SPARK-25495][SS] FetchedData.reset should reset all fields
Repository: spark Updated Branches: refs/heads/branch-2.4 a709718da -> 544f86a69 [SPARK-25495][SS] FetchedData.reset should reset all fields ## What changes were proposed in this pull request? `FetchedData.reset` should reset `_nextOffsetInFetchedData` and `_offsetAfterPoll`. Otherwise it will cause inconsistent cached data and may make Kafka connector return wrong results. ## How was this patch tested? The new unit test. Closes #22507 from zsxwing/fix-kafka-reset. Lead-authored-by: Shixiong Zhu Co-authored-by: Shixiong Zhu Signed-off-by: Shixiong Zhu (cherry picked from commit 66d29870c09e6050dd846336e596faaa8b0d14ad) Signed-off-by: Shixiong Zhu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/544f86a6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/544f86a6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/544f86a6 Branch: refs/heads/branch-2.4 Commit: 544f86a69bba94dfcb241e41c799ed63ef4210fc Parents: a709718 Author: Shixiong Zhu Authored: Tue Sep 25 11:42:27 2018 -0700 Committer: Shixiong Zhu Committed: Tue Sep 25 11:42:39 2018 -0700 -- .../spark/sql/kafka010/KafkaDataConsumer.scala | 5 +- .../kafka010/KafkaMicroBatchSourceSuite.scala | 52 2 files changed, 56 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/544f86a6/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala -- diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala index ceb9e31..7b1314b 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala @@ -134,6 +134,8 @@ private[kafka010] case class InternalKafkaConsumer( /** Reset the internal pre-fetched data. */ def reset(): Unit = { _records = ju.Collections.emptyListIterator() + _nextOffsetInFetchedData = UNKNOWN_OFFSET + _offsetAfterPoll = UNKNOWN_OFFSET } /** @@ -361,8 +363,9 @@ private[kafka010] case class InternalKafkaConsumer( if (offset < fetchedData.offsetAfterPoll) { // Offsets in [offset, fetchedData.offsetAfterPoll) are invisible. Return a record to ask // the next call to start from `fetchedData.offsetAfterPoll`. +val nextOffsetToFetch = fetchedData.offsetAfterPoll fetchedData.reset() -return fetchedRecord.withRecord(null, fetchedData.offsetAfterPoll) +return fetchedRecord.withRecord(null, nextOffsetToFetch) } else { // Fetch records from Kafka and update `fetchedData`. 
fetchData(offset, pollTimeoutMs) http://git-wip-us.apache.org/repos/asf/spark/blob/544f86a6/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala -- diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index 65615fd..e0b6d8c 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -853,6 +853,58 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { ) } } + + test("SPARK-25495: FetchedData.reset should reset all fields") { +val topic = newTopic() +val topicPartition = new TopicPartition(topic, 0) +testUtils.createTopic(topic, partitions = 1) + +val ds = spark + .readStream + .format("kafka") + .option("kafka.bootstrap.servers", testUtils.brokerAddress) + .option("kafka.metadata.max.age.ms", "1") + .option("kafka.isolation.level", "read_committed") + .option("subscribe", topic) + .option("startingOffsets", "earliest") + .load() + .select($"value".as[String]) + +testUtils.withTranscationalProducer { producer => + producer.beginTransaction() + (0 to 3).foreach { i => +producer.send(new ProducerRecord[String, String](topic, i.toString)).get() + } + producer.commitTransaction() +} +testUtils.waitUntilOffsetAppears(topicPartition, 5) + +val q = ds.writeStream.foreachBatch { (ds, epochId) => + if (epochId == 0) { +// Send more message before the tasks of the current batch start reading the current bat
spark git commit: [SPARK-25495][SS] FetchedData.reset should reset all fields
Repository: spark Updated Branches: refs/heads/master 04db03537 -> 66d29870c [SPARK-25495][SS] FetchedData.reset should reset all fields ## What changes were proposed in this pull request? `FetchedData.reset` should reset `_nextOffsetInFetchedData` and `_offsetAfterPoll`. Otherwise it will cause inconsistent cached data and may make Kafka connector return wrong results. ## How was this patch tested? The new unit test. Closes #22507 from zsxwing/fix-kafka-reset. Lead-authored-by: Shixiong Zhu Co-authored-by: Shixiong Zhu Signed-off-by: Shixiong Zhu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/66d29870 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/66d29870 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/66d29870 Branch: refs/heads/master Commit: 66d29870c09e6050dd846336e596faaa8b0d14ad Parents: 04db035 Author: Shixiong Zhu Authored: Tue Sep 25 11:42:27 2018 -0700 Committer: Shixiong Zhu Committed: Tue Sep 25 11:42:27 2018 -0700 -- .../spark/sql/kafka010/KafkaDataConsumer.scala | 5 +- .../kafka010/KafkaMicroBatchSourceSuite.scala | 52 2 files changed, 56 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/66d29870/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala -- diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala index ceb9e31..7b1314b 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala @@ -134,6 +134,8 @@ private[kafka010] case class InternalKafkaConsumer( /** Reset the internal pre-fetched data. */ def reset(): Unit = { _records = ju.Collections.emptyListIterator() + _nextOffsetInFetchedData = UNKNOWN_OFFSET + _offsetAfterPoll = UNKNOWN_OFFSET } /** @@ -361,8 +363,9 @@ private[kafka010] case class InternalKafkaConsumer( if (offset < fetchedData.offsetAfterPoll) { // Offsets in [offset, fetchedData.offsetAfterPoll) are invisible. Return a record to ask // the next call to start from `fetchedData.offsetAfterPoll`. +val nextOffsetToFetch = fetchedData.offsetAfterPoll fetchedData.reset() -return fetchedRecord.withRecord(null, fetchedData.offsetAfterPoll) +return fetchedRecord.withRecord(null, nextOffsetToFetch) } else { // Fetch records from Kafka and update `fetchedData`. 
fetchData(offset, pollTimeoutMs) http://git-wip-us.apache.org/repos/asf/spark/blob/66d29870/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala -- diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index e5f0088..39c2cde 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -874,6 +874,58 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { ) } } + + test("SPARK-25495: FetchedData.reset should reset all fields") { +val topic = newTopic() +val topicPartition = new TopicPartition(topic, 0) +testUtils.createTopic(topic, partitions = 1) + +val ds = spark + .readStream + .format("kafka") + .option("kafka.bootstrap.servers", testUtils.brokerAddress) + .option("kafka.metadata.max.age.ms", "1") + .option("kafka.isolation.level", "read_committed") + .option("subscribe", topic) + .option("startingOffsets", "earliest") + .load() + .select($"value".as[String]) + +testUtils.withTranscationalProducer { producer => + producer.beginTransaction() + (0 to 3).foreach { i => +producer.send(new ProducerRecord[String, String](topic, i.toString)).get() + } + producer.commitTransaction() +} +testUtils.waitUntilOffsetAppears(topicPartition, 5) + +val q = ds.writeStream.foreachBatch { (ds, epochId) => + if (epochId == 0) { +// Send more message before the tasks of the current batch start reading the current batch +// data, so that the executors will prefetch messages in the next batch and drop them. In +
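The stale-fields bug only matters once the consumer has to skip over offsets that are invisible to it, which is why the new test produces inside a Kafka transaction and reads with `read_committed`. A minimal Scala reader sketch along the lines of that test (broker address and topic name are placeholders):

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("kafka-read-committed-sketch").getOrCreate()
import spark.implicits._

// With read_committed isolation, offsets belonging to open or aborted transactions
// are invisible to the source; that is the code path exercised by the new test.
val ds = spark.readStream
  .format("kafka")
  .option("kafka.bootstrap.servers", "localhost:9092")
  .option("kafka.isolation.level", "read_committed")
  .option("subscribe", "my-topic")
  .option("startingOffsets", "earliest")
  .load()
  .select($"value".as[String])
```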
spark git commit: [SPARK-25486][TEST] Refactor SortBenchmark to use main method
Repository: spark Updated Branches: refs/heads/master 9cbd001e2 -> 04db03537 [SPARK-25486][TEST] Refactor SortBenchmark to use main method ## What changes were proposed in this pull request? Refactor SortBenchmark to use main method. Generate benchmark result: ``` SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain org.apache.spark.sql.execution.benchmark.SortBenchmark" ``` ## How was this patch tested? manual tests Closes #22495 from yucai/SPARK-25486. Authored-by: yucai Signed-off-by: Dongjoon Hyun Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/04db0353 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/04db0353 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/04db0353 Branch: refs/heads/master Commit: 04db035378012907c93f6e5b4faa6ec11f1fc67b Parents: 9cbd001 Author: yucai Authored: Tue Sep 25 11:13:05 2018 -0700 Committer: Dongjoon Hyun Committed: Tue Sep 25 11:13:05 2018 -0700 -- sql/core/benchmarks/SortBenchmark-results.txt | 17 + .../sql/execution/benchmark/SortBenchmark.scala | 38 +--- 2 files changed, 33 insertions(+), 22 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/04db0353/sql/core/benchmarks/SortBenchmark-results.txt -- diff --git a/sql/core/benchmarks/SortBenchmark-results.txt b/sql/core/benchmarks/SortBenchmark-results.txt new file mode 100644 index 000..0d00a0c --- /dev/null +++ b/sql/core/benchmarks/SortBenchmark-results.txt @@ -0,0 +1,17 @@ + +radix sort + + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +radix sort 2500: Best/Avg Time(ms)Rate(M/s) Per Row(ns) Relative + +reference TimSort key prefix array 11770 / 11960 2.1 470.8 1.0X +reference Arrays.sort 2106 / 2128 11.9 84.3 5.6X +radix sort one byte 93 / 100269.7 3.7 126.9X +radix sort two bytes 171 / 179146.0 6.9 68.7X +radix sort eight bytes 659 / 664 37.9 26.4 17.9X +radix sort key prefix array 1024 / 1053 24.4 41.0 11.5X + + http://git-wip-us.apache.org/repos/asf/spark/blob/04db0353/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SortBenchmark.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SortBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SortBenchmark.scala index 17619ec..958a064 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SortBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SortBenchmark.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.benchmark import java.util.{Arrays, Comparator} -import org.apache.spark.benchmark.Benchmark +import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} import org.apache.spark.unsafe.array.LongArray import org.apache.spark.unsafe.memory.MemoryBlock import org.apache.spark.util.collection.Sorter @@ -28,12 +28,15 @@ import org.apache.spark.util.random.XORShiftRandom /** * Benchmark to measure performance for aggregate primitives. - * To run this: - * build/sbt "sql/test-only *benchmark.SortBenchmark" - * - * Benchmarks in this file are skipped in normal builds. + * {{{ + * To run this benchmark: + * 1. without sbt: bin/spark-submit --class + * 2. build/sbt "sql/test:runMain " + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain " + * Results will be written to "benchmarks/-results.txt". 
+ * }}} */ -class SortBenchmark extends BenchmarkWithCodegen { +object SortBenchmark extends BenchmarkBase { private def referenceKeyPrefixSort(buf: LongArray, lo: Int, hi: Int, refCmp: PrefixComparator) { val sortBuffer = new LongArray(MemoryBlock.fromLongArray(new Array[Long](buf.size().toInt))) @@ -54,10 +57,10 @@ class SortBenchmark extends BenchmarkWithCodegen { new LongArray(MemoryBlock.fromLongArray(extended))) } - ignore("sort") { + def sortBenchmark(): Unit = { val size = 2500 val rand = new XORShiftRandom(123) -val benchmark = new Bench
[3/3] spark git commit: [PYSPARK][SQL] Updates to RowQueue
[PYSPARK][SQL] Updates to RowQueue Tested with updates to RowQueueSuite (cherry picked from commit 6d742d1bd71aa3803dce91a830b37284cb18cf70) Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f10aff4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f10aff4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f10aff4 Branch: refs/heads/branch-2.2 Commit: 4f10aff403ccc8287a816cb94ddf7f11e185907a Parents: dd0e7cf Author: Imran Rashid Authored: Thu Sep 6 12:11:47 2018 -0500 Committer: Imran Rashid Committed: Tue Sep 25 11:46:06 2018 -0500 -- .../spark/sql/execution/python/RowQueue.scala | 27 ++- .../sql/execution/python/RowQueueSuite.scala| 28 +++- 2 files changed, 41 insertions(+), 14 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4f10aff4/sql/core/src/main/scala/org/apache/spark/sql/execution/python/RowQueue.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/RowQueue.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/RowQueue.scala index cd1e77f..4d6820c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/RowQueue.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/RowQueue.scala @@ -21,9 +21,10 @@ import java.io._ import com.google.common.io.Closeables -import org.apache.spark.SparkException +import org.apache.spark.{SparkEnv, SparkException} import org.apache.spark.io.NioBufferedFileInputStream import org.apache.spark.memory.{MemoryConsumer, TaskMemoryManager} +import org.apache.spark.serializer.SerializerManager import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.unsafe.Platform import org.apache.spark.unsafe.memory.MemoryBlock @@ -108,9 +109,13 @@ private[python] abstract class InMemoryRowQueue(val page: MemoryBlock, numFields * A RowQueue that is backed by a file on disk. This queue will stop accepting new rows once any * reader has begun reading from the queue. 
*/ -private[python] case class DiskRowQueue(file: File, fields: Int) extends RowQueue { - private var out = new DataOutputStream( -new BufferedOutputStream(new FileOutputStream(file.toString))) +private[python] case class DiskRowQueue( +file: File, +fields: Int, +serMgr: SerializerManager) extends RowQueue { + + private var out = new DataOutputStream(serMgr.wrapForEncryption( +new BufferedOutputStream(new FileOutputStream(file.toString private var unreadBytes = 0L private var in: DataInputStream = _ @@ -131,7 +136,8 @@ private[python] case class DiskRowQueue(file: File, fields: Int) extends RowQueu if (out != null) { out.close() out = null - in = new DataInputStream(new NioBufferedFileInputStream(file)) + in = new DataInputStream(serMgr.wrapForEncryption( +new NioBufferedFileInputStream(file))) } if (unreadBytes > 0) { @@ -166,7 +172,8 @@ private[python] case class DiskRowQueue(file: File, fields: Int) extends RowQueu private[python] case class HybridRowQueue( memManager: TaskMemoryManager, tempDir: File, -numFields: Int) +numFields: Int, +serMgr: SerializerManager) extends MemoryConsumer(memManager) with RowQueue { // Each buffer should have at least one row @@ -212,7 +219,7 @@ private[python] case class HybridRowQueue( } private def createDiskQueue(): RowQueue = { -DiskRowQueue(File.createTempFile("buffer", "", tempDir), numFields) +DiskRowQueue(File.createTempFile("buffer", "", tempDir), numFields, serMgr) } private def createNewQueue(required: Long): RowQueue = { @@ -279,3 +286,9 @@ private[python] case class HybridRowQueue( } } } + +private[python] object HybridRowQueue { + def apply(taskMemoryMgr: TaskMemoryManager, file: File, fields: Int): HybridRowQueue = { +HybridRowQueue(taskMemoryMgr, file, fields, SparkEnv.get.serializerManager) + } +} http://git-wip-us.apache.org/repos/asf/spark/blob/4f10aff4/sql/core/src/test/scala/org/apache/spark/sql/execution/python/RowQueueSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/RowQueueSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/RowQueueSuite.scala index ffda33c..1ec9986 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/RowQueueSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/RowQueueSuite.scala @@ -20,12 +20,15 @@ package org.apache.spark.sql.execution.python import java.io.File import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache
[1/3] spark git commit: [SPARK-25253][PYSPARK] Refactor local connection & auth code
Repository: spark Updated Branches: refs/heads/branch-2.2 bd12eb75d -> 4f10aff40 [SPARK-25253][PYSPARK] Refactor local connection & auth code This eliminates some duplication in the code to connect to a server on localhost to talk directly to the jvm. Also it gives consistent ipv6 and error handling. Two other incidental changes, that shouldn't matter: 1) python barrier tasks perform authentication immediately (rather than waiting for the BARRIER_FUNCTION indicator) 2) for `rdd._load_from_socket`, the timeout is only increased after authentication. Closes #22247 from squito/py_connection_refactor. Authored-by: Imran Rashid Signed-off-by: hyukjinkwon (cherry picked from commit 38391c9aa8a88fcebb337934f30298a32d91596b) (cherry picked from commit a2a54a5f49364a1825932c9f04eb0ff82dd7d465) Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fc1c4e7d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fc1c4e7d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fc1c4e7d Branch: refs/heads/branch-2.2 Commit: fc1c4e7d24f7d0afb3b79d66aa9812e7dddc2f38 Parents: bd12eb7 Author: Imran Rashid Authored: Wed Aug 29 09:47:38 2018 +0800 Committer: Imran Rashid Committed: Tue Sep 25 11:45:59 2018 -0500 -- python/pyspark/java_gateway.py | 32 +++- python/pyspark/rdd.py | 24 ++-- python/pyspark/worker.py | 7 ++- 3 files changed, 35 insertions(+), 28 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fc1c4e7d/python/pyspark/java_gateway.py -- diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py index 7abf2c1..191dfce 100644 --- a/python/pyspark/java_gateway.py +++ b/python/pyspark/java_gateway.py @@ -133,7 +133,7 @@ def launch_gateway(conf=None): return gateway -def do_server_auth(conn, auth_secret): +def _do_server_auth(conn, auth_secret): """ Performs the authentication protocol defined by the SocketAuthHelper class on the given file-like object 'conn'. @@ -144,3 +144,33 @@ def do_server_auth(conn, auth_secret): if reply != "ok": conn.close() raise Exception("Unexpected reply from iterator server.") + + +def local_connect_and_auth(port, auth_secret): +""" +Connect to local host, authenticate with it, and return a (sockfile,sock) for that connection. +Handles IPV4 & IPV6, does some error handling. +:param port +:param auth_secret +:return: a tuple with (sockfile, sock) +""" +sock = None +errors = [] +# Support for both IPv4 and IPv6. +# On most of IPv6-ready systems, IPv6 will take precedence. 
+for res in socket.getaddrinfo("127.0.0.1", port, socket.AF_UNSPEC, socket.SOCK_STREAM): +af, socktype, proto, _, sa = res +try: +sock = socket.socket(af, socktype, proto) +sock.settimeout(15) +sock.connect(sa) +sockfile = sock.makefile("rwb", 65536) +_do_server_auth(sockfile, auth_secret) +return (sockfile, sock) +except socket.error as e: +emsg = _exception_message(e) +errors.append("tried to connect to %s, but an error occured: %s" % (sa, emsg)) +sock.close() +sock = None +else: +raise Exception("could not open socket: %s" % errors) http://git-wip-us.apache.org/repos/asf/spark/blob/fc1c4e7d/python/pyspark/rdd.py -- diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 864cebb..7d84cbd 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -39,7 +39,7 @@ if sys.version > '3': else: from itertools import imap as map, ifilter as filter -from pyspark.java_gateway import do_server_auth +from pyspark.java_gateway import local_connect_and_auth from pyspark.serializers import NoOpSerializer, CartesianDeserializer, \ BatchedSerializer, CloudPickleSerializer, PairDeserializer, \ PickleSerializer, pack_long, AutoBatchedSerializer, write_with_length, \ @@ -122,30 +122,10 @@ def _parse_memory(s): def _load_from_socket(sock_info, serializer): -port, auth_secret = sock_info -sock = None -# Support for both IPv4 and IPv6. -# On most of IPv6-ready systems, IPv6 will take precedence. -for res in socket.getaddrinfo("localhost", port, socket.AF_UNSPEC, socket.SOCK_STREAM): -af, socktype, proto, canonname, sa = res -sock = socket.socket(af, socktype, proto) -try: -sock.settimeout(15) -sock.connect(sa) -except socket.error: -sock.close() -sock = None -continue -break -
[2/3] spark git commit: [PYSPARK] Updates to pyspark broadcast
[PYSPARK] Updates to pyspark broadcast (cherry picked from commit 09dd34cb1706f2477a89174d6a1a0f17ed5b0a65) Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dd0e7cf5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dd0e7cf5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dd0e7cf5 Branch: refs/heads/branch-2.2 Commit: dd0e7cf5287148618404593ca095dd900b6e993f Parents: fc1c4e7 Author: Imran Rashid Authored: Mon Aug 13 21:35:34 2018 -0500 Committer: Imran Rashid Committed: Tue Sep 25 11:46:03 2018 -0500 -- .../org/apache/spark/api/python/PythonRDD.scala | 349 --- .../spark/api/python/PythonRDDSuite.scala | 23 +- dev/sparktestsupport/modules.py | 2 + python/pyspark/broadcast.py | 58 ++- python/pyspark/context.py | 63 +++- python/pyspark/serializers.py | 58 +++ python/pyspark/test_broadcast.py| 126 +++ python/pyspark/test_serializers.py | 90 + python/pyspark/worker.py| 24 +- 9 files changed, 705 insertions(+), 88 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/dd0e7cf5/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala -- diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index 7b5a179..2f4e3bc 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -24,8 +24,10 @@ import java.util.{ArrayList => JArrayList, List => JList, Map => JMap} import scala.collection.JavaConverters._ import scala.collection.mutable +import scala.concurrent.Promise +import scala.concurrent.duration.Duration import scala.language.existentials -import scala.util.control.NonFatal +import scala.util.Try import org.apache.hadoop.conf.Configuration import org.apache.hadoop.io.compress.CompressionCodec @@ -37,6 +39,7 @@ import org.apache.spark.api.java.{JavaPairRDD, JavaRDD, JavaSparkContext} import org.apache.spark.broadcast.Broadcast import org.apache.spark.input.PortableDataStream import org.apache.spark.internal.Logging +import org.apache.spark.network.util.JavaUtils import org.apache.spark.rdd.RDD import org.apache.spark.security.SocketAuthHelper import org.apache.spark.util._ @@ -293,19 +296,51 @@ private[spark] class PythonRunner( val newBids = broadcastVars.map(_.id).toSet // number of different broadcasts val toRemove = oldBids.diff(newBids) -val cnt = toRemove.size + newBids.diff(oldBids).size +val addedBids = newBids.diff(oldBids) +val cnt = toRemove.size + addedBids.size +val needsDecryptionServer = env.serializerManager.encryptionEnabled && addedBids.nonEmpty +dataOut.writeBoolean(needsDecryptionServer) dataOut.writeInt(cnt) -for (bid <- toRemove) { - // remove the broadcast from worker - dataOut.writeLong(- bid - 1) // bid >= 0 - oldBids.remove(bid) +def sendBidsToRemove(): Unit = { + for (bid <- toRemove) { +// remove the broadcast from worker +dataOut.writeLong(-bid - 1) // bid >= 0 +oldBids.remove(bid) + } } -for (broadcast <- broadcastVars) { - if (!oldBids.contains(broadcast.id)) { +if (needsDecryptionServer) { + // if there is encryption, we setup a server which reads the encrypted files, and sends +// the decrypted data to python + val idsAndFiles = broadcastVars.flatMap { broadcast => + if (oldBids.contains(broadcast.id)) { + None +} else { + Some((broadcast.id, broadcast.value.path)) +} +} + val server = new EncryptedPythonBroadcastServer(env, idsAndFiles) + dataOut.writeInt(server.port) + logTrace(s"broadcast 
decryption server setup on ${server.port}") + PythonRDD.writeUTF(server.secret, dataOut) + sendBidsToRemove() + idsAndFiles.foreach { case (id, _) => // send new broadcast -dataOut.writeLong(broadcast.id) -PythonRDD.writeUTF(broadcast.value.path, dataOut) -oldBids.add(broadcast.id) +dataOut.writeLong(id) +oldBids.add(id) + } + dataOut.flush() + logTrace("waiting for python to read decrypted broadcast data from server") + server.waitTillBroadcastDataSent() + logTrace("done sending decrypted data to python") +} else { +
svn commit: r29666 - in /dev/spark/2.5.0-SNAPSHOT-2018_09_25_08_02-9cbd001-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Tue Sep 25 15:17:19 2018 New Revision: 29666 Log: Apache Spark 2.5.0-SNAPSHOT-2018_09_25_08_02-9cbd001 docs [This commit notification would consist of 1485 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
svn commit: r29662 - in /dev/spark/2.4.1-SNAPSHOT-2018_09_25_06_02-a709718-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Tue Sep 25 13:17:02 2018 New Revision: 29662 Log: Apache Spark 2.4.1-SNAPSHOT-2018_09_25_06_02-a709718 docs [This commit notification would consist of 1472 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
spark git commit: [SPARK-23907][SQL] Revert regr_* functions entirely
Repository: spark Updated Branches: refs/heads/branch-2.4 4ca4ef7b9 -> a709718da [SPARK-23907][SQL] Revert regr_* functions entirely ## What changes were proposed in this pull request? This patch reverts entirely all the regr_* functions added in SPARK-23907. These were added by mgaido91 (and proposed by gatorsmile) to improve compatibility with other database systems, without any actual use cases. However, they are very rarely used, and in Spark there are much better ways to compute these functions, due to Spark's flexibility in exposing real programming APIs. I'm going through all the APIs added in Spark 2.4 and I think we should revert these. If there are strong enough demands and more use cases, we can add them back in the future pretty easily. ## How was this patch tested? Reverted test cases also. Closes #22541 from rxin/SPARK-23907. Authored-by: Reynold Xin Signed-off-by: hyukjinkwon (cherry picked from commit 9cbd001e2476cd06aa0bcfcc77a21a9077d5797a) Signed-off-by: hyukjinkwon Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a709718d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a709718d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a709718d Branch: refs/heads/branch-2.4 Commit: a709718dae495725af4e961b1e0f85bce5d34368 Parents: 4ca4ef7 Author: Reynold Xin Authored: Tue Sep 25 20:13:07 2018 +0800 Committer: hyukjinkwon Committed: Tue Sep 25 20:13:22 2018 +0800 -- .../catalyst/analysis/FunctionRegistry.scala| 9 - .../expressions/aggregate/regression.scala | 190 --- .../sql-tests/inputs/udaf-regrfunctions.sql | 56 -- .../results/udaf-regrfunctions.sql.out | 93 - 4 files changed, 348 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a709718d/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 77860e1..695267a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -299,15 +299,6 @@ object FunctionRegistry { expression[CollectList]("collect_list"), expression[CollectSet]("collect_set"), expression[CountMinSketchAgg]("count_min_sketch"), -expression[RegrCount]("regr_count"), -expression[RegrSXX]("regr_sxx"), -expression[RegrSYY]("regr_syy"), -expression[RegrAvgX]("regr_avgx"), -expression[RegrAvgY]("regr_avgy"), -expression[RegrSXY]("regr_sxy"), -expression[RegrSlope]("regr_slope"), -expression[RegrR2]("regr_r2"), -expression[RegrIntercept]("regr_intercept"), // string functions expression[Ascii]("ascii"), http://git-wip-us.apache.org/repos/asf/spark/blob/a709718d/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/regression.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/regression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/regression.scala deleted file mode 100644 index d8f4505..000 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/regression.scala +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - *http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.catalyst.expressions.aggregate - -import org.apache.spark.sql.catalyst.dsl.expressions._ -import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.types.{AbstractDataType, DoubleType} - -/** - * Base trait for all regression functions. - */ -trait RegrLike extends AggregateFunction with ImplicitCastI
spark git commit: [SPARK-23907][SQL] Revert regr_* functions entirely
Repository: spark Updated Branches: refs/heads/master 7d8f5b62c -> 9cbd001e2 [SPARK-23907][SQL] Revert regr_* functions entirely ## What changes were proposed in this pull request? This patch reverts entirely all the regr_* functions added in SPARK-23907. These were added by mgaido91 (and proposed by gatorsmile) to improve compatibility with other database systems, without any actual use cases. However, they are very rarely used, and in Spark there are much better ways to compute these functions, due to Spark's flexibility in exposing real programming APIs. I'm going through all the APIs added in Spark 2.4 and I think we should revert these. If there are strong enough demands and more use cases, we can add them back in the future pretty easily. ## How was this patch tested? Reverted test cases also. Closes #22541 from rxin/SPARK-23907. Authored-by: Reynold Xin Signed-off-by: hyukjinkwon Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9cbd001e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9cbd001e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9cbd001e Branch: refs/heads/master Commit: 9cbd001e2476cd06aa0bcfcc77a21a9077d5797a Parents: 7d8f5b6 Author: Reynold Xin Authored: Tue Sep 25 20:13:07 2018 +0800 Committer: hyukjinkwon Committed: Tue Sep 25 20:13:07 2018 +0800 -- .../catalyst/analysis/FunctionRegistry.scala| 9 - .../expressions/aggregate/regression.scala | 190 --- .../sql-tests/inputs/udaf-regrfunctions.sql | 56 -- .../results/udaf-regrfunctions.sql.out | 93 - 4 files changed, 348 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9cbd001e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 8b69a47..7dafebf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -300,15 +300,6 @@ object FunctionRegistry { expression[CollectList]("collect_list"), expression[CollectSet]("collect_set"), expression[CountMinSketchAgg]("count_min_sketch"), -expression[RegrCount]("regr_count"), -expression[RegrSXX]("regr_sxx"), -expression[RegrSYY]("regr_syy"), -expression[RegrAvgX]("regr_avgx"), -expression[RegrAvgY]("regr_avgy"), -expression[RegrSXY]("regr_sxy"), -expression[RegrSlope]("regr_slope"), -expression[RegrR2]("regr_r2"), -expression[RegrIntercept]("regr_intercept"), // string functions expression[Ascii]("ascii"), http://git-wip-us.apache.org/repos/asf/spark/blob/9cbd001e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/regression.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/regression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/regression.scala deleted file mode 100644 index d8f4505..000 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/regression.scala +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - *http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.catalyst.expressions.aggregate - -import org.apache.spark.sql.catalyst.dsl.expressions._ -import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.types.{AbstractDataType, DoubleType} - -/** - * Base trait for all regression functions. - */ -trait RegrLike extends AggregateFunction with ImplicitCastInputTypes { - def y: Expression - def x: Expression - - override def children: Seq[Expression] = Seq(y
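With the regr_* aggregates removed, the same statistics can still be recovered from the aggregates that remain, which is the substance of the "much better ways" argument in the commit message. A hedged sketch with toy data and placeholder column names: regr_slope(y, x) equals covar_pop(y, x) / var_pop(x), and regr_intercept follows as avg(y) - slope * avg(x).

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{avg, col, covar_pop, var_pop}

val spark = SparkSession.builder().appName("regr-sketch").getOrCreate()
import spark.implicits._

// Toy data: y is roughly 2 * x.
val df = Seq((1.0, 2.1), (2.0, 3.9), (3.0, 6.2)).toDF("x", "y")

// Aggregate the population moments once, then derive slope and intercept from them.
val stats = df.agg(
  covar_pop(col("y"), col("x")).as("sxy"),
  var_pop(col("x")).as("sxx"),
  avg(col("x")).as("xbar"),
  avg(col("y")).as("ybar"))

stats.selectExpr(
  "sxy / sxx AS regr_slope",
  "ybar - (sxy / sxx) * xbar AS regr_intercept"
).show()
```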
svn commit: r29652 - in /dev/spark/2.5.0-SNAPSHOT-2018_09_25_00_02-7d8f5b6-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Tue Sep 25 07:17:33 2018 New Revision: 29652 Log: Apache Spark 2.5.0-SNAPSHOT-2018_09_25_00_02-7d8f5b6 docs [This commit notification would consist of 1485 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]