This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch fix-lonlat-order in repository https://gitbox.apache.org/repos/asf/sedona.git
commit b17c78f141abd1bec228533f6e4f94924127d7e7 Author: Jia Yu <[email protected]> AuthorDate: Tue Aug 8 01:32:04 2023 -0700 Drop the constructors in Python --- python/sedona/core/SpatialRDD/linestring_rdd.py | 264 -------------------- python/sedona/core/SpatialRDD/point_rdd.py | 266 -------------------- python/sedona/core/SpatialRDD/polygon_rdd.py | 270 --------------------- python/sedona/core/SpatialRDD/rectangle_rdd.py | 269 -------------------- python/tests/core/test_rdd.py | 37 --- python/tests/core/test_spatial_rdd_from_disc.py | 18 +- python/tests/spatial_operator/test_join_base.py | 9 +- .../test_join_query_correctness.py | 45 ++-- .../spatial_operator/test_linestring_range.py | 5 +- .../tests/spatial_operator/test_polygon_range.py | 6 +- .../tests/spatial_operator/test_rectangle_range.py | 6 +- python/tests/spatial_rdd/test_circle_rdd.py | 5 +- python/tests/spatial_rdd/test_linestring_rdd.py | 70 +----- python/tests/spatial_rdd/test_point_rdd.py | 38 +-- python/tests/spatial_rdd/test_polygon_rdd.py | 169 +------------ python/tests/spatial_rdd/test_rectangle_rdd.py | 10 +- python/tests/spatial_rdd/test_spatial_rdd.py | 70 +----- .../tests/spatial_rdd/test_spatial_rdd_writer.py | 7 +- .../sql/test_spatial_rdd_to_spatial_dataframe.py | 4 +- python/tests/test_assign_raw_spatial_rdd.py | 7 +- python/tests/test_scala_example.py | 38 +-- python/tests/utils/test_crs_transformation.py | 120 +-------- 22 files changed, 74 insertions(+), 1659 deletions(-) diff --git a/python/sedona/core/SpatialRDD/linestring_rdd.py b/python/sedona/core/SpatialRDD/linestring_rdd.py index dff4e780..0bd720f5 100644 --- a/python/sedona/core/SpatialRDD/linestring_rdd.py +++ b/python/sedona/core/SpatialRDD/linestring_rdd.py @@ -22,7 +22,6 @@ from sedona.core.SpatialRDD.spatial_rdd_factory import SpatialRDDFactory from sedona.core.enums import FileDataSplitter from sedona.core.enums.file_data_splitter import FileSplitterJvm from sedona.core.jvm.translate import PythonRddToJavaRDDAdapter -from sedona.utils.jvm import JvmStorageLevel from sedona.utils.meta import MultipleMeta @@ -36,16 +35,6 @@ class LineStringRDD(SpatialRDD, metaclass=MultipleMeta): srdd = self._jvm_spatial_rdd(spatial_rdd) self._srdd = srdd - def __init__(self, rdd: RDD, newLevel: StorageLevel): - self._sc = rdd.ctx - self._jvm = self._sc._jvm - - spatial_rdd = PythonRddToJavaRDDAdapter(self._jvm).deserialize_to_linestring_raw_rdd(rdd._jrdd) - - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - srdd = self._jvm_spatial_rdd(spatial_rdd, new_level_jvm) - self._srdd = srdd - def __init__(self): self._do_init() self._srdd = self._jvm_spatial_rdd() @@ -59,17 +48,6 @@ class LineStringRDD(SpatialRDD, metaclass=MultipleMeta): jsrdd = rawSpatialRDD.jsrdd self._srdd = self._jvm_spatial_rdd(jsrdd) - def __init__(self, rawSpatialRDD: JvmSpatialRDD, sourceEpsgCode: str, targetEpsgCode: str): - """ - - :param rawSpatialRDD: RDD - :param sourceEpsgCode: str, epsg code which loaded files is in, ex. epsg:4326 stands for WGS84 - :param targetEpsgCode: str, epsg code to transform SpatialRDD - """ - super().__init__(rawSpatialRDD.sc) - jsrdd = rawSpatialRDD.jsrdd - self._srdd = self._jvm_spatial_rdd(jsrdd, sourceEpsgCode, targetEpsgCode) - def __init__(self, sparkContext: SparkContext, InputLocation: str, startOffset: int, endOffset: int, splitter: FileDataSplitter, carryInputData: bool, partitions: int): """ @@ -158,248 +136,6 @@ class LineStringRDD(SpatialRDD, metaclass=MultipleMeta): carryInputData ) - def __init__(self, rawSpatialRDD: JvmSpatialRDD, newLevel: StorageLevel): - """ - :param rawSpatialRDD: RDD - :param newLevel: StorageLevel - """ - super().__init__(rawSpatialRDD.sc) - jsrdd = rawSpatialRDD.jsrdd - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - self._srdd = self._jvm_spatial_rdd(jsrdd, new_level_jvm) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, startOffset: int, endOffset: int, - splitter: FileDataSplitter, carryInputData: bool, partitions: int, newLevel: StorageLevel): - - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param startOffset: int, starting offset - :param endOffset: int, ending offset - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param partitions: int, number of partitions - :param newLevel: StorageLevel - """ - super().__init__(sparkContext) - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - startOffset, - endOffset, - FileSplitterJvm(self._jvm, splitter).jvm_instance, - carryInputData, - partitions, - JvmStorageLevel(self._jvm, newLevel).jvm_instance - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, startOffset: int, endOffset: int, - splitter: FileDataSplitter, carryInputData: bool, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param startOffset: int, starting offset - :param endOffset: int, ending offset - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param newLevel: StorageLevel - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - startOffset, - endOffset, - jvm_splitter, - carryInputData, - new_level_jvm - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, - carryInputData: bool, partitions: int, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param partitions: int, number of partitions - :param newLevel: StorageLevel - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter, - carryInputData, - partitions, - new_level_jvm - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, carryInputData: bool, - newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param newLevel: StorageLevel - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter, - carryInputData, - new_level_jvm - ) - - def __init__(self, rawSpatialRDD: JvmSpatialRDD, newLevel: StorageLevel, sourceEpsgCRSCode: str, - targetEpsgCode: str): - """ - - :param rawSpatialRDD: RDD - :param newLevel: StorageLevel - :param sourceEpsgCRSCode: str, epsg code which loaded files is in, ex. epsg:4326 stands for WGS84 - :param targetEpsgCode: str, epsg code to transform SpatialRDD - """ - - super().__init__(rawSpatialRDD.sc) - jsrdd = rawSpatialRDD.jsrdd - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - self._srdd = self._jvm_spatial_rdd(jsrdd, new_level_jvm, sourceEpsgCRSCode, targetEpsgCode) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, startOffset: int, endOffset: int, - splitter: FileDataSplitter, carryInputData: bool, partitions: int, newLevel: StorageLevel, - sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param startOffset: int, starting offset - :param endOffset: int, ending offset - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param targetEpsgCode: str, epsg code to transform SpatialRDD - :param newLevel: StorageLevel - :param sourceEpsgCRSCode: str, epsg code which loaded files is in, ex. epsg:4326 stands for WGS84 - :param targetEpsgCode: str, epsg code to transform SpatialRDD - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - startOffset, - endOffset, - jvm_splitter, - carryInputData, - partitions, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, startOffset: int, endOffset: int, - splitter: FileDataSplitter, carryInputData: bool, newLevel: StorageLevel, sourceEpsgCRSCode: str, - targetEpsgCode: str): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param startOffset: int, starting offset - :param endOffset: int, ending offset - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param newLevel: StorageLevel - :param sourceEpsgCRSCode: str, epsg code which loaded files is in, ex. epsg:4326 stands for WGS84 - :param targetEpsgCode: str, epsg code to transform SpatialRDD - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - startOffset, - endOffset, - jvm_splitter, - carryInputData, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, carryInputData: bool, - partitions: int, newLevel: StorageLevel, sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param targetEpsgCode: str, epsg code to transform SpatialRDD - :param newLevel: StorageLevel - :param sourceEpsgCRSCode: str, epsg code which loaded files is in, ex. epsg:4326 stands for WGS84 - :param targetEpsgCode: str, epsg code to transform SpatialRDD - """ - - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter, - carryInputData, - partitions, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, carryInputData: bool, - newLevel: StorageLevel, sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param newLevel: StorageLevel - :param sourceEpsgCRSCode: str, epsg code which loaded files is in, ex. epsg:4326 stands for WGS84 - :param targetEpsgCode: str, epsg code to transform SpatialRDD - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter, - carryInputData, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - @property def _jvm_spatial_rdd(self): if self._sc is not None: diff --git a/python/sedona/core/SpatialRDD/point_rdd.py b/python/sedona/core/SpatialRDD/point_rdd.py index 6354742a..658e5921 100644 --- a/python/sedona/core/SpatialRDD/point_rdd.py +++ b/python/sedona/core/SpatialRDD/point_rdd.py @@ -21,26 +21,11 @@ from sedona.core.SpatialRDD.spatial_rdd import SpatialRDD, JvmSpatialRDD from sedona.core.SpatialRDD.spatial_rdd_factory import SpatialRDDFactory from sedona.core.enums.file_data_splitter import FileSplitterJvm, FileDataSplitter from sedona.core.jvm.translate import PythonRddToJavaRDDAdapter -from sedona.utils.jvm import JvmStorageLevel from sedona.utils.meta import MultipleMeta class PointRDD(SpatialRDD, metaclass=MultipleMeta): - def __init__(self, rdd: RDD, newLevel: StorageLevel): - """ - - :param rdd: RDD - :param newLevel: StorageLevel StorageLevel - """ - super().__init__(rdd.ctx) - - spatial_rdd = PythonRddToJavaRDDAdapter(self._jvm).deserialize_to_point_raw_rdd(rdd._jrdd) - - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - srdd = self._jvm_spatial_rdd(spatial_rdd, new_level_jvm) - self._srdd = srdd - def __init__(self, rdd: RDD): """ @@ -66,18 +51,6 @@ class PointRDD(SpatialRDD, metaclass=MultipleMeta): jsrdd = rawSpatialRDD.jsrdd self._srdd = self._jvm_spatial_rdd(jsrdd) - def __init__(self, rawSpatialRDD: JvmSpatialRDD, sourceEpsgCode: str, targetEpsgCode: str): - """ - - :param rawSpatialRDD: JvmSpatialRDD, jvm representation of spatial rdd RDD - :param sourceEpsgCode: str - :param targetEpsgCode: str, epsg code to transform SpatialRDD str - """ - - super().__init__(rawSpatialRDD.sc) - jsrdd = rawSpatialRDD.jsrdd - self._srdd = self._jvm_spatial_rdd(jsrdd, sourceEpsgCode, targetEpsgCode) - def __init__(self, sparkContext: SparkContext, InputLocation: str, Offset: int, splitter: FileDataSplitter, carryInputData: bool, partitions: int): """ @@ -160,245 +133,6 @@ class PointRDD(SpatialRDD, metaclass=MultipleMeta): carryInputData ) - def __init__(self, rawSpatialRDD: JvmSpatialRDD, newLevel: StorageLevel): - """ - - :param rawSpatialRDD: - :param newLevel: - """ - - super().__init__(rawSpatialRDD.sc) - jsrdd = rawSpatialRDD.jsrdd - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - self._srdd = self._jvm_spatial_rdd(jsrdd, new_level_jvm) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, Offset: int, splitter: FileDataSplitter, - carryInputData: bool, partitions: int, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param Offset: int, point offset - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param partitions: int, number of partitions - :param newLevel: StorageLevel - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - Offset, - jvm_splitter, - carryInputData, - partitions, - new_level_jvm - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, Offset: int, splitter: FileDataSplitter, - carryInputData: bool, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param Offset: int, point offset - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param newLevel: StorageLevel - """ - - super().__init__(sparkContext) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - Offset, - jvm_splitter, - carryInputData, - new_level_jvm - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, carryInputData: bool, - partitions: int, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param partitions: int, number of partitions - :param newLevel: StorageLevel - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter, - carryInputData, - partitions, - new_level_jvm - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, carryInputData: bool, - newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param newLevel: StorageLevel - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter, - carryInputData, - new_level_jvm - ) - - def __init__(self, rawSpatialRDD: JvmSpatialRDD, newLevel: StorageLevel, sourceEpsgCRSCode: str, - targetEpsgCode: str): - """ - - :param rawSpatialRDD: JvmSpatialRDD, jvm representation of spatial rdd - :param newLevel: StorageLevel - :param sourceEpsgCRSCode: str, epsg code which loaded files is in, ex. epsg:4326 stands for WGS84 - :param targetEpsgCode: str, epsg code to transform SpatialRDD - """ - - super().__init__(rawSpatialRDD.sc) - jsrdd = rawSpatialRDD.jsrdd - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - self._srdd = self._jvm_spatial_rdd(jsrdd, new_level_jvm, sourceEpsgCRSCode, targetEpsgCode) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, Offset: int, splitter: FileDataSplitter, - carryInputData: bool, partitions: int, newLevel: StorageLevel, sourceEpsgCRSCode: str, - targetEpsgCode: str): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param Offset: int, point offset - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param partitions: int, number of partitions - :param newLevel: StorageLevel - :param sourceEpsgCRSCode: str, epsg code which loaded files is in, ex. epsg:4326 stands for WGS84 - :param targetEpsgCode: str, epsg code to transform SpatialRDD - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - Offset, - jvm_splitter, - carryInputData, - partitions, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, Offset: int, splitter: FileDataSplitter, - carryInputData: bool, newLevel: StorageLevel, sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param Offset: int, point offset - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param newLevel: StorageLevel - :param sourceEpsgCRSCode: str, epsg code which loaded files is in, ex. epsg:4326 stands for WGS84 - :param targetEpsgCode: str, epsg code to transform SpatialRDD - """ - - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - Offset, - jvm_splitter, - carryInputData, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, carryInputData: bool, - partitions: int, newLevel: StorageLevel, sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param partitions: int, number of partitions - :param newLevel: StorageLevel - :param sourceEpsgCRSCode: str, epsg code which loaded files is in, ex. epsg:4326 stands for WGS84 - :param targetEpsgCode: str, epsg code to transform SpatialRDD - """ - - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter, - carryInputData, - partitions, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, carryInputData: bool, - newLevel: StorageLevel, sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param newLevel: StorageLevel - :param sourceEpsgCRSCode: str, epsg code which loaded files is in, ex. epsg:4326 stands for WGS84 - :param targetEpsgCode: str, epsg code to transform SpatialRDD - """ - - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter, - carryInputData, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - def MinimumBoundingRectangle(self): raise NotImplementedError("PointRDD has not MinimumBoundingRectangle method.") diff --git a/python/sedona/core/SpatialRDD/polygon_rdd.py b/python/sedona/core/SpatialRDD/polygon_rdd.py index a2758ebf..c72ba9c7 100644 --- a/python/sedona/core/SpatialRDD/polygon_rdd.py +++ b/python/sedona/core/SpatialRDD/polygon_rdd.py @@ -21,21 +21,11 @@ from sedona.core.SpatialRDD.spatial_rdd import SpatialRDD, JvmSpatialRDD from sedona.core.SpatialRDD.spatial_rdd_factory import SpatialRDDFactory from sedona.core.enums.file_data_splitter import FileSplitterJvm, FileDataSplitter from sedona.core.jvm.translate import PythonRddToJavaRDDAdapter -from sedona.utils.jvm import JvmStorageLevel from sedona.utils.meta import MultipleMeta class PolygonRDD(SpatialRDD, metaclass=MultipleMeta): - def __init__(self, rdd: RDD, newLevel: StorageLevel): - super().__init__(rdd.ctx) - - spatial_rdd = PythonRddToJavaRDDAdapter(self._jvm).deserialize_to_polygon_raw_rdd(rdd._jrdd) - - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - srdd = self._jvm_spatial_rdd(spatial_rdd, new_level_jvm) - self._srdd = srdd - def __init__(self, rdd: RDD): super().__init__(rdd.ctx) spatial_rdd = PythonRddToJavaRDDAdapter(self._jvm).deserialize_to_polygon_raw_rdd(rdd._jrdd) @@ -55,30 +45,6 @@ class PolygonRDD(SpatialRDD, metaclass=MultipleMeta): jsrdd = rawSpatialRDD.jsrdd self._srdd = self._jvm_spatial_rdd(jsrdd) - def __init__(self, rawSpatialRDD: JvmSpatialRDD, sourceEpsgCode: str, targetEpsgCode: str): - """ - - :param rawSpatialRDD: - :param sourceEpsgCode: - :param targetEpsgCode: - """ - - super().__init__(rawSpatialRDD.sc) - jsrdd = rawSpatialRDD.jsrdd - self._srdd = self._jvm_spatial_rdd(jsrdd, sourceEpsgCode, targetEpsgCode) - - def __init__(self, rawSpatialRDD: JvmSpatialRDD, newLevel: StorageLevel): - """ - :param rawSpatialRDD: - :param sourceEpsgCode: - :param targetEpsgCode: - """ - - super().__init__(rawSpatialRDD.sc) - jsrdd = rawSpatialRDD.jsrdd - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - self._srdd = self._jvm_spatial_rdd(jsrdd, new_level_jvm) - def __init__(self, sparkContext: SparkContext, InputLocation: str, startOffset: int, endOffset: int, splitter: FileDataSplitter, carryInputData: bool, partitions: int): """ @@ -169,242 +135,6 @@ class PolygonRDD(SpatialRDD, metaclass=MultipleMeta): carryInputData ) - def __init__(self, sparkContext: SparkContext, InputLocation: str, startOffset: int, endOffset: int, - splitter: FileDataSplitter, carryInputData: bool, partitions: int, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param startOffset: - :param endOffset: - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param partitions: int, the partitions - :param newLevel: - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - startOffset, - endOffset, - jvm_splitter.jvm_instance, - carryInputData, - partitions, - new_level_jvm - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, startOffset: int, endOffset: int, - splitter: FileDataSplitter, carryInputData: bool, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param startOffset: - :param endOffset: - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param newLevel: - """ - - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - startOffset, - endOffset, - jvm_splitter.jvm_instance, - carryInputData, - new_level_jvm - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, - splitter: FileDataSplitter, carryInputData: bool, partitions: int, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param partitions: int, the partitions - :param newLevel: - """ - - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter.jvm_instance, - carryInputData, - partitions, - new_level_jvm - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, - splitter: FileDataSplitter, carryInputData: bool, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param newLevel: - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter.jvm_instance, - carryInputData, - new_level_jvm - ) - - def __init__(self, rawSpatialRDD: JvmSpatialRDD, newLevel: StorageLevel, sourceEpsgCRSCode: str, - targetEpsgCode: str): - """ - - :param rawSpatialRDD: - :param newLevel: - :param sourceEpsgCRSCode: - :param targetEpsgCode: - """ - - super().__init__(rawSpatialRDD.sc) - jsrdd = rawSpatialRDD.jsrdd - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - self._srdd = self._jvm_spatial_rdd(jsrdd, new_level_jvm, sourceEpsgCRSCode, targetEpsgCode) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, startOffset: int, endOffset: int, - splitter: FileDataSplitter, carryInputData: bool, partitions: int, newLevel: StorageLevel, - sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param startOffset: - :param endOffset: - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param partitions: int, the partitions - :param newLevel: - :param sourceEpsgCRSCode: str, the source epsg CRS code - :param targetEpsgCode: str, the target epsg code - """ - - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - startOffset, - endOffset, - jvm_splitter, - carryInputData, - partitions, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, startOffset: int, endOffset: int, - splitter: FileDataSplitter, carryInputData: bool, newLevel: StorageLevel, sourceEpsgCRSCode: str, - targetEpsgCode: str): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param startOffset: - :param endOffset: - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param newLevel: - :param sourceEpsgCRSCode: str, the source epsg CRS code - :param targetEpsgCode: str, the target epsg code - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - startOffset, - endOffset, - jvm_splitter, - carryInputData, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, carryInputData: bool, - partitions: int, newLevel: StorageLevel, sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param partitions: int, the partitions - :param newLevel: - :param sourceEpsgCRSCode: str, the source epsg CRS code - :param targetEpsgCode: str, the target epsg code - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter.jvm_instance, - carryInputData, - partitions, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, - carryInputData: bool, newLevel: StorageLevel, sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: bool, - :param newLevel: - :param sourceEpsgCRSCode: str, the source epsg CRS code - :param targetEpsgCode: str, the target epsg code - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter.jvm_instance, - carryInputData, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - def MinimumBoundingRectangle(self): from sedona.core.SpatialRDD import RectangleRDD rectangle_rdd = RectangleRDD() diff --git a/python/sedona/core/SpatialRDD/rectangle_rdd.py b/python/sedona/core/SpatialRDD/rectangle_rdd.py index 5851bb04..48d9a222 100644 --- a/python/sedona/core/SpatialRDD/rectangle_rdd.py +++ b/python/sedona/core/SpatialRDD/rectangle_rdd.py @@ -26,16 +26,6 @@ from sedona.utils.meta import MultipleMeta class RectangleRDD(SpatialRDD, metaclass=MultipleMeta): - def __init__(self, rdd: RDD, newLevel: StorageLevel): - self._sc = rdd.ctx - self._jvm = self._sc._jvm - - spatial_rdd = self._jvm.GeoSerializerData.deserializeToPolygonRawRDD(rdd._jrdd) - - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - srdd = self._jvm_spatial_rdd(spatial_rdd, new_level_jvm) - self._srdd = srdd - def __init__(self): self._do_init() self._srdd = self._jvm_spatial_rdd() @@ -49,36 +39,6 @@ class RectangleRDD(SpatialRDD, metaclass=MultipleMeta): jsrdd = rawSpatialRDD.jsrdd self._srdd = self._jvm_spatial_rdd(jsrdd) - def __init__(self, rawSpatialRDD: JvmSpatialRDD, sourceEpsgCode: str, targetEpsgCode: str): - """ - - :param rawSpatialRDD: - :param sourceEpsgCode: str, the source epsg CRS code - :param targetEpsgCode: str, the target epsg code - """ - - super().__init__(rawSpatialRDD.sc) - - self._srdd = self._jvm_spatial_rdd( - rawSpatialRDD.jsrdd, - sourceEpsgCode, - targetEpsgCode - ) - - def __init__(self, rawSpatialRDD: JvmSpatialRDD, newLevel: StorageLevel): - """ - - :param rawSpatialRDD: - :param newLevel: - """ - super().__init__(rawSpatialRDD.sc) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - rawSpatialRDD.jsrdd, - new_level_jvm - ) - def __init__(self, sparkContext: SparkContext, InputLocation: str, Offset: int, splitter: FileDataSplitter, carryInputData: bool, partitions: int): """ @@ -164,235 +124,6 @@ class RectangleRDD(SpatialRDD, metaclass=MultipleMeta): jvm_splitter.jvm_instance, carryInputData ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, Offset: int, - splitter: FileDataSplitter, carryInputData: bool, partitions: int, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param Offset: - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param partitions: int, the partitions - :param newLevel: - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - Offset, - jvm_splitter.jvm_instance, - carryInputData, - partitions, - new_level_jvm - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, Offset: int, - splitter: FileDataSplitter, carryInputData: bool, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param Offset: - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param newLevel: - """ - - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - Offset, - jvm_splitter.jvm_instance, - carryInputData, - new_level_jvm - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, - splitter: FileDataSplitter, carryInputData: bool, partitions: int, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param partitions: int, the partitions - :param newLevel: - """ - - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter.jvm_instance, - carryInputData, - partitions, - new_level_jvm - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, - splitter: FileDataSplitter, carryInputData: bool, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param newLevel: - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter.jvm_instance, - carryInputData, - new_level_jvm - ) - - def __init__(self, rawSpatialRDD: JvmSpatialRDD, newLevel: StorageLevel, sourceEpsgCRSCode: str, - targetEpsgCode: str): - """ - - :param rawSpatialRDD: - :param newLevel: - :param sourceEpsgCRSCode: - :param targetEpsgCode: - """ - - super().__init__(rawSpatialRDD.sc) - jsrdd = rawSpatialRDD.jsrdd - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - self._srdd = self._jvm_spatial_rdd(jsrdd, new_level_jvm, sourceEpsgCRSCode, targetEpsgCode) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, Offset: int, - splitter: FileDataSplitter, carryInputData: bool, partitions: int, newLevel: StorageLevel, - sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param Offset: - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param partitions: int, the partitions - :param newLevel: - :param sourceEpsgCRSCode: str, the source epsg CRS code - :param targetEpsgCode: str, the target epsg code - """ - - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - Offset, - jvm_splitter, - carryInputData, - partitions, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, Offset: int, - splitter: FileDataSplitter, carryInputData: bool, newLevel: StorageLevel, sourceEpsgCRSCode: str, - targetEpsgCode: str): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param Offset: - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param newLevel: - :param sourceEpsgCRSCode: str, the source epsg CRS code - :param targetEpsgCode: str, the target epsg code - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - Offset, - jvm_splitter.jvm_instance, - carryInputData, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, carryInputData: bool, - partitions: int, newLevel: StorageLevel, sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param partitions: int, the partitions - :param newLevel: - :param sourceEpsgCRSCode: str, the source epsg CRS code - :param targetEpsgCode: str, the target epsg code - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter.jvm_instance, - carryInputData, - partitions, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, - carryInputData: bool, newLevel: StorageLevel, sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: bool, - :param newLevel: - :param sourceEpsgCRSCode: str, the source epsg CRS code - :param targetEpsgCode: str, the target epsg code - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter.jvm_instance, - carryInputData, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - @property def _jvm_spatial_rdd(self): spatial_factory = SpatialRDDFactory(self._sc) diff --git a/python/tests/core/test_rdd.py b/python/tests/core/test_rdd.py index 5560f4e0..201d4429 100644 --- a/python/tests/core/test_rdd.py +++ b/python/tests/core/test_rdd.py @@ -17,7 +17,6 @@ import logging -from pyspark import StorageLevel from shapely.geometry import Point from sedona.core.SpatialRDD import PointRDD, PolygonRDD, CircleRDD @@ -318,39 +317,3 @@ class TestSpatialRDD(TestBase): True, True ).count - - def test_crs_transformed_spatial_range_query(self): - object_rdd = PointRDD( - sparkContext=self.sc, - InputLocation=point_rdd_input_location, - Offset=point_rdd_offset, - splitter=point_rdd_splitter, - carryInputData=False, - newLevel=StorageLevel.DISK_ONLY, - sourceEpsgCRSCode="epsg:4326", - targetEpsgCode="epsg:3005" - ) - for i in range(each_query_loop_times): - result_size = RangeQuery.SpatialRangeQuery( - object_rdd, range_query_window, False, False - ) - - def test_crs_transformed_spatial_range_query_using_index(self): - object_rdd = PointRDD( - sparkContext=self.sc, - InputLocation=point_rdd_input_location, - Offset=point_rdd_offset, - splitter=point_rdd_splitter, - carryInputData=False, - newLevel=StorageLevel.DISK_ONLY, - sourceEpsgCRSCode="epsg:4326", - targetEpsgCode="epsg:3005" - ) - object_rdd.buildIndex(point_rdd_index_type, False) - for i in range(each_query_loop_times): - result_size = RangeQuery.SpatialRangeQuery( - object_rdd, - range_query_window, - False, - True - ).count diff --git a/python/tests/core/test_spatial_rdd_from_disc.py b/python/tests/core/test_spatial_rdd_from_disc.py index 6f9f6457..b2024181 100644 --- a/python/tests/core/test_spatial_rdd_from_disc.py +++ b/python/tests/core/test_spatial_rdd_from_disc.py @@ -19,7 +19,6 @@ import os import shutil import pytest -from pyspark import StorageLevel from sedona.core.SpatialRDD import PointRDD, PolygonRDD, LineStringRDD from sedona.core.enums import IndexType, GridType @@ -45,7 +44,7 @@ class TestDiscUtils(TestBase): from tests.properties.point_properties import input_location, offset, splitter, num_partitions point_rdd = PointRDD( - self.sc, input_location, offset, splitter, True, num_partitions, StorageLevel.MEMORY_ONLY + self.sc, input_location, offset, splitter, True, num_partitions ) point_rdd.rawJvmSpatialRDD.saveAsObjectFile(os.path.join(disc_location, "point")) @@ -57,8 +56,7 @@ class TestDiscUtils(TestBase): input_location, splitter, True, - num_partitions, - StorageLevel.MEMORY_ONLY + num_partitions ) polygon_rdd.rawJvmSpatialRDD.saveAsObjectFile(os.path.join(disc_location, "polygon")) @@ -69,8 +67,7 @@ class TestDiscUtils(TestBase): input_location, splitter, True, - num_partitions, - StorageLevel.MEMORY_ONLY + num_partitions ) linestring_rdd.rawJvmSpatialRDD.saveAsObjectFile(os.path.join(disc_location, "line_string")) @@ -81,8 +78,7 @@ class TestDiscUtils(TestBase): input_location, splitter, True, - num_partitions, - StorageLevel.MEMORY_ONLY + num_partitions ) linestring_rdd.buildIndex(IndexType.RTREE, False) linestring_rdd.indexedRawRDD.saveAsObjectFile(os.path.join(disc_location, "line_string_index")) @@ -94,8 +90,7 @@ class TestDiscUtils(TestBase): input_location, splitter, True, - num_partitions, - StorageLevel.MEMORY_ONLY + num_partitions ) polygon_rdd.buildIndex(IndexType.RTREE, False) polygon_rdd.indexedRawRDD.saveAsObjectFile(os.path.join(disc_location, "polygon_index")) @@ -103,8 +98,7 @@ class TestDiscUtils(TestBase): def test_saving_to_disc_index_point(self): from tests.properties.point_properties import input_location, offset, splitter, num_partitions point_rdd = PointRDD( - self.sc, input_location, offset, splitter, True, num_partitions, StorageLevel.MEMORY_ONLY - ) + self.sc, input_location, offset, splitter, True, num_partitions) point_rdd.buildIndex(IndexType.RTREE, False) point_rdd.indexedRawRDD.saveAsObjectFile(os.path.join(disc_location, "point_index")) diff --git a/python/tests/spatial_operator/test_join_base.py b/python/tests/spatial_operator/test_join_base.py index 434e9875..92214194 100644 --- a/python/tests/spatial_operator/test_join_base.py +++ b/python/tests/spatial_operator/test_join_base.py @@ -16,7 +16,6 @@ # under the License. import pytest -from pyspark import StorageLevel from sedona.core.SpatialRDD import RectangleRDD, PolygonRDD, LineStringRDD, PointRDD from sedona.core.SpatialRDD.spatial_rdd import SpatialRDD @@ -32,25 +31,25 @@ class TestJoinBase(TestBase): rdd = PointRDD( self.sc, location, 1, splitter, False, num_partitions ) - return PointRDD(rdd.rawJvmSpatialRDD, StorageLevel.MEMORY_ONLY) + return PointRDD(rdd.rawJvmSpatialRDD) def create_linestring_rdd(self, location, splitter, num_partitions): rdd = LineStringRDD( self.sc, location, splitter, True, num_partitions ) - return LineStringRDD(rdd.rawJvmSpatialRDD, StorageLevel.MEMORY_ONLY) + return LineStringRDD(rdd.rawJvmSpatialRDD) def create_polygon_rdd(self, location, splitter, num_partitions): rdd = PolygonRDD( self.sc, location, splitter, True, num_partitions ) - return PolygonRDD(rdd.rawJvmSpatialRDD, StorageLevel.MEMORY_ONLY) + return PolygonRDD(rdd.rawJvmSpatialRDD) def create_rectangle_rdd(self, location, splitter, num_partitions): rdd = RectangleRDD( self.sc, location, splitter, True, num_partitions) return RectangleRDD( - rdd.rawJvmSpatialRDD, StorageLevel.MEMORY_ONLY + rdd.rawJvmSpatialRDD ) def partition_rdds(self, query_rdd: SpatialRDD, spatial_rdd: SpatialRDD, grid_type): diff --git a/python/tests/spatial_operator/test_join_query_correctness.py b/python/tests/spatial_operator/test_join_query_correctness.py index 79e7aeb0..e2390b28 100644 --- a/python/tests/spatial_operator/test_join_query_correctness.py +++ b/python/tests/spatial_operator/test_join_query_correctness.py @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. -from pyspark import StorageLevel from shapely.geometry import Point, Polygon, LineString from shapely.geometry.base import BaseGeometry @@ -47,8 +46,8 @@ class TestJoinQueryCorrectness(TestBase): self.verify_join_result(result_no_index) def test_on_boundary_point_join_correctness(self): - window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY) - object_rdd = PointRDD(self.sc.parallelize(self.test_on_boundary_point_set), StorageLevel.MEMORY_ONLY) + window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set)) + object_rdd = PointRDD(self.sc.parallelize(self.test_on_boundary_point_set)) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) result = JoinQuery.SpatialJoinQuery(object_rdd, window_rdd, True, False).collect() @@ -59,8 +58,8 @@ class TestJoinQueryCorrectness(TestBase): def test_outside_point_join_correctness(self): self.once_before_all() - window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY) - object_rdd = PointRDD(self.sc.parallelize(self.test_outside_point_set), StorageLevel.MEMORY_ONLY) + window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set)) + object_rdd = PointRDD(self.sc.parallelize(self.test_outside_point_set)) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) result = JoinQuery.SpatialJoinQuery(object_rdd, window_rdd, True, False).collect() @@ -71,9 +70,9 @@ class TestJoinQueryCorrectness(TestBase): def test_inside_linestring_join_correctness(self): window_rdd = PolygonRDD( - self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY + self.sc.parallelize(self.test_polygon_window_set) ) - object_rdd = LineStringRDD(self.sc.parallelize(self.test_inside_linestring_set), StorageLevel.MEMORY_ONLY) + object_rdd = LineStringRDD(self.sc.parallelize(self.test_inside_linestring_set)) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) @@ -84,8 +83,8 @@ class TestJoinQueryCorrectness(TestBase): self.verify_join_result(result_no_index) def test_overlapped_linestring_join_correctness(self): - window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY) - object_rdd = LineStringRDD(self.sc.parallelize(self.test_overlapped_linestring_set), StorageLevel.MEMORY_ONLY) + window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set)) + object_rdd = LineStringRDD(self.sc.parallelize(self.test_overlapped_linestring_set)) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) result = JoinQuery.SpatialJoinQuery(object_rdd, window_rdd, True, True).collect() @@ -95,8 +94,8 @@ class TestJoinQueryCorrectness(TestBase): self.verify_join_result(result_no_index) def test_outside_line_string_join_correctness(self): - window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY) - object_rdd = LineStringRDD(self.sc.parallelize(self.test_outside_linestring_set), StorageLevel.MEMORY_ONLY) + window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set)) + object_rdd = LineStringRDD(self.sc.parallelize(self.test_outside_linestring_set)) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) result = JoinQuery.SpatialJoinQuery(object_rdd, window_rdd, True, False).collect() @@ -106,9 +105,9 @@ class TestJoinQueryCorrectness(TestBase): assert 0 == result_no_index.__len__() def test_inside_polygon_join_correctness(self): - window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY) + window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set)) - object_rdd = PolygonRDD(self.sc.parallelize(self.test_inside_polygon_set), StorageLevel.MEMORY_ONLY) + object_rdd = PolygonRDD(self.sc.parallelize(self.test_inside_polygon_set)) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) result = JoinQuery.SpatialJoinQuery(object_rdd, window_rdd, True, False).collect() @@ -118,8 +117,8 @@ class TestJoinQueryCorrectness(TestBase): self.verify_join_result(result_no_index) def test_overlapped_polygon_join_correctness(self): - window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY) - object_rdd = PolygonRDD(self.sc.parallelize(self.test_overlapped_polygon_set), StorageLevel.MEMORY_ONLY) + window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set)) + object_rdd = PolygonRDD(self.sc.parallelize(self.test_overlapped_polygon_set)) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) result = JoinQuery.SpatialJoinQuery(object_rdd, window_rdd, True, True).collect() @@ -129,8 +128,8 @@ class TestJoinQueryCorrectness(TestBase): self.verify_join_result(result_no_index) def test_outside_polygon_join_correctness(self): - window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY) - object_rdd = PolygonRDD(self.sc.parallelize(self.test_outside_polygon_set), StorageLevel.MEMORY_ONLY) + window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set)) + object_rdd = PolygonRDD(self.sc.parallelize(self.test_outside_polygon_set)) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) result = JoinQuery.SpatialJoinQuery(object_rdd, window_rdd, True, False).collect() @@ -140,9 +139,9 @@ class TestJoinQueryCorrectness(TestBase): assert 0 == result_no_index.__len__() def test_inside_polygon_distance_join_correctness(self): - center_geometry_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY) + center_geometry_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set)) window_rdd = CircleRDD(center_geometry_rdd, 0.1) - object_rdd = PolygonRDD(self.sc.parallelize(self.test_inside_polygon_set), StorageLevel.MEMORY_ONLY) + object_rdd = PolygonRDD(self.sc.parallelize(self.test_inside_polygon_set)) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) result = JoinQuery.DistanceJoinQuery(object_rdd, window_rdd, True, False).collect() @@ -152,9 +151,9 @@ class TestJoinQueryCorrectness(TestBase): self.verify_join_result(result_no_index) def test_overlapped_polygon_distance_join_correctness(self): - center_geometry_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY) + center_geometry_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set)) window_rdd = CircleRDD(center_geometry_rdd, 0.1) - object_rdd = PolygonRDD(self.sc.parallelize(self.test_overlapped_polygon_set), StorageLevel.MEMORY_ONLY) + object_rdd = PolygonRDD(self.sc.parallelize(self.test_overlapped_polygon_set)) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) result = JoinQuery.DistanceJoinQuery(object_rdd, window_rdd, True, True).collect() @@ -164,9 +163,9 @@ class TestJoinQueryCorrectness(TestBase): self.verify_join_result(result_no_index) def test_outside_polygon_distance_join_correctness(self): - center_geometry_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY) + center_geometry_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set)) window_rdd = CircleRDD(center_geometry_rdd, 0.1) - object_rdd = PolygonRDD(self.sc.parallelize(self.test_outside_polygon_set), StorageLevel.MEMORY_ONLY) + object_rdd = PolygonRDD(self.sc.parallelize(self.test_outside_polygon_set)) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) result = JoinQuery.DistanceJoinQuery(object_rdd, window_rdd, True, True).collect() diff --git a/python/tests/spatial_operator/test_linestring_range.py b/python/tests/spatial_operator/test_linestring_range.py index e5827bca..1b60e924 100644 --- a/python/tests/spatial_operator/test_linestring_range.py +++ b/python/tests/spatial_operator/test_linestring_range.py @@ -17,7 +17,6 @@ import os -from pyspark import StorageLevel from sedona.core.SpatialRDD import LineStringRDD from sedona.core.enums import IndexType, FileDataSplitter @@ -39,7 +38,7 @@ class TestLineStringRange(TestBase): def test_spatial_range_query(self): spatial_rdd = LineStringRDD( - self.sc, input_location, splitter, True, StorageLevel.MEMORY_ONLY + self.sc, input_location, splitter, True ) for i in range(self.loop_times): result_size = RangeQuery.SpatialRangeQuery(spatial_rdd, self.query_envelope, False, False).count() @@ -50,7 +49,7 @@ class TestLineStringRange(TestBase): def test_spatial_range_query_using_index(self): spatial_rdd = LineStringRDD( - self.sc, input_location, splitter, True, StorageLevel.MEMORY_ONLY + self.sc, input_location, splitter, True ) spatial_rdd.buildIndex(IndexType.RTREE, False) diff --git a/python/tests/spatial_operator/test_polygon_range.py b/python/tests/spatial_operator/test_polygon_range.py index 1d83c4e9..58c775c7 100644 --- a/python/tests/spatial_operator/test_polygon_range.py +++ b/python/tests/spatial_operator/test_polygon_range.py @@ -17,8 +17,6 @@ import os -from pyspark import StorageLevel - from sedona.core.SpatialRDD import PolygonRDD from sedona.core.enums import IndexType, FileDataSplitter from sedona.core.geom.envelope import Envelope @@ -38,7 +36,7 @@ class TestPolygonRange(TestBase): def test_spatial_range_query(self): spatial_rdd = PolygonRDD( - self.sc, input_location, splitter, True, StorageLevel.MEMORY_ONLY + self.sc, input_location, splitter, True ) for i in range(self.loop_times): result_size = RangeQuery.\ @@ -50,7 +48,7 @@ class TestPolygonRange(TestBase): def test_spatial_range_query_using_index(self): spatial_rdd = PolygonRDD( - self.sc, input_location, splitter, True, StorageLevel.MEMORY_ONLY + self.sc, input_location, splitter, True ) spatial_rdd.buildIndex(IndexType.RTREE, False) for i in range(self.loop_times): diff --git a/python/tests/spatial_operator/test_rectangle_range.py b/python/tests/spatial_operator/test_rectangle_range.py index 16136305..ac92a663 100644 --- a/python/tests/spatial_operator/test_rectangle_range.py +++ b/python/tests/spatial_operator/test_rectangle_range.py @@ -17,8 +17,6 @@ import os -from pyspark import StorageLevel - from sedona.core.SpatialRDD import RectangleRDD from sedona.core.enums import IndexType, FileDataSplitter from sedona.core.geom.envelope import Envelope @@ -46,7 +44,7 @@ class TestRectangleRange(TestBase): loop_times = 5 def test_spatial_range_query(self): - spatial_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter, True, StorageLevel.MEMORY_ONLY) + spatial_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter, True) for i in range(self.loop_times): result_size = RangeQuery.SpatialRangeQuery( @@ -58,7 +56,7 @@ class TestRectangleRange(TestBase): def test_spatial_range_query_using_index(self): spatial_rdd = RectangleRDD( - self.sc, inputLocation, offset, splitter, True, StorageLevel.MEMORY_ONLY) + self.sc, inputLocation, offset, splitter, True) spatial_rdd.buildIndex(IndexType.RTREE, False) for i in range(self.loop_times): diff --git a/python/tests/spatial_rdd/test_circle_rdd.py b/python/tests/spatial_rdd/test_circle_rdd.py index 9dc89486..746c5d20 100644 --- a/python/tests/spatial_rdd/test_circle_rdd.py +++ b/python/tests/spatial_rdd/test_circle_rdd.py @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -from pyspark import StorageLevel - from sedona.core.SpatialRDD import PointRDD, CircleRDD from tests.test_base import TestBase from tests.properties.point_properties import input_location, offset, splitter, num_partitions @@ -31,8 +29,7 @@ class TestCircleRDD(TestBase): offset, splitter, True, - num_partitions, - StorageLevel.MEMORY_ONLY + num_partitions ) circle_rdd = CircleRDD(spatial_rdd, 0.5) diff --git a/python/tests/spatial_rdd/test_linestring_rdd.py b/python/tests/spatial_rdd/test_linestring_rdd.py index f693e882..ea6d4e73 100644 --- a/python/tests/spatial_rdd/test_linestring_rdd.py +++ b/python/tests/spatial_rdd/test_linestring_rdd.py @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -from pyspark import StorageLevel - from sedona.core.SpatialRDD import LineStringRDD from sedona.core.enums import IndexType, GridType from sedona.core.geom.envelope import Envelope @@ -40,32 +38,17 @@ class TestLineStringRDD(TestBase): InputLocation=input_location, splitter=splitter, carryInputData=True, - partitions=num_partitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=num_partitions ) self.compare_count(spatial_rdd_core, input_boundary, input_count) - spatial_rdd = LineStringRDD() - - spatial_rdd_core = LineStringRDD( - self.sc, - input_location, - splitter, - True, - num_partitions, - StorageLevel.MEMORY_ONLY - ) self.compare_count(spatial_rdd_core, input_boundary, input_count) spatial_rdd = LineStringRDD(spatial_rdd_core.rawJvmSpatialRDD) self.compare_count(spatial_rdd, input_boundary, input_count) - spatial_rdd = LineStringRDD(spatial_rdd_core.rawJvmSpatialRDD, "epsg:4326", "epsg:5070") - - self.compare_count(spatial_rdd, transformed_envelope, input_count) - spatial_rdd = LineStringRDD(self.sc, input_location, 0, 3, splitter, True, num_partitions) self.compare_count(spatial_rdd, input_boundary_2, input_count) @@ -82,52 +65,14 @@ class TestLineStringRDD(TestBase): self.compare_count(spatial_rdd, input_boundary, input_count) - spatial_rdd = LineStringRDD(spatial_rdd_core.rawJvmSpatialRDD, StorageLevel.MEMORY_ONLY) + spatial_rdd = LineStringRDD(spatial_rdd_core.rawJvmSpatialRDD) self.compare_count(spatial_rdd, input_boundary, input_count) - spatial_rdd = LineStringRDD(self.sc, input_location, 0, 3, splitter, True, num_partitions, StorageLevel.MEMORY_ONLY) - - self.compare_count(spatial_rdd, input_boundary_2, input_count) - - spatial_rdd = LineStringRDD(self.sc, input_location, 0, 3, splitter, True, - StorageLevel.MEMORY_ONLY) + spatial_rdd = LineStringRDD(self.sc, input_location, 0, 3, splitter, True, num_partitions) self.compare_count(spatial_rdd, input_boundary_2, input_count) - spatial_rdd = LineStringRDD(self.sc, input_location, splitter, True, num_partitions, - StorageLevel.MEMORY_ONLY) - - self.compare_count(spatial_rdd, input_boundary, input_count) - - spatial_rdd = LineStringRDD(self.sc, input_location, splitter, True, StorageLevel.MEMORY_ONLY) - - self.compare_count(spatial_rdd, input_boundary, input_count) - - spatial_rdd = LineStringRDD(spatial_rdd_core.rawJvmSpatialRDD, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:5070") - - self.compare_count(spatial_rdd, transformed_envelope, input_count) - - spatial_rdd = LineStringRDD(self.sc, input_location, 0, 3, splitter, True, num_partitions, - StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:5070") - - self.compare_count(spatial_rdd, transformed_envelope_2, input_count) - - spatial_rdd = LineStringRDD(self.sc, input_location, 0, 3, splitter, True, - StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:5070") - - self.compare_count(spatial_rdd, transformed_envelope_2, input_count) - - spatial_rdd = LineStringRDD(self.sc, input_location, splitter, True, num_partitions, - StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:5070") - - self.compare_count(spatial_rdd, transformed_envelope, input_count) - - spatial_rdd = LineStringRDD(self.sc, input_location, splitter, True, - StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:5070") - - self.compare_count(spatial_rdd, transformed_envelope, input_count) - def test_empty_constructor(self): spatial_rdd = LineStringRDD( @@ -135,8 +80,7 @@ class TestLineStringRDD(TestBase): InputLocation=input_location, splitter=splitter, carryInputData=True, - partitions=num_partitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=num_partitions ) spatial_rdd.analyze() @@ -152,8 +96,7 @@ class TestLineStringRDD(TestBase): InputLocation=input_location, splitter=splitter, carryInputData=True, - partitions=num_partitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=num_partitions ) spatial_rdd.analyze() @@ -165,8 +108,7 @@ class TestLineStringRDD(TestBase): InputLocation=input_location, splitter=splitter, carryInputData=True, - partitions=num_partitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=num_partitions ) rectangle_rdd = linestring_rdd.MinimumBoundingRectangle() diff --git a/python/tests/spatial_rdd/test_point_rdd.py b/python/tests/spatial_rdd/test_point_rdd.py index 24b7bdf7..9cb15454 100644 --- a/python/tests/spatial_rdd/test_point_rdd.py +++ b/python/tests/spatial_rdd/test_point_rdd.py @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -from pyspark import StorageLevel - from sedona.core.SpatialRDD import PointRDD from sedona.core.SpatialRDD.spatial_rdd import SpatialRDD from sedona.core.enums import IndexType, GridType @@ -52,37 +50,10 @@ class TestPointRDD(TestBase): self.compare_count(spatial_rdd_copy, input_count, input_boundary) spatial_rdd_copy = PointRDD(spatial_rdd.rawJvmSpatialRDD) self.compare_count(spatial_rdd_copy, input_count, input_boundary) - spatial_rdd_copy = PointRDD(spatial_rdd.rawJvmSpatialRDD, "epsg:4326", "epsg:5070") - self.compare_count(spatial_rdd_copy, input_count, transformed_envelope) spatial_rdd_copy = PointRDD(self.sc, input_location, offset, splitter, True, num_partitions) self.compare_count(spatial_rdd_copy, input_count, input_boundary) spatial_rdd_copy = PointRDD(self.sc, crs_point_test, splitter, True) self.compare_count(spatial_rdd_copy, 20000, crs_envelope) - spatial_rdd_copy = PointRDD(spatial_rdd.rawJvmSpatialRDD, StorageLevel.MEMORY_ONLY) - self.compare_count(spatial_rdd_copy, input_count, input_boundary) - spatial_rdd_copy = PointRDD(self.sc, input_location, offset, splitter, True, num_partitions, StorageLevel.MEMORY_ONLY) - self.compare_count(spatial_rdd_copy, input_count, input_boundary) - spatial_rdd_copy = PointRDD(self.sc, input_location, offset, splitter, True, StorageLevel.MEMORY_ONLY) - self.compare_count(spatial_rdd_copy, input_count, input_boundary) - spatial_rdd_copy = PointRDD(self.sc, crs_point_test, splitter, True, num_partitions, StorageLevel.MEMORY_ONLY) - self.compare_count(spatial_rdd_copy, 20000, crs_envelope) - spatial_rdd_copy = PointRDD(self.sc, crs_point_test, splitter, True, StorageLevel.MEMORY_ONLY) - self.compare_count(spatial_rdd_copy, 20000, crs_envelope) - spatial_rdd_copy = PointRDD(spatial_rdd.rawJvmSpatialRDD, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:5070") - self.compare_count(spatial_rdd_copy, input_count, transformed_envelope) - spatial_rdd_copy = PointRDD(self.sc, input_location, offset, splitter, True, num_partitions, - StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:5070") - self.compare_count(spatial_rdd_copy, input_count, transformed_envelope) - spatial_rdd_copy = PointRDD(self.sc, input_location, offset, splitter, True, - StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:5070") - self.compare_count(spatial_rdd_copy, input_count, transformed_envelope) - spatial_rdd_copy = PointRDD(self.sc, crs_point_test, splitter, True, - num_partitions, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:5070") - - self.compare_count(spatial_rdd_copy, 20000, crs_envelope_transformed) - spatial_rdd_copy = PointRDD(self.sc, crs_point_test, splitter, True, StorageLevel.MEMORY_ONLY, - "epsg:4326", "epsg:5070") - self.compare_count(spatial_rdd_copy, 20000, crs_envelope_transformed) def test_empty_constructor(self): spatial_rdd = PointRDD( @@ -91,8 +62,7 @@ class TestPointRDD(TestBase): Offset=offset, splitter=splitter, carryInputData=True, - partitions=num_partitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=num_partitions ) spatial_rdd.buildIndex(IndexType.RTREE, False) spatial_rdd_copy = PointRDD() @@ -106,8 +76,7 @@ class TestPointRDD(TestBase): Offset=offset, splitter=splitter, carryInputData=False, - partitions=10, - newLevel=StorageLevel.MEMORY_ONLY + partitions=10 ) spatial_rdd.analyze() spatial_rdd.spatialPartitioning(GridType.QUADTREE) @@ -121,7 +90,6 @@ class TestPointRDD(TestBase): Offset=offset, splitter=splitter, carryInputData=True, - partitions=num_partitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=num_partitions ) spatial_rdd.buildIndex(IndexType.RTREE, False) diff --git a/python/tests/spatial_rdd/test_polygon_rdd.py b/python/tests/spatial_rdd/test_polygon_rdd.py index 61b8528b..3897f785 100644 --- a/python/tests/spatial_rdd/test_polygon_rdd.py +++ b/python/tests/spatial_rdd/test_polygon_rdd.py @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -from pyspark import StorageLevel - from sedona.core.SpatialRDD import PolygonRDD from sedona.core.SpatialRDD.spatial_rdd import SpatialRDD from sedona.core.enums import IndexType, FileDataSplitter, GridType @@ -44,36 +42,13 @@ class TestPolygonRDD(TestBase): InputLocation=input_location, splitter=splitter, carryInputData=True, - partitions=num_partitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=num_partitions ) self.compare_spatial_rdd(spatial_rdd_core, input_boundary) - spatial_rdd_core = PolygonRDD( - self.sc, - input_location, - splitter, - True, - num_partitions, - StorageLevel.MEMORY_ONLY - ) - self.compare_spatial_rdd(spatial_rdd_core, input_boundary) spatial_rdd = PolygonRDD(rawSpatialRDD=spatial_rdd_core.rawJvmSpatialRDD) self.compare_spatial_rdd(spatial_rdd, input_boundary) - spatial_rdd = PolygonRDD(spatial_rdd_core.rawJvmSpatialRDD, "epsg:4326", "epsg:5070") - self.compare_spatial_rdd(spatial_rdd, query_envelope) - assert spatial_rdd.getSourceEpsgCode() == "epsg:4326" - assert spatial_rdd.getTargetEpsgCode() == "epsg:5070" - spatial_rdd = PolygonRDD(rawSpatialRDD=spatial_rdd_core.rawJvmSpatialRDD, sourceEpsgCode="epsg:4326", targetEpsgCode="epsg:5070") - assert spatial_rdd.getSourceEpsgCode() == "epsg:4326" - assert spatial_rdd.getTargetEpsgCode() == "epsg:5070" - self.compare_spatial_rdd(spatial_rdd, query_envelope) - spatial_rdd = PolygonRDD(rawSpatialRDD=spatial_rdd.rawJvmSpatialRDD, newLevel=StorageLevel.MEMORY_ONLY) - self.compare_spatial_rdd(spatial_rdd, query_envelope) - spatial_rdd = PolygonRDD(spatial_rdd_core.rawJvmSpatialRDD, StorageLevel.MEMORY_ONLY) - self.compare_spatial_rdd(spatial_rdd, input_boundary) - spatial_rdd = PolygonRDD() query_window_rdd = PolygonRDD( self.sc, @@ -117,132 +92,13 @@ class TestPolygonRDD(TestBase): self.compare_spatial_rdd(spatial_rdd_core, input_boundary) - query_window_rdd = PolygonRDD( - self.sc, - polygon_rdd_input_location, - polygon_rdd_start_offset, - polygon_rdd_end_offset, - polygon_rdd_splitter, - True, - 5, - StorageLevel.MEMORY_ONLY - ) - - assert query_window_rdd.analyze() - assert query_window_rdd.approximateTotalCount == 3000 - - query_window_rdd = PolygonRDD( - self.sc, - polygon_rdd_input_location, - polygon_rdd_start_offset, - polygon_rdd_end_offset, - polygon_rdd_splitter, - True, - StorageLevel.MEMORY_ONLY - ) - - assert query_window_rdd.analyze() - assert query_window_rdd.approximateTotalCount == 3000 - - spatial_rdd_core = PolygonRDD( - self.sc, - input_location, - splitter, - True, - 5, - StorageLevel.MEMORY_ONLY - ) - - self.compare_spatial_rdd(spatial_rdd_core, input_boundary) - - spatial_rdd_core = PolygonRDD( - self.sc, - input_location, - splitter, - True, - StorageLevel.MEMORY_ONLY - ) - - self.compare_spatial_rdd(spatial_rdd_core, input_boundary) - - spatial_rdd = PolygonRDD( - spatial_rdd_core.rawJvmSpatialRDD, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:5070" - ) - self.compare_spatial_rdd(spatial_rdd, query_envelope) - - query_window_rdd = PolygonRDD( - self.sc, - polygon_rdd_input_location, - polygon_rdd_start_offset, - polygon_rdd_end_offset, - polygon_rdd_splitter, - True, - 5, - StorageLevel.MEMORY_ONLY, - "epsg:4326", - "epsg:5070" - ) - - assert query_window_rdd.analyze() - assert query_window_rdd.approximateTotalCount == 3000 - - query_window_rdd = PolygonRDD( - self.sc, - polygon_rdd_input_location, - polygon_rdd_start_offset, - polygon_rdd_end_offset, - polygon_rdd_splitter, - True, - StorageLevel.MEMORY_ONLY, - "epsg:4326", - "epsg:5070" - ) - - assert query_window_rdd.analyze() - assert query_window_rdd.approximateTotalCount == 3000 - - spatial_rdd_core = PolygonRDD( - self.sc, - input_location, - splitter, - True, - 5, - StorageLevel.MEMORY_ONLY, - "epsg:4326", - "epsg:5070" - ) - - self.compare_spatial_rdd(spatial_rdd_core, query_envelope) - spatial_rdd_core = PolygonRDD( - self.sc, - input_location, - splitter, - True, - StorageLevel.MEMORY_ONLY, - "epsg:4326", - "epsg:5070" - ) - - spatial_rdd_core = PolygonRDD( - sparkContext=self.sc, - InputLocation=input_location, - splitter=splitter, - carryInputData=True, - newLevel=StorageLevel.MEMORY_ONLY, - sourceEpsgCRSCode="epsg:4326", - targetEpsgCode="epsg:5070" - ) - - self.compare_spatial_rdd(spatial_rdd_core, query_envelope) - def test_empty_constructor(self): spatial_rdd = PolygonRDD( sparkContext=self.sc, InputLocation=input_location, splitter=splitter, carryInputData=True, - partitions=num_partitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=num_partitions ) spatial_rdd.analyze() spatial_rdd.spatialPartitioning(grid_type) @@ -257,8 +113,7 @@ class TestPolygonRDD(TestBase): InputLocation=input_location_geo_json, splitter=FileDataSplitter.GEOJSON, carryInputData=True, - partitions=4, - newLevel=StorageLevel.MEMORY_ONLY + partitions=4 ) spatial_rdd.analyze() assert spatial_rdd.approximateTotalCount == 1001 @@ -272,8 +127,7 @@ class TestPolygonRDD(TestBase): sparkContext=self.sc, InputLocation=input_location_wkt, splitter=FileDataSplitter.WKT, - carryInputData=True, - newLevel=StorageLevel.MEMORY_ONLY + carryInputData=True ) spatial_rdd.analyze() @@ -286,26 +140,13 @@ class TestPolygonRDD(TestBase): sparkContext=self.sc, InputLocation=input_location_wkb, splitter=FileDataSplitter.WKB, - carryInputData=True, - newLevel=StorageLevel.MEMORY_ONLY + carryInputData=True ) spatial_rdd.analyze() assert spatial_rdd.approximateTotalCount == 103 assert spatial_rdd.boundaryEnvelope is not None assert spatial_rdd.rawSpatialRDD.take(1)[0].getUserData() == "31\t039\t00835841\t31039\tCuming\tCuming County\t06\tH1\tG4020\t\t\t\tA\t1477895811\t10447360\t+41.9158651\t-096.7885168" - def test_build_index_without_set_grid(self): - spatial_rdd = PolygonRDD( - self.sc, - input_location, - FileDataSplitter.CSV, - carryInputData=True, - partitions=num_partitions, - newLevel=StorageLevel.MEMORY_ONLY - ) - spatial_rdd.analyze() - spatial_rdd.buildIndex(IndexType.RTREE, False) - def test_mbr(self): polygon_rdd = PolygonRDD( sparkContext=self.sc, diff --git a/python/tests/spatial_rdd/test_rectangle_rdd.py b/python/tests/spatial_rdd/test_rectangle_rdd.py index 308ca798..6c598f8c 100644 --- a/python/tests/spatial_rdd/test_rectangle_rdd.py +++ b/python/tests/spatial_rdd/test_rectangle_rdd.py @@ -18,7 +18,6 @@ import os import pytest -from pyspark import StorageLevel from sedona.core.SpatialRDD import RectangleRDD from sedona.core.enums import IndexType, GridType, FileDataSplitter @@ -50,8 +49,7 @@ class TestRectangleRDD(TestBase): Offset=offset, splitter=splitter, carryInputData=True, - partitions=numPartitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=numPartitions ) spatial_rdd.analyze() @@ -65,8 +63,7 @@ class TestRectangleRDD(TestBase): offset, splitter, True, - numPartitions, - StorageLevel.MEMORY_ONLY + numPartitions ) spatial_rdd.analyze() @@ -81,8 +78,7 @@ class TestRectangleRDD(TestBase): Offset=offset, splitter=splitter, carryInputData=True, - partitions=numPartitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=numPartitions ) spatial_rdd.analyze() diff --git a/python/tests/spatial_rdd/test_spatial_rdd.py b/python/tests/spatial_rdd/test_spatial_rdd.py index df309c72..37fe3819 100644 --- a/python/tests/spatial_rdd/test_spatial_rdd.py +++ b/python/tests/spatial_rdd/test_spatial_rdd.py @@ -19,7 +19,7 @@ import os import pyspark import pytest -from pyspark import StorageLevel, RDD +from pyspark import RDD from shapely.geometry import Point from sedona.core.SpatialRDD import PointRDD @@ -49,8 +49,7 @@ class TestSpatialRDD(TestBase): Offset=offset, splitter=splitter, carryInputData=True, - partitions=numPartitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=numPartitions ) return spatial_rdd @@ -58,21 +57,6 @@ class TestSpatialRDD(TestBase): spatial_rdd = self.create_spatial_rdd() assert spatial_rdd.analyze() - def test_crs_transform(self): - spatial_rdd = PointRDD( - sparkContext=self.sc, - InputLocation=crs_test_point, - Offset=0, - splitter=splitter, - carryInputData=True, - partitions=numPartitions, - newLevel=StorageLevel.MEMORY_ONLY - ) - - spatial_rdd.CRSTransform("epsg:4326", "epsg:3857") - - assert spatial_rdd.rawSpatialRDD.collect()[0].geom.wkt == "POINT (-9833016.710450118 3805934.914254189)" - def test_minimum_bounding_rectangle(self): spatial_rdd = self.create_spatial_rdd() @@ -127,22 +111,6 @@ class TestSpatialRDD(TestBase): except AssertionError: assert geo_json_rdd.fieldNames == ['id', 'zipcode', 'name'] - def test_get_crs_transformation(self): - spatial_rdd = PointRDD( - sparkContext=self.sc, - InputLocation=crs_test_point, - Offset=0, - splitter=splitter, - carryInputData=True, - partitions=numPartitions, - newLevel=StorageLevel.MEMORY_ONLY - ) - - assert not spatial_rdd.getCRStransformation() - spatial_rdd.CRSTransform("epsg:4326", "epsg:3857") - - assert spatial_rdd.getCRStransformation() - def test_get_partitioner(self): spatial_rdd = self.create_spatial_rdd() @@ -171,40 +139,6 @@ class TestSpatialRDD(TestBase): spatial_rdd.setSampleNumber(10) assert spatial_rdd.getSampleNumber() == 10 - def test_get_source_epsg_code(self): - spatial_rdd = PointRDD( - sparkContext=self.sc, - InputLocation=crs_test_point, - Offset=0, - splitter=splitter, - carryInputData=True, - partitions=numPartitions, - newLevel=StorageLevel.MEMORY_ONLY - ) - - assert spatial_rdd.getSourceEpsgCode() == "" - - spatial_rdd.CRSTransform("epsg:4326", "epsg:3857") - - assert spatial_rdd.getSourceEpsgCode() == "epsg:4326" - - def test_get_target_epsg_code(self): - spatial_rdd = PointRDD( - sparkContext=self.sc, - InputLocation=crs_test_point, - Offset=0, - splitter=splitter, - carryInputData=True, - partitions=numPartitions, - newLevel=StorageLevel.MEMORY_ONLY - ) - - assert spatial_rdd.getTargetEpsgCode() == "" - - spatial_rdd.CRSTransform("epsg:4326", "epsg:3857") - - assert spatial_rdd.getTargetEpsgCode() == "epsg:3857" - def test_grids(self): for grid_type in GridType: diff --git a/python/tests/spatial_rdd/test_spatial_rdd_writer.py b/python/tests/spatial_rdd/test_spatial_rdd_writer.py index 32d10641..1685ded8 100644 --- a/python/tests/spatial_rdd/test_spatial_rdd_writer.py +++ b/python/tests/spatial_rdd/test_spatial_rdd_writer.py @@ -19,7 +19,6 @@ import os import shutil import pytest -from pyspark import StorageLevel from sedona.core.SpatialRDD import PointRDD from sedona.core.enums import FileDataSplitter @@ -80,8 +79,7 @@ class TestSpatialRDDWriter(TestBase): Offset=offset, splitter=splitter, carryInputData=True, - partitions=numPartitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=numPartitions ) spatial_rdd.saveAsGeoJSON(test_save_as_wkb_with_data) @@ -91,8 +89,7 @@ class TestSpatialRDDWriter(TestBase): InputLocation=test_save_as_wkb_with_data, splitter=FileDataSplitter.GEOJSON, carryInputData=True, - partitions=numPartitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=numPartitions ) assert result_wkb.rawSpatialRDD.count() == spatial_rdd.rawSpatialRDD.count() diff --git a/python/tests/sql/test_spatial_rdd_to_spatial_dataframe.py b/python/tests/sql/test_spatial_rdd_to_spatial_dataframe.py index 1b73360c..3d9030ec 100644 --- a/python/tests/sql/test_spatial_rdd_to_spatial_dataframe.py +++ b/python/tests/sql/test_spatial_rdd_to_spatial_dataframe.py @@ -17,7 +17,6 @@ import os -from pyspark import StorageLevel from pyspark.sql.types import StructType, StructField, StringType, IntegerType from sedona.core.SpatialRDD import PointRDD @@ -69,8 +68,7 @@ class TestSpatialRDDToDataFrame(TestBase): Offset=0, splitter=splitter, carryInputData=True, - partitions=numPartitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=numPartitions ) raw_spatial_rdd = spatial_rdd.rawSpatialRDD.map( diff --git a/python/tests/test_assign_raw_spatial_rdd.py b/python/tests/test_assign_raw_spatial_rdd.py index 1c288364..9011e93d 100644 --- a/python/tests/test_assign_raw_spatial_rdd.py +++ b/python/tests/test_assign_raw_spatial_rdd.py @@ -18,7 +18,6 @@ from sedona.core.SpatialRDD import PointRDD, CircleRDD from tests.properties.point_properties import input_location, offset, splitter, num_partitions from tests.test_base import TestBase -from pyspark import StorageLevel class TestSpatialRddAssignment(TestBase): @@ -30,8 +29,7 @@ class TestSpatialRddAssignment(TestBase): offset, splitter, True, - num_partitions, - StorageLevel.MEMORY_ONLY + num_partitions ) spatial_rdd.analyze() @@ -51,8 +49,7 @@ class TestSpatialRddAssignment(TestBase): offset, splitter, True, - num_partitions, - StorageLevel.MEMORY_ONLY + num_partitions ) circle_rdd = CircleRDD(point_rdd, 1.0) circle_rdd.analyze() diff --git a/python/tests/test_scala_example.py b/python/tests/test_scala_example.py index 25b1e7b4..fd2d3a24 100644 --- a/python/tests/test_scala_example.py +++ b/python/tests/test_scala_example.py @@ -52,14 +52,14 @@ class TestScalaExample(TestBase): def test_spatial_range_query(self): object_rdd = PointRDD( - self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY + self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True ) object_rdd.rawJvmSpatialRDD.persist(StorageLevel.MEMORY_ONLY) for _ in range(each_query_loop_times): result_size = RangeQuery.SpatialRangeQuery(object_rdd, range_query_window, False, False).count() object_rdd = PointRDD( - self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY + self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True ) object_rdd.rawJvmSpatialRDD.persist(StorageLevel.MEMORY_ONLY) for _ in range(each_query_loop_times): @@ -67,7 +67,7 @@ class TestScalaExample(TestBase): def test_spatial_range_query_using_index(self): object_rdd = PointRDD( - self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY) + self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True) object_rdd.buildIndex(point_rdd_index_type, False) object_rdd.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY) assert object_rdd.indexedRawRDD.is_cached @@ -77,7 +77,7 @@ class TestScalaExample(TestBase): def test_spatial_knn_query(self): object_rdd = PointRDD( - self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY + self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True ) object_rdd.rawJvmSpatialRDD.persist(StorageLevel.MEMORY_ONLY) @@ -86,7 +86,7 @@ class TestScalaExample(TestBase): def test_spatial_knn_query_using_index(self): object_rdd = PointRDD( - self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY + self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True ) object_rdd.buildIndex(point_rdd_index_type, False) object_rdd.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY) @@ -100,7 +100,7 @@ class TestScalaExample(TestBase): polygon_rdd_splitter, True ) object_rdd = PointRDD( - self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY) + self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True) object_rdd.spatialPartitioning(join_query_partitioning_type) query_window_rdd.spatialPartitioning(object_rdd.getPartitioner()) @@ -117,7 +117,7 @@ class TestScalaExample(TestBase): polygon_rdd_end_offset, polygon_rdd_splitter, True ) object_rdd = PointRDD( - self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY + self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True ) object_rdd.spatialPartitioning(join_query_partitioning_type) @@ -135,7 +135,7 @@ class TestScalaExample(TestBase): def test_distance_join_query(self): object_rdd = PointRDD( - self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY) + self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True) query_window_rdd = CircleRDD(object_rdd, 0.1) object_rdd.spatialPartitioning(GridType.QUADTREE) @@ -151,7 +151,7 @@ class TestScalaExample(TestBase): def test_distance_join_using_index(self): object_rdd = PointRDD( - self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY) + self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True) query_window_rdd = CircleRDD(object_rdd, 0.1) @@ -168,26 +168,6 @@ class TestScalaExample(TestBase): for _ in range(each_query_loop_times): result_size = JoinQuery.DistanceJoinQuery(object_rdd, query_window_rdd, True, True).count() - def test_crs_transformation_spatial_range_query(self): - object_rdd = PointRDD( - self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY, - "epsg:4326", "epsg:3005") - - object_rdd.rawSpatialRDD.persist(StorageLevel.MEMORY_ONLY) - assert object_rdd.rawSpatialRDD.is_cached - for _ in range(each_query_loop_times): - result_size = RangeQuery.SpatialRangeQuery(object_rdd, range_query_window, False, False).count() - assert result_size > -1 - - def test_crs_transformation_spatial_range_query_using_index(self): - object_rdd = PointRDD(self.sc, point_rdd_input_location, point_rdd_offset, - point_rdd_splitter, True, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005") - object_rdd.buildIndex(point_rdd_index_type, False) - object_rdd.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY) - for _ in range(each_query_loop_times): - result_size = RangeQuery.SpatialRangeQuery(object_rdd, range_query_window, False, True).count() - assert result_size > -1 - def test_indexed_rdd_assignment(self): object_rdd = PointRDD( self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True) diff --git a/python/tests/utils/test_crs_transformation.py b/python/tests/utils/test_crs_transformation.py index 71b638b3..d43cc0ef 100644 --- a/python/tests/utils/test_crs_transformation.py +++ b/python/tests/utils/test_crs_transformation.py @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -from pyspark import StorageLevel - from sedona.core.SpatialRDD import PointRDD, PolygonRDD, CircleRDD from sedona.core.enums import GridType from sedona.core.geom.circle import Circle @@ -35,11 +33,9 @@ class TestCrsTransformation(TestBase): input_location, offset, splitter, - True, - StorageLevel.MEMORY_ONLY, - "epsg:4326", - "epsg:3005" + True ) + spatial_rdd.CRSTransform("epsg:4326", "epsg:3005") for i in range(loop_times): result_size = RangeQuery.SpatialRangeQuery(spatial_rdd, query_envelope, False, False).count() @@ -47,115 +43,3 @@ class TestCrsTransformation(TestBase): assert RangeQuery.SpatialRangeQuery( spatial_rdd, query_envelope, False, False).take(10)[1].getUserData() is not None - - def test_spatial_range_query_using_index(self): - spatial_rdd = PointRDD( - self.sc, - input_location, - offset, - splitter, - True, - StorageLevel.MEMORY_ONLY, - "epsg:4326", - "epsg:3005" - ) - spatial_rdd.buildIndex(IndexType.RTREE, False) - - for i in range(loop_times): - result_size = RangeQuery.SpatialRangeQuery(spatial_rdd, query_envelope, False, False).count() - assert result_size == 3127 - - assert RangeQuery.SpatialRangeQuery( - spatial_rdd, query_envelope, False, False).take(10)[1].getUserData() is not None - - def test_spatial_knn_query(self): - point_rdd = PointRDD( - self.sc, input_location, offset, splitter, True, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005" - ) - - for i in range(loop_times): - result = KNNQuery.SpatialKnnQuery(point_rdd, query_point, top_k, False) - assert result.__len__() > 0 - assert result[0].getUserData() is not None - - def test_spatial_knn_query_using_index(self): - point_rdd = PointRDD( - self.sc, input_location, offset, splitter, True, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005" - ) - point_rdd.buildIndex(IndexType.RTREE, False) - - for i in range(loop_times): - result = KNNQuery.SpatialKnnQuery(point_rdd, query_point, top_k, False) - assert result.__len__() > 0 - assert result[0].getUserData() is not None - - def test_spatial_knn_correctness(self): - point_rdd = PointRDD( - self.sc, input_location, offset, splitter, True, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005" - ) - result_no_index = KNNQuery.SpatialKnnQuery(point_rdd, query_point, top_k, False) - point_rdd.buildIndex(IndexType.RTREE, False) - result_with_index = KNNQuery.SpatialKnnQuery(point_rdd, query_point, top_k, True) - - sorted_result_no_index = sorted(result_no_index, key=lambda geo_data: distance_sorting_functions( - geo_data, query_point)) - - sorted_result_with_index = sorted(result_with_index, key=lambda geo_data: distance_sorting_functions( - geo_data, query_point)) - - difference = 0 - for x in range(top_k): - difference += sorted_result_no_index[x].geom.distance(sorted_result_with_index[x].geom) - - assert difference == 0 - - def test_spatial_join_query_with_polygon_rdd(self): - query_rdd = PolygonRDD( - self.sc, input_location_query_polygon, splitter, True, - num_partitions, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005" - ) - - spatial_rdd = PointRDD( - self.sc, input_location, offset, splitter, True, num_partitions, - StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005" - ) - spatial_rdd.spatialPartitioning(grid_type) - query_rdd.spatialPartitioning(spatial_rdd.getPartitioner()) - - result = JoinQuery.SpatialJoinQuery(spatial_rdd, query_rdd, False, True).collect() - assert result[1][0].getUserData() is not None - - for data in result: - if data[1].__len__() != 0: - for right_data in data[1]: - assert right_data.getUserData() is not None - - def test_spatial_join_query_with_polygon_rdd_using_index(self): - query_rdd = PolygonRDD( - self.sc, input_location_query_polygon, splitter, True, - num_partitions, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005" - ) - - spatial_rdd = PointRDD( - self.sc, - input_location, offset, splitter, True, num_partitions, - StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005" - ) - - query_rdd.analyze() - spatial_rdd.analyze() - - spatial_rdd.spatialPartitioning(grid_type) - - spatial_rdd.buildIndex(IndexType.RTREE, True) - - query_rdd.spatialPartitioning(spatial_rdd.getPartitioner()) - - result = JoinQuery.SpatialJoinQuery(spatial_rdd, query_rdd, False, True).collect() - - assert result[1][0].getUserData() is not None - - for data in result: - if data[1].__len__() != 0: - for right_data in data[1]: - assert right_data.getUserData() is not None
