GitHub user dbtsai commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22418#discussion_r218272427
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala ---
    @@ -50,6 +55,66 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll {
           .createOrReplaceTempView("orc_temp_table")
       }
     
    +  protected def testBloomFilterCreation(bloomFilterKind: Kind) {
    +    val tableName = "bloomFilter"
    +
    +    withTempDir { dir =>
    +      withTable(tableName) {
    +        val sqlStatement = orcImp match {
    +          case "native" =>
    +            s"""
    +               |CREATE TABLE $tableName (a INT, b STRING)
    +               |USING ORC
    +               |OPTIONS (
    +               |  path '${dir.toURI}',
    +               |  orc.bloom.filter.columns '*',
    +               |  orc.bloom.filter.fpp 0.1
    +               |)
    +            """.stripMargin
    +          case "hive" =>
    +            s"""
    +               |CREATE TABLE $tableName (a INT, b STRING)
    +               |STORED AS ORC
    +               |LOCATION '${dir.toURI}'
    +               |TBLPROPERTIES (
    +               |  orc.bloom.filter.columns='*',
    +               |  orc.bloom.filter.fpp=0.1
    +               |)
    +            """.stripMargin
    +          case impl =>
    +            throw new UnsupportedOperationException(s"Unknown ORC implementation: $impl")
    +        }
    +
    +        sql(sqlStatement)
    +        sql(s"INSERT INTO $tableName VALUES (1, 'str')")
    +
    +        val partFiles = dir.listFiles()
    +          .filter(f => f.isFile && !f.getName.startsWith(".") && !f.getName.startsWith("_"))
    +        assert(partFiles.length === 1)
    +
    +        val orcFilePath = new Path(partFiles.head.getAbsolutePath)
    +        val readerOptions = OrcFile.readerOptions(new Configuration())
    +        val reader = OrcFile.createReader(orcFilePath, readerOptions)
    +        var recordReader: RecordReaderImpl = null
    +        try {
    +          recordReader = reader.rows.asInstanceOf[RecordReaderImpl]
    +
    +          // BloomFilter array is created for all types: `struct`, int (`a`), string (`b`)
    +          val sargColumns = Array(true, true, true)
    +          val orcIndex = recordReader.readRowIndex(0, null, sargColumns)
    +
    +          // Check the types and counts of bloom filters
    +          assert(orcIndex.getBloomFilterKinds.forall(_ === bloomFilterKind))
    --- End diff --
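
The two CREATE TABLE variants in the diff above set the bloom filter parameters as plain ORC configuration keys in the table definition. Below is a minimal sketch of the same idea through the DataFrameWriter API, assuming those write options are forwarded to the ORC writer configuration the same way the OPTIONS clause in the test is; the output path and sample rows are illustrative, not part of the PR.

```scala
// Sketch: writing ORC with bloom filter options via the DataFrameWriter.
// Assumes a local SparkSession; path and sample data are illustrative only.
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .master("local[*]")
  .appName("orc-bloom-sketch")
  .getOrCreate()
import spark.implicits._

Seq((1, "str"), (2, "other")).toDF("a", "b")
  .write
  .option("orc.bloom.filter.columns", "*")  // build bloom filters for every column
  .option("orc.bloom.filter.fpp", "0.1")    // target false-positive probability
  .orc("/tmp/orc_bloom_sketch")             // hypothetical output directory
```
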
    
    Something like
    
    ```
    == Physical Plan ==
    *(1) Project [_1#3]
    +- *(1) Filter (isnotnull(_1#3) && (_1#3._1 = true))
       +- *(1) FileScan orc [_1#3] Batched: false, Format: ORC,
          PushedFilters: [IsNotNull(_1), EqualTo(_1._1,true)],
          BloomFilters: [some information]
    ```
    
    Thanks.
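
For reference, the place that output would surface is an explain over a selective read of the ORC data: the FileScan node already reports PushedFilters, and the suggestion above is to list bloom filter details alongside them. A short sketch, continuing from the hypothetical write above (the path and predicate are assumptions for illustration):

```scala
// Sketch: inspect the physical plan of a filtered read over the ORC data written above.
// Reuses `spark` and the same illustrative output path from the previous sketch.
val df = spark.read.orc("/tmp/orc_bloom_sketch").where("a = 1")
df.explain()  // the FileScan orc node shows PushedFilters; bloom filter info would be a new entry
```
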

