HI

Which carbondata+spark version?   and can you provide the full configuration
inside "carbondata.properties"


Mick Yuan wrote
> Hi,all
>   I have a quering performane tuning case on carbondata.
> 
>   *Enviroment is as below:*:
>   spark on yarn 
>   4 nodemanagers
>   102G,55 cores each nodemanager
> 
>   *Spark properties:*
> spark.master yarn
> spark.submit.deployMode client
> spark.serializer org.apache.spark.serializer.KryoSerializer
> spark.driver.memory 2g
> spark.executor.memory 90g
> spark.executor.cores 54
> spark.sql.hive.convertmetastoreParquet false
> spark.executor.instances 10
> spark.sql.shuffle.partitions 432
> spark.speculation true
>   *Carbondata properties:*
> carbon.storelocation=hdfs://hacluster/Opt/CarbonStore
> carbon.ddl.base.hdfs.url=hdfs://hacluster/opt/data
> carbon.badRecords.location=/opt/Carbon/Spark/badrecords
> carbon.sort.file.buffer.size=80
> carbon.graph.rowset.size=100000
> carbon.number.of.cores.while.loading=24
> carbon.sort.size=1000000
> carbon.enableXXHash=true
> carbon.number.of.cores.while.alterPartition=2
> carbon.number.of.cores.while.compacting=2
> carbon.compaction.level.threshold=4,3
> carbon.major.compaction.size=1024
> carbon.number.of.cores=4
> carbon.inmemory.record.size=120000
> 
>   *Create table sql:*
> CREATE TABLE
>     yuan_yuan10_DATE_DIM
>     (
>         D_YEAR string,
>         D_MOY string,
>         D_DATE_SK string,
>         D_DATE_ID string,
>         D_DATE string,
>         D_MONTH_SEQ string,
>         D_WEEK_SEQ string,
>         D_QUARTER_SEQ string,
>         D_DOW string,
>         D_DOM string,
>         D_QOY string,
>         D_FY_YEAR string,
>         D_FY_QUARTER_SEQ string,
>         D_FY_WEEK_SEQ string,
>         D_DAY_NAME string,
>         D_QUARTER_NAME string,
>         D_HOLIDAY string,
>         D_WEEKEND string,
>         D_FOLLOWING_HOLIDAY string,
>         D_FIRST_DOM string,
>         D_LAST_DOM string,
>         D_SAME_DAY_LY string,
>         D_SAME_DAY_LQ string,
>         D_CURRENT_DAY string,
>         D_CURRENT_WEEK string,
>         D_CURRENT_MONTH string,
>         D_CURRENT_QUARTER string,
>         D_CURRENT_YEAR string,
>         REMARK string,
>         HYREN_S_DATE string,
>         HYREN_E_DATE string,
>         HYREN_MD5_VAL string
>     )
>     stored BY 'carbondata' TBLPROPERTIES
>     (
>         'TABLE_BLOCKSIZE'='256',
>         'SORT_COLUMNS'='D_YEAR,D_MOY,D_DATE_SK',
>         'DICTIONARY_INCLUDE'='D_MOY,D_YEAR,D_DATE_SK'
>     );
> 
> 
> 
> CREATE TABLE
>     yuan_yuan10_STORE_SALES
>     (
>         SS_SOLD_DATE_SK string,
>         SS_ITEM_SK string,
>         SS_CUSTOMER_SK string,
>         SS_STORE_SK string,
>         SS_SOLD_TIME_SK string,
>         SS_CDEMO_SK string,
>         SS_HDEMO_SK string,
>         SS_ADDR_SK string,
>         SS_PROMO_SK string,
>         SS_TICKET_NUMBER string,
>         SS_QUANTITY string,
>         SS_WHOLESALE_COST string,
>         SS_LIST_PRICE string,
>         SS_SALES_PRICE string,
>         SS_EXT_DISCOUNT_AMT string,
>         SS_EXT_SALES_PRICE string,
>         SS_EXT_WHOLESALE_COST string,
>         SS_EXT_LIST_PRICE string,
>         SS_EXT_TAX string,
>         SS_COUPON_AMT string,
>         SS_NET_PAID string,
>         SS_NET_PAID_INC_TAX string,
>         SS_NET_PROFIT string,
>         REMARK string,
>         HYREN_S_DATE string,
>         HYREN_E_DATE string,
>         HYREN_MD5_VAL string
>     )
>     stored BY 'carbondata' TBLPROPERTIES
>     (
>         'TABLE_BLOCKSIZE'='256',
>         'SORT_COLUMNS'='SS_SOLD_DATE_SK,SS_ITEM_SK,SS_CUSTOMER_SK',
>         'DICTIONARY_INCLUDE'='SS_SOLD_DATE_SK,SS_ITEM_SK,SS_CUSTOMER_SK'
>     );
> 
> 
> CREATE TABLE
>     yuan_yuan10_ITEM
>     (
>         I_MANAGER_ID string,
>         I_ITEM_SK string,
>         I_ITEM_ID string,
>         I_REC_START_DATE string,
>         I_REC_END_DATE string,
>         I_ITEM_DESC string,
>         I_CURRENT_PRICE string,
>         I_WHOLESALE_COST string,
>         I_BRAND_ID string,
>         I_BRAND string,
>         I_CLASS_ID string,
>         I_CLASS string,
>         I_CATEGORY_ID string,
>         I_CATEGORY string,
>         I_MANUFACT_ID string,
>         I_MANUFACT string,
>         I_SIZE string,
>         I_FORMULATION string,
>         I_COLOR string,
>         I_UNITS string,
>         I_CONTAINER string,
>         I_PRODUCT_NAME string,
>         REMARK string,
>         HYREN_S_DATE string,
>         HYREN_E_DATE string,
>         HYREN_MD5_VAL string
>     )
>     stored BY 'carbondata' TBLPROPERTIES
>     (
>         'TABLE_BLOCKSIZE'='256',
>         'SORT_COLUMNS'='I_MANAGER_ID,I_ITEM_SK',
>         'DICTIONARY_INCLUDE'='I_MANAGER_ID,I_ITEM_SK'
>     );
> 
> 
> CREATE TABLE
>     yuan_yuan10_CUSTOMER
>     (
>         C_CURRENT_ADDR_SK string,
>         C_CUSTOMER_SK string,
>         C_CUSTOMER_ID string,
>         C_CURRENT_CDEMO_SK string,
>         C_CURRENT_HDEMO_SK string,
>         C_FIRST_SHIPTO_DATE_SK string,
>         C_FIRST_SALES_DATE_SK string,
>         C_SALUTATION string,
>         C_FIRST_NAME string,
>         C_LAST_NAME string,
>         C_PREFERRED_CUST_FLAG string,
>         C_BIRTH_DAY string,
>         C_BIRTH_MONTH string,
>         C_BIRTH_YEAR string,
>         C_BIRTH_COUNTRY string,
>         C_LOGIN string,
>         C_EMAIL_ADDRESS string,
>         C_LAST_REVIEW_DATE string,
>         REMRK string,
>         HYREN_S_DATE string,
>         HYREN_E_DATE string,
>         HYREN_MD5_VAL string
>     )
>     stored BY 'carbondata' TBLPROPERTIES
>     (
>         'TABLE_BLOCKSIZE'='256',
>         'SORT_COLUMNS'='C_CURRENT_ADDR_SK,C_CUSTOMER_SK',
>         'DICTIONARY_INCLUDE'='C_CURRENT_ADDR_SK,C_CUSTOMER_SK'
>     );
> 
> 
> 
> CREATE TABLE
>     yuan_yuan10_CUSTOMER_ADDRESS
>     (
>         CA_ZIP string,
>         CA_ADDRESS_SK string,
>         CA_ADDRESS_ID string,
>         CA_STREET_NUMBER string,
>         CA_STREET_NAME string,
>         CA_STREET_TYPE string,
>         CA_SUITE_NUMBER string,
>         CA_CITY string,
>         CA_COUNTY string,
>         CA_STATE string,
>         CA_COUNTRY string,
>         CA_GMT_OFFSET string,
>         CA_LOCATION_TYPE string,
>         REMARK string,
>         HYREN_S_DATE string,
>         HYREN_E_DATE string,
>         HYREN_MD5_VAL string
>     )
>     stored BY 'carbondata' TBLPROPERTIES
>     (
>         'TABLE_BLOCKSIZE'='256',
>         'SORT_COLUMNS'='CA_ZIP,CA_ADDRESS_SK',
>         'DICTIONARY_INCLUDE'='CA_ZIP,CA_ADDRESS_SK'
>     );
> 
> 
> CREATE TABLE
>     yuan_yuan10_STORE
>     (
>         S_ZIP string,
>         S_STORE_SK string,
>         S_STORE_ID string,
>         S_REC_START_DATE string,
>         S_REC_END_DATE string,
>         S_CLOSED_DATE_SK string,
>         S_STORE_NAME string,
>         S_NUMBER_EMPLOYEES string,
>         S_FLOOR_SPACE string,
>         S_HOURS string,
>         S_MANAGER string,
>         S_MARKET_ID string,
>         S_GEOGRAPHY_CLASS string,
>         S_MARKET_DESC string,
>         S_MARKET_MANAGER string,
>         S_DIVISION_ID string,
>         S_DIVISION_NAME string,
>         S_COMPANY_ID string,
>         S_COMPANY_NAME string,
>         S_STREET_NUMBER string,
>         S_STREET_NAME string,
>         S_STREET_TYPE string,
>         S_SUITE_NUMBER string,
>         S_CITY string,
>         S_COUNTY string,
>         S_STATE string,
>         S_COUNTRY string,
>         S_GMT_OFFSET string,
>         S_TAX_PRECENTAGE string,
>         REMRK string,
>         HYREN_S_DATE string,
>         HYREN_E_DATE string,
>         HYREN_MD5_VAL string
>     )
>     stored BY 'carbondata' TBLPROPERTIES
>     (
>         'TABLE_BLOCKSIZE'='256',
>         'SORT_COLUMNS'='S_ZIP,S_STORE_SK',
>         'DICTIONARY_INCLUDE'='S_ZIP,S_STORE_SK'
>     );
> 
> *Query sql:*
> SELECT
>     i_brand_id brand_id,
>     i_brand    brand,
>     i_manufact_id,
>     i_manufact,
>     SUM(ss_ext_sales_price) ext_price
> FROM
>     yuan_yuan10_date_dim,
>     yuan_yuan10_store_sales,
>     yuan_yuan10_item,
>     yuan_yuan10_customer,
>     yuan_yuan10_customer_address,
>     yuan_yuan10_store
> WHERE
>     d_date_sk = ss_sold_date_sk
> AND ss_item_sk = i_item_sk
> AND i_manager_id = 7
> AND d_moy = 11
> AND d_year = 1999
> AND ss_customer_sk = c_customer_sk
> AND c_current_addr_sk = ca_address_sk
> AND SUBSTR(ca_zip, 1, 5) <> SUBSTR(s_zip, 1, 5)
> AND ss_store_sk = s_store_sk
> AND ss_sold_date_sk BETWEEN 2451484 AND 2451513 -- partition key filter
> GROUP BY
>     i_brand,
>     i_brand_id,
>     i_manufact_id,
>     i_manufact
> ORDER BY
>     ext_price DESC,
>     i_brand,
>     i_brand_id,
>     i_manufact_id,
>     i_manufact limit 100;
> 
> *Table records:*
> all about 171G
> yuan_yuan10_date_dim
>  73049
>  yuan_yuan10_store_sales
>  1439977468
>  yuan_yuan10_item
>  28000
>  yuan_yuan10_customer
>  2100000
>  yuan_yuan10_customer_address
>  1050000
>  yuan_yuan10_store
>  68
> 
> The query statement takes at least 6s,how can I reduce the time to 2s?
> 
> 
> 
> 
>   
>   
> 
> 
> 
> --
> Sent from:
> http://apache-carbondata-dev-mailing-list-archive.1130556.n5.nabble.com/





--
Sent from: 
http://apache-carbondata-dev-mailing-list-archive.1130556.n5.nabble.com/

Reply via email to