HI Which carbondata+spark version? and can you provide the full configuration inside "carbondata.properties"
Mick Yuan wrote > Hi,all > I have a quering performane tuning case on carbondata. > > *Enviroment is as below:*: > spark on yarn > 4 nodemanagers > 102G,55 cores each nodemanager > > *Spark properties:* > spark.master yarn > spark.submit.deployMode client > spark.serializer org.apache.spark.serializer.KryoSerializer > spark.driver.memory 2g > spark.executor.memory 90g > spark.executor.cores 54 > spark.sql.hive.convertmetastoreParquet false > spark.executor.instances 10 > spark.sql.shuffle.partitions 432 > spark.speculation true > *Carbondata properties:* > carbon.storelocation=hdfs://hacluster/Opt/CarbonStore > carbon.ddl.base.hdfs.url=hdfs://hacluster/opt/data > carbon.badRecords.location=/opt/Carbon/Spark/badrecords > carbon.sort.file.buffer.size=80 > carbon.graph.rowset.size=100000 > carbon.number.of.cores.while.loading=24 > carbon.sort.size=1000000 > carbon.enableXXHash=true > carbon.number.of.cores.while.alterPartition=2 > carbon.number.of.cores.while.compacting=2 > carbon.compaction.level.threshold=4,3 > carbon.major.compaction.size=1024 > carbon.number.of.cores=4 > carbon.inmemory.record.size=120000 > > *Create table sql:* > CREATE TABLE > yuan_yuan10_DATE_DIM > ( > D_YEAR string, > D_MOY string, > D_DATE_SK string, > D_DATE_ID string, > D_DATE string, > D_MONTH_SEQ string, > D_WEEK_SEQ string, > D_QUARTER_SEQ string, > D_DOW string, > D_DOM string, > D_QOY string, > D_FY_YEAR string, > D_FY_QUARTER_SEQ string, > D_FY_WEEK_SEQ string, > D_DAY_NAME string, > D_QUARTER_NAME string, > D_HOLIDAY string, > D_WEEKEND string, > D_FOLLOWING_HOLIDAY string, > D_FIRST_DOM string, > D_LAST_DOM string, > D_SAME_DAY_LY string, > D_SAME_DAY_LQ string, > D_CURRENT_DAY string, > D_CURRENT_WEEK string, > D_CURRENT_MONTH string, > D_CURRENT_QUARTER string, > D_CURRENT_YEAR string, > REMARK string, > HYREN_S_DATE string, > HYREN_E_DATE string, > HYREN_MD5_VAL string > ) > stored BY 'carbondata' TBLPROPERTIES > ( > 'TABLE_BLOCKSIZE'='256', > 'SORT_COLUMNS'='D_YEAR,D_MOY,D_DATE_SK', > 'DICTIONARY_INCLUDE'='D_MOY,D_YEAR,D_DATE_SK' > ); > > > > CREATE TABLE > yuan_yuan10_STORE_SALES > ( > SS_SOLD_DATE_SK string, > SS_ITEM_SK string, > SS_CUSTOMER_SK string, > SS_STORE_SK string, > SS_SOLD_TIME_SK string, > SS_CDEMO_SK string, > SS_HDEMO_SK string, > SS_ADDR_SK string, > SS_PROMO_SK string, > SS_TICKET_NUMBER string, > SS_QUANTITY string, > SS_WHOLESALE_COST string, > SS_LIST_PRICE string, > SS_SALES_PRICE string, > SS_EXT_DISCOUNT_AMT string, > SS_EXT_SALES_PRICE string, > SS_EXT_WHOLESALE_COST string, > SS_EXT_LIST_PRICE string, > SS_EXT_TAX string, > SS_COUPON_AMT string, > SS_NET_PAID string, > SS_NET_PAID_INC_TAX string, > SS_NET_PROFIT string, > REMARK string, > HYREN_S_DATE string, > HYREN_E_DATE string, > HYREN_MD5_VAL string > ) > stored BY 'carbondata' TBLPROPERTIES > ( > 'TABLE_BLOCKSIZE'='256', > 'SORT_COLUMNS'='SS_SOLD_DATE_SK,SS_ITEM_SK,SS_CUSTOMER_SK', > 'DICTIONARY_INCLUDE'='SS_SOLD_DATE_SK,SS_ITEM_SK,SS_CUSTOMER_SK' > ); > > > CREATE TABLE > yuan_yuan10_ITEM > ( > I_MANAGER_ID string, > I_ITEM_SK string, > I_ITEM_ID string, > I_REC_START_DATE string, > I_REC_END_DATE string, > I_ITEM_DESC string, > I_CURRENT_PRICE string, > I_WHOLESALE_COST string, > I_BRAND_ID string, > I_BRAND string, > I_CLASS_ID string, > I_CLASS string, > I_CATEGORY_ID string, > I_CATEGORY string, > I_MANUFACT_ID string, > I_MANUFACT string, > I_SIZE string, > I_FORMULATION string, > I_COLOR string, > I_UNITS string, > I_CONTAINER string, > I_PRODUCT_NAME string, > REMARK string, > HYREN_S_DATE string, > HYREN_E_DATE string, > HYREN_MD5_VAL string > ) > stored BY 'carbondata' TBLPROPERTIES > ( > 'TABLE_BLOCKSIZE'='256', > 'SORT_COLUMNS'='I_MANAGER_ID,I_ITEM_SK', > 'DICTIONARY_INCLUDE'='I_MANAGER_ID,I_ITEM_SK' > ); > > > CREATE TABLE > yuan_yuan10_CUSTOMER > ( > C_CURRENT_ADDR_SK string, > C_CUSTOMER_SK string, > C_CUSTOMER_ID string, > C_CURRENT_CDEMO_SK string, > C_CURRENT_HDEMO_SK string, > C_FIRST_SHIPTO_DATE_SK string, > C_FIRST_SALES_DATE_SK string, > C_SALUTATION string, > C_FIRST_NAME string, > C_LAST_NAME string, > C_PREFERRED_CUST_FLAG string, > C_BIRTH_DAY string, > C_BIRTH_MONTH string, > C_BIRTH_YEAR string, > C_BIRTH_COUNTRY string, > C_LOGIN string, > C_EMAIL_ADDRESS string, > C_LAST_REVIEW_DATE string, > REMRK string, > HYREN_S_DATE string, > HYREN_E_DATE string, > HYREN_MD5_VAL string > ) > stored BY 'carbondata' TBLPROPERTIES > ( > 'TABLE_BLOCKSIZE'='256', > 'SORT_COLUMNS'='C_CURRENT_ADDR_SK,C_CUSTOMER_SK', > 'DICTIONARY_INCLUDE'='C_CURRENT_ADDR_SK,C_CUSTOMER_SK' > ); > > > > CREATE TABLE > yuan_yuan10_CUSTOMER_ADDRESS > ( > CA_ZIP string, > CA_ADDRESS_SK string, > CA_ADDRESS_ID string, > CA_STREET_NUMBER string, > CA_STREET_NAME string, > CA_STREET_TYPE string, > CA_SUITE_NUMBER string, > CA_CITY string, > CA_COUNTY string, > CA_STATE string, > CA_COUNTRY string, > CA_GMT_OFFSET string, > CA_LOCATION_TYPE string, > REMARK string, > HYREN_S_DATE string, > HYREN_E_DATE string, > HYREN_MD5_VAL string > ) > stored BY 'carbondata' TBLPROPERTIES > ( > 'TABLE_BLOCKSIZE'='256', > 'SORT_COLUMNS'='CA_ZIP,CA_ADDRESS_SK', > 'DICTIONARY_INCLUDE'='CA_ZIP,CA_ADDRESS_SK' > ); > > > CREATE TABLE > yuan_yuan10_STORE > ( > S_ZIP string, > S_STORE_SK string, > S_STORE_ID string, > S_REC_START_DATE string, > S_REC_END_DATE string, > S_CLOSED_DATE_SK string, > S_STORE_NAME string, > S_NUMBER_EMPLOYEES string, > S_FLOOR_SPACE string, > S_HOURS string, > S_MANAGER string, > S_MARKET_ID string, > S_GEOGRAPHY_CLASS string, > S_MARKET_DESC string, > S_MARKET_MANAGER string, > S_DIVISION_ID string, > S_DIVISION_NAME string, > S_COMPANY_ID string, > S_COMPANY_NAME string, > S_STREET_NUMBER string, > S_STREET_NAME string, > S_STREET_TYPE string, > S_SUITE_NUMBER string, > S_CITY string, > S_COUNTY string, > S_STATE string, > S_COUNTRY string, > S_GMT_OFFSET string, > S_TAX_PRECENTAGE string, > REMRK string, > HYREN_S_DATE string, > HYREN_E_DATE string, > HYREN_MD5_VAL string > ) > stored BY 'carbondata' TBLPROPERTIES > ( > 'TABLE_BLOCKSIZE'='256', > 'SORT_COLUMNS'='S_ZIP,S_STORE_SK', > 'DICTIONARY_INCLUDE'='S_ZIP,S_STORE_SK' > ); > > *Query sql:* > SELECT > i_brand_id brand_id, > i_brand brand, > i_manufact_id, > i_manufact, > SUM(ss_ext_sales_price) ext_price > FROM > yuan_yuan10_date_dim, > yuan_yuan10_store_sales, > yuan_yuan10_item, > yuan_yuan10_customer, > yuan_yuan10_customer_address, > yuan_yuan10_store > WHERE > d_date_sk = ss_sold_date_sk > AND ss_item_sk = i_item_sk > AND i_manager_id = 7 > AND d_moy = 11 > AND d_year = 1999 > AND ss_customer_sk = c_customer_sk > AND c_current_addr_sk = ca_address_sk > AND SUBSTR(ca_zip, 1, 5) <> SUBSTR(s_zip, 1, 5) > AND ss_store_sk = s_store_sk > AND ss_sold_date_sk BETWEEN 2451484 AND 2451513 -- partition key filter > GROUP BY > i_brand, > i_brand_id, > i_manufact_id, > i_manufact > ORDER BY > ext_price DESC, > i_brand, > i_brand_id, > i_manufact_id, > i_manufact limit 100; > > *Table records:* > all about 171G > yuan_yuan10_date_dim > 73049 > yuan_yuan10_store_sales > 1439977468 > yuan_yuan10_item > 28000 > yuan_yuan10_customer > 2100000 > yuan_yuan10_customer_address > 1050000 > yuan_yuan10_store > 68 > > The query statement takes at least 6s,how can I reduce the time to 2s? > > > > > > > > > > -- > Sent from: > http://apache-carbondata-dev-mailing-list-archive.1130556.n5.nabble.com/ -- Sent from: http://apache-carbondata-dev-mailing-list-archive.1130556.n5.nabble.com/