大家好:
目前在使用中遇到一个kylin查询慢的场景:
Cube  设计如下:
{
  "uuid": "dfb77a08-f51d-4088-b559-1da67c28a068",
  "last_modified": 1486997875953,
  "version": "1.6.0",
  "name": "insu_t",
  "model_name": "insu_jdmall_model_test",
  "description": "",
  "null_string": null,
  "dimensions": [
    {
      "name": "BRAND",
      "table": "DMT.DMT_KYLIN_JDMALL_ORDR_DTL_I_D",
      "column": "BRAND_CD",
      "derived": null
    },
    {
      "name": "DT",
      "table": "DMT.DMT_KYLIN_JDMALL_ORDR_DTL_I_D",
      "column": "DT",
      "derived": null
    },
    {
      "name": "DIM.DIM_DAY_DERIVED",
      "table": "DIM.DIM_DAY",
      "column": null,
      "derived": [
        "DIM_DAY_NAME",
        "DIM_WEEK_NAME",
        "DIM_MONTH_NAME"
      ]
    },
    {
      "name": "FIRST",
     "table": "DIM.DIM_ITEM_GEN_THIRD_CATE_D",
      "column": "ITEM_FIRST_CATE_NAME",
      "derived": null
    },
    {
      "name": "SECOND",
      "table": "DIM.DIM_ITEM_GEN_THIRD_CATE_D",
      "column": "ITEM_SECOND_CATE_NAME",
      "derived": null
    },
    {
      "name": "THIRD",
      "table": "DIM.DIM_ITEM_GEN_THIRD_CATE_D",
      "column": "ITEM_THIRD_CATE_NAME",
      "derived": null
    }
  ],
  "measures": [
    {
      "name": "_COUNT_",
      "function": {
        "expression": "COUNT",
        "parameter": {
          "type": "constant",
          "value": "1",
          "next_parameter": null
        },
        "returntype": "bigint"
      },
      "dependent_measure_ref": null
    },
    {
      "name": "QTTY",
      "function": {
        "expression": "SUM",
        "parameter": {
          "type": "column",
          "value": "SALE_QTTY",
          "next_parameter": null
        },
        "returntype": "bigint"
      },
      "dependent_measure_ref": null
    },
    {
      "name": "BEFORE",
      "function": {
        "expression": "SUM",
        "parameter": {
          "type": "column",
          "value": "BEFORE_PREFR_AMOUNT",
          "next_parameter": null
        },
        "returntype": "decimal(25,4)"
      },
      "dependent_measure_ref": null
    },
    {
      "name": "USER",
      "function": {
        "expression": "SUM",
        "parameter": {
          "type": "column",
          "value": "USER_ACTUAL_PAY_AMOUNT",
          "next_parameter": null
        },
       "returntype": "decimal(25,4)"
      },
      "dependent_measure_ref": null
    },
    {
      "name": "SALE",
      "function": {
        "expression": "COUNT_DISTINCT",
        "parameter": {
          "type": "column",
          "value": "SALE_ORD_ID",
          "next_parameter": null
        },
        "returntype": "bitmap"
      },
      "dependent_measure_ref": null
    }
  ],
  "dictionaries": [],
  "rowkey": {
    "rowkey_columns": [
      {
        "column": "BRAND_CD",
        "encoding": "dict",
        "isShardBy": false
      },
      {
        "column": "DT",
        "encoding": "dict",
        "isShardBy": false
      },
      {
        "column": "ITEM_FIRST_CATE_NAME",
        "encoding": "dict",
        "isShardBy": false
      },
      {
        "column": "ITEM_SECOND_CATE_NAME",
        "encoding": "dict",
        "isShardBy": false
      },
      {
        "column": "ITEM_THIRD_CATE_NAME",
        "encoding": "dict",
        "isShardBy": false
      }
    ]
  },
  "hbase_mapping": {
    "column_family": [
      {
        "name": "F1",
        "columns": [
          {
            "qualifier": "M",
            "measure_refs": [
              "_COUNT_",
              "QTTY",
              "BEFORE",
              "USER"
            ]
          }
        ]
      },
      {
        "name": "F2",
        "columns": [
          {
            "qualifier": "M",
            "measure_refs": [
              "SALE"
            ]
          }
        ]
      }
    ]
  },
  "aggregation_groups": [
    {
      "includes": [
        "BRAND_CD",
        "DT",
        "ITEM_FIRST_CATE_NAME",
        "ITEM_SECOND_CATE_NAME",
        "ITEM_THIRD_CATE_NAME"
      ],
      "select_rule": {
        "hierarchy_dims": [],
        "mandatory_dims": [],
        "joint_dims": []
      }
    }
  ],
  "signature": "Kl5sPTVN78bEYTGKoUOsWg==",
  "notify_list": [],
  "status_need_notify": [
    "ERROR",
    "DISCARDED",
    "SUCCEED"
  ],
  "partition_date_start": 1483747200000,
  "partition_date_end": 3153600000000,
  "auto_merge_time_ranges": [
    604800000,
    2419200000
  ],
  "retention_range": 0,
  "engine_type": 2,
  "storage_type": 2,
  "override_kylin_properties": {
    "kylin.hbase.region.cut": "1"
  }
}

数据量是14天的数据,sale_ord_id的基数是1.5亿
Select dt,item_second_cate_name,count(distinct sale_ord_id),sum(sale_qtty)
from DMT.DMT_KYLIN_JDMALL_ORDR_DTL_I_D a
left join dim.dim_day b on a.dt = b.dim_day_txdate
left join DIM.DIM_ITEM_GEN_THIRD_CATE_D c on a.item_third_cate_cd = 
c.item_third_cate_id
group by dt,item_second_cate_name;

这条语句执行时间是37秒,去掉count(distinct sale_ord_id)后查询时0.07秒
dt,item_second_cate_name 都是normal维度,从结果看是count_distinct度量导致的慢查询,请问这个有什么优化的建议吗?

谢谢!



Reply via email to