...
| Info |
|---|
| Column statistics were introduced in Hive 0.10.0 by HIVE-1362; this page is the design document for that feature. Automatic gathering of column statistics was introduced in Hive 2.3 by HIVE-11160; this page is also the design document for that feature. |
...
Please note that table and column aliases are not supported in the ANALYZE statement.
To view column statistics:
describe formatted [table_name] [column_name];
Metastore Schema
To persist column-level statistics, we propose to add the following new tables:
...
LOW_VALUE RAW,
HIGH_VALUE RAW,
NUM_NULLS BIGINT,
NUM_DISTINCTS BIGINT,
BIT_VECTOR BLOB, /* introduced in HIVE-16997 in Hive 3.0.0 */
AVG_COL_LEN DOUBLE,
MAX_COL_LEN BIGINT,
NUM_TRUES BIGINT,
NUM_FALSES BIGINT,
LAST_ANALYZED BIGINT NOT NULL)
...
LOW_VALUE RAW,
HIGH_VALUE RAW,
NUM_NULLS BIGINT,
NUM_DISTINCTS BIGINT,
BIT_VECTOR BLOB, /* introduced in HIVE-16997 in Hive 3.0.0 */
AVG_COL_LEN DOUBLE,
MAX_COL_LEN BIGINT,
NUM_TRUES BIGINT,
NUM_FALSES BIGINT,
LAST_ANALYZED BIGINT NOT NULL)
...
// Statistics record whose low/high bounds are carried as doubles.
// Fields 1-4 are required; bitVectors (field 5) is optional.
struct DoubleColumnStatsData {
1: required double lowValue,
2: required double highValue,
3: required i64 numNulls,
// presumably the number of distinct values (cf. the NUM_DISTINCTS column
// in the metastore schema) -- confirm
4: required i64 numDVs,
// NOTE(review): likely corresponds to the BIT_VECTOR column in the
// metastore schema -- confirm
5: optional string bitVectors
}
// Statistics record whose low/high bounds are carried as 64-bit integers.
// Fields 1-4 are required; bitVectors (field 5) is optional.
struct LongColumnStatsData {
1: required i64 lowValue,
2: required i64 highValue,
3: required i64 numNulls,
// presumably the number of distinct values (cf. the NUM_DISTINCTS column
// in the metastore schema) -- confirm
4: required i64 numDVs,
// NOTE(review): likely corresponds to the BIT_VECTOR column in the
// metastore schema -- confirm
5: optional string bitVectors
}
// Statistics record carrying length figures (maxColLen, avgColLen) instead of
// low/high bounds. Fields 1-4 are required; bitVectors (field 5) is optional.
struct StringColumnStatsData {
// presumably the maximum and average column length (cf. the MAX_COL_LEN /
// AVG_COL_LEN columns in the metastore schema) -- confirm
1: required i64 maxColLen,
2: required double avgColLen,
3: required i64 numNulls,
// presumably the number of distinct values (cf. NUM_DISTINCTS) -- confirm
4: required i64 numDVs,
5: optional string bitVectors
}
// Statistics record with length figures and a null count only; unlike
// StringColumnStatsData it declares no numDVs or bitVectors field.
// All three fields are required.
struct BinaryColumnStatsData {
1: required i64 maxColLen,
2: required double avgColLen,
3: required i64 numNulls
}
// Decimal value represented as a binary `unscaled` part plus a 16-bit `scale`.
// NOTE: the field IDs are 1 and 3 as written; ID 2 is not used in this struct.
// NOTE(review): the byte layout of `unscaled` is not specified here -- confirm
// against the implementation.
struct Decimal {
1: required binary unscaled,
3: required i16 scale
}
// Statistics record whose low/high bounds are Decimal structs.
// Unlike the Double/Long variants, lowValue and highValue are optional here;
// numNulls and numDVs remain required.
struct DecimalColumnStatsData {
1: optional Decimal lowValue,
2: optional Decimal highValue,
3: required i64 numNulls,
// presumably the number of distinct values (cf. NUM_DISTINCTS) -- confirm
4: required i64 numDVs,
5: optional string bitVectors
}
// Date value carried as a single required 64-bit day count.
// NOTE(review): which epoch the count is relative to is not stated here --
// confirm.
struct Date {
1: required i64 daysSinceEpoch
}
// Statistics record whose low/high bounds are Date structs.
// lowValue and highValue are optional; numNulls and numDVs are required.
struct DateColumnStatsData {
1: optional Date lowValue,
2: optional Date highValue,
3: required i64 numNulls,
// presumably the number of distinct values (cf. NUM_DISTINCTS) -- confirm
4: required i64 numDVs,
5: optional string bitVectors
}
// Union over the per-type statistics records: a value holds one of the
// seven alternatives listed below.
// NOTE: BooleanColumnStatsData is referenced here but its definition is not
// part of this excerpt.
union ColumnStatisticsData {
1: BooleanColumnStatsData booleanStats,
2: LongColumnStatsData longStats,
3: DoubleColumnStatsData doubleStats,
4: StringColumnStatsData stringStats,
5: BinaryColumnStatsData binaryStats,
6: DecimalColumnStatsData decimalStats,
7: DateColumnStatsData dateStats
}
// Pairs a column's name and type (both strings) with its statistics payload,
// a ColumnStatisticsData union value. All three fields are required.
struct ColumnStatisticsObj {
1: required string colName,
2: required string colType,
3: required ColumnStatisticsData statsData
}
...