#!/bin/bash
# Generate the corrected SQL file
cat > hive2dlc_single_partition1.sql << 'EOF'
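-- Session settings: enable fully dynamic partitioning and raise the partition
-- limits so a single INSERT can create up to 10,000 partitions in one pass.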
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
SET hive.exec.max.dynamic.partitions=10000;
SET hive.exec.max.dynamic.partitions.pernode=10000;
SET spark.sql.shuffle.partitions=50;
CREATE DATABASE IF NOT EXISTS journey;
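-- Target table: two data columns, partitioned by the date string dt (yyyyMMdd).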
CREATE TABLE IF NOT EXISTS journey.hive_single_partition (
    shop_name STRING,
    customer_id INT
)
PARTITIONED BY (dt STRING);
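-- Generate 20,000 rows (id = 0..19999); FLOOR(id / 2) yields 10,000 distinct
-- partition_ids, i.e. two customers per shop, with dt advancing one day per
-- partition starting from 2000-01-01.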
INSERT OVERWRITE TABLE journey.hive_single_partition PARTITION (dt)
SELECT
    CONCAT('shop', CAST(partition_id AS STRING)) AS shop_name,
    CAST(partition_id * 100 + (id % 2) AS INT) AS customer_id,
    DATE_FORMAT(DATE_ADD('2000-01-01', CAST(partition_id AS INT)), 'yyyyMMdd') AS dt
FROM (
    SELECT
        id,
        CAST(FLOOR(id / 2) AS INT) AS partition_id
    FROM (
        SELECT explode(sequence(0, 19999)) AS id
    ) ids
) generated_data;
EOF
# Run the generated SQL file with Spark SQL in the background and wait for it to finish
nohup spark-sql --master yarn -f hive2dlc_single_partition1.sql > hive2dlc_single_partition1.log 2>&1 &
wait

# Collect partition-level statistics once the load has completed
spark-sql --master yarn -e "ANALYZE TABLE journey.hive_single_partition PARTITION(dt) COMPUTE STATISTICS;"
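
# Optional sanity check (a minimal sketch, assuming the load above has finished and
# spark-sql reaches the same metastore): the table should hold 20,000 rows spread
# across 10,000 dt partitions, two rows per partition.
spark-sql --master yarn -e "SHOW PARTITIONS journey.hive_single_partition;" | wc -l
spark-sql --master yarn -e "SELECT COUNT(*) FROM journey.hive_single_partition;"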