SELECT
*
FROM
flight_data TABLESAMPLE(BUCKET 1 OUT OF 4 ON unique_carrier)s LIMIT 100;
CREATE EXTERNAL TABLE flight_data_b(
year INT,
month INT,
day INT,
day_of_week INT,
dep_time INT,
crs_dep_time INT,
arr_time INT,
crs_arr_time INT,
unique_carrier STRING,
flight_num INT,
tail_num STRING,
actual_elapsed_time INT,
crs_elapsed_time INT,
air_time INT,
arr_delay INT,
dep_delay INT,
origin STRING,
dest STRING,
distance INT,
taxi_in INT,
taxi_out INT,
cancelled INT,
cancellation_code STRING,
diverted INT,
carrier_delay STRING,
weather_delay STRING,
nas_delay STRING,
security_delay STRING,
late_aircraft_delay STRING
)
CLUSTERED BY (unique_carrier) INTO 4 BUCKETS
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LOCATION '/user/hive/warehouse/flight_data_b';
SET hive.enforce.bucketing = true;
INSERT OVERWRITE TABLE flight_data_b
SELECT
*
FROM
flight_data;
SET hive.enforce.bucketing = false;
hadoop fs -ls /user/hive/warehouse/flight_data_b/
SELECT
*
FROM
flight_data_b TABLESAMPLE(BUCKET 1 OUT OF 4 ON unique_carrier)s LIMIT 100;