Пономаренко Роман
@rerand0m
rerandom@ispras.ru
$ hive -e 'SHOW DATABASES'
$ echo 'SHOW DATABASES' > sh_db.sql $ hive -f sh_db.sql
$ hive hive> SHOW DATABASES; hive> !nproc; hive> dfs -ls;
$ ssh <USERNAME>@mipt-client.atp-fivt.org -L 8888:mipt-node03.atp-fivt.org:8888
Заходить через localhost:8888
Логин: hue_user
Пароль: hue_password
Создание БД
create database <YOUR_USER>_test location '/user/<YOUR_USER>/test_metastore';
Удаление БД
drop database if exists <YOUR_USER>_test cascade;
Вывод информации о БД
describe database <YOUR_USER>_test;
Создание таблицы
ADD JAR /opt/cloudera/parcels/CDH/lib/hive/lib/hive-contrib.jar;
USE ${env:USER}_test;
DROP TABLE IF EXISTS Subnets;
-- EXTERNAL, MANAGED, TEMPORARY
CREATE EXTERNAL TABLE Subnets (
ip STRING,
mask STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '/data/subnets/variant1';
Обращение
USE ${env:USER}_test;
SELECT * FROM Subnets LIMIT 10;
Партицирование
ADD JAR /opt/cloudera/parcels/CDH/lib/hive/lib/hive-contrib.jar;
SET hive.exec.dynamic.partition.mode=nonstrict;
USE ${env:USER}_test;
DROP TABLE IF EXISTS SubnetsPart;
CREATE EXTERNAL TABLE SubnetsPart (
ip STRING
)
PARTITIONED BY (mask STRING)
STORED AS TEXTFILE;
INSERT OVERWRITE TABLE SubnetsPart PARTITION (mask)
SELECT * FROM Subnets;
Парсинг входных данных
add jar /opt/cloudera/parcels/CDH/lib/hive/lib/hive-contrib.jar;
add jar /opt/cloudera/parcels/CDH/lib/hive/lib/hive-serde.jar;
USE ${env:USER}_test;
DROP TABLE IF EXISTS SerDeExample;
CREATE EXTERNAL TABLE SerDeExample (
ip STRING,
date STRING,
request STRING,
responseCode STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
"input.regex" = '^(\\S*)\\t.*$'
)
STORED AS TEXTFILE
LOCATION '/data/user_logs/user_logs_S';
select * from SerDeExample limit 10;
regex101
Hive streaming
ADD JAR /opt/cloudera/parcels/CDH/lib/hive/lib/hive-contrib.jar;
USE ${env:USER}_test;
SELECT TRANSFORM(ip)
USING 'cut -d . -f 1' AS ip
FROM Subnets
LIMIT 10;
Hive streaming
ADD JAR /opt/cloudera/parcels/CDH/lib/hive/lib/hive-contrib.jar;
ADD FILE ./script.sh;
USE ${env:USER}_test;
SELECT TRANSFORM(ip)
USING './script.sh' AS ip2
FROM Subnets
LIMIT 10;