Hive语法

Hive 使用 HQL（Hive Query Language），其语法与标准 SQL 区别不大

hive中的表

按存储位置分为：

内部表（管理表）：默认存放在Hive默认路径(/user/hive/warehouse/)下，数据由Hive管理的表
外部表：使用 EXTERNAL 关键字创建、通常用 LOCATION 指定存储位置（不在默认路径下）的表，Hive只管理其元数据

注意： 在删除内部表的时候，HDFS上对应的目录会随之删除，外部表则不会

按存储细粒度分为：

普通表
分区表：将分区字段值相同的数据放在同一目录下的表
桶表：类似分区表，按列值哈希分桶的表

ddl

创建表的语法

CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name    
  [(col_name data_type [COMMENT col_comment], ...)]
  [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
  [CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
  [ROW FORMAT row_format] 
  [STORED AS file_format] 
  [LOCATION hdfs_path]
  [TBLPROPERTIES (property_name=property_value, ...)]

说明：

PARTITIONED 指分区，不同的分区会以文件夹的形式存在
CLUSTERED BY 指按指定列的哈希值分桶，
- 例如在插入数据中有两项“张三，数学”和“张三，英语”，若是CLUSTERED BY name，则这两行会因 name 的哈希值相同而落入同一个桶（文件）中，但仍然是两行数据，不会被合并；这个机制也是为了加快查询（如抽样、按桶 join）的操作
SORTED BY 指定桶内数据的排序方式
BUCKETS 指定分桶信息
ROW FORMAT 指定分隔符
STORED AS 指定文件的存储格式
LOCATION 指在HDFS上存储的位置

创建表的默认格式

CREATE TABLE ...
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\001'
  COLLECTION ITEMS TERMINATED BY '\002'
  MAP KEYS TERMINATED BY '\003'
  LINES TERMINATED BY '\n'
STORED AS TEXTFILE

分隔对象	分隔符	描述
行内字段	CTRL+A	FIELDS
容器类型的条目	CTRL+B	COLLECTION ITEMS
Map的Key和Value	CTRL+C	MAP KEYS
行与行	换行符	LINES

示例

#创建表
create table stus(id int,name string,score double);
create table t_phone_back as select * from t_phone sort by id;
#注意：不管原表是不是分区表，通过这样的方式创建的表是普通表		
#创建临时表 
create temporary table 表名 as select ... from 源表;
#创建表使用自定义的字段分隔符
create table cars(id int,name string,price double) row format delimited fields terminated by '\t';
#创建外部表
create external table phones(id int,name string,price double) row format delimited fields terminated by '\t' location '/phones';
#创建分区表
create table cars(id int,name string,price double) partitioned by (month string) row format delimited fields terminated by '\t';
#创建桶表
create table bucketed_user (id int,name string) clustered by (id) into 4 buckets;

#清空表
truncate table 表名;
#查询表结构
desc phones;
#删除表
drop table cars;

dml

#插入数据：
insert into stus values(2,'lisi',80),(2,'lisi',80),(2,'lisi',80);
#插入分区表
insert into/overwrite cars partition(month='4') values(1,"QQ",20);

#从本地加载文件到Hive的数据库表中
load data local inpath '/home/kevin/car4' into table cars;
#从HDFS集群中加载文件到Hive的数据库表中（原始文件会被移动到表目录下，相当于剪切）：
load data inpath "/car4" into table cars;
#导入数据到分区表
load data local inpath "/home/kevin/car4" into table cars partition(month='6');

#动态插入数据（执行前需先设置下面的动态分区参数；select 的最后一列对应动态分区字段 brand_id）
INSERT overwrite TABLE cdp_ods.sku_archive PARTITION (brand_id) select ... from 源表;
#开启动态分区
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
#设置每个节点可创建的最大动态分区数,默认最大100个
set hive.exec.max.dynamic.partitions.pernode=10000;

#修复分区:
msck repair table 表名;
#删除分区:
alter table table_name drop if exists partition(par_col='par_value');

特殊结构

#数组
create table table(id int,info array<string>) row format delimited fields terminated by '\t' collection items terminated by ',';
insert into table values(1,array('hello','world'));

#结构体
create table table(id int,course struct<cname:string,bname:string>) row format delimited fields terminated by '\t' collection items terminated by ',';
insert into table values(1,named_struct('cname','java','bname','corejava'));

#map集合
create table table(id int,info map<string,string>) row format delimited fields terminated by '\t' collection items terminated by ',' map keys terminated by ':';
insert into table values(1,str_to_map('a:1,b:2',',',':'));