[docker](hive) add hive3 docker compose and modify scripts #33115

Merged (26 commits, Apr 16, 2024)
Commits
637ebe9
add hive3 docker compose
suxiaogang223 Apr 1, 2024
91ee085
use brigde mode not host
suxiaogang223 Apr 2, 2024
eb733bb
create_preinstalled_table for hive3
suxiaogang223 Apr 7, 2024
c03c36f
change hive3 to bridge
suxiaogang223 Apr 7, 2024
4469fc0
add env path in custom_settings.env for hive
suxiaogang223 Apr 7, 2024
7117420
change hive-2x.yaml.tpl to support custom_sttings.env
suxiaogang223 Apr 7, 2024
77048dd
fix
suxiaogang223 Apr 7, 2024
384762e
add HS_PORT in custom_settings
suxiaogang223 Apr 7, 2024
8de0e29
fix
suxiaogang223 Apr 7, 2024
e73d4f0
fix
suxiaogang223 Apr 7, 2024
9d65464
remove PG_PORT from custom_settings.env
suxiaogang223 Apr 7, 2024
abdb74b
set autogather
suxiaogang223 Apr 8, 2024
c3ae65d
not to start hive2 default
suxiaogang223 Apr 8, 2024
de1f299
can not both run hive2 and hive3
suxiaogang223 Apr 8, 2024
796f8b9
support to run both hive and hive3
suxiaogang223 Apr 9, 2024
2aceabd
fix hive3 read schema error
suxiaogang223 Apr 10, 2024
4c4f647
change hive docker to bridge network
suxiaogang223 Apr 10, 2024
13510bf
fix
suxiaogang223 Apr 10, 2024
28e4530
fix UnknownHostException: namenode
suxiaogang223 Apr 10, 2024
edff305
fix UnknownHostException:namenode
suxiaogang223 Apr 11, 2024
be01e1a
test hive3
suxiaogang223 Apr 11, 2024
4bde2ba
change unreasonable tests
suxiaogang223 Apr 11, 2024
2272bd8
fix test for hive3 about hive.stats.column.autogather
suxiaogang223 Apr 12, 2024
a3aacf8
fix test_hive_ddl for hive3
suxiaogang223 Apr 14, 2024
520e033
only run hive3 default
suxiaogang223 Apr 15, 2024
0ca926f
Revert "only run hive3 default"
suxiaogang223 Apr 16, 2024
@@ -0,0 +1,50 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore-postgresql/metastore
HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver
HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive
HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive
HIVE_SITE_CONF_datanucleus_autoCreateSchema=false
HIVE_SITE_CONF_hive_metastore_uris=thrift://hive-metastore:9083
HIVE_SITE_CONF_hive_server2_thrift_bind_host=0.0.0.0
HIVE_SITE_CONF_hive_server2_thrift_port=10000
HIVE_SITE_CONF_metastore_storage_schema_reader_impl=org.apache.hadoop.hive.metastore.SerDeStorageSchemaReader

CORE_CONF_fs_defaultFS=hdfs://${IP_HOST}:${FS_PORT}
CORE_CONF_hadoop_http_staticuser_user=root
CORE_CONF_hadoop_proxyuser_hue_hosts=*
CORE_CONF_hadoop_proxyuser_hue_groups=*

HDFS_CONF_dfs_webhdfs_enabled=true
HDFS_CONF_dfs_permissions_enabled=false
HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false

YARN_CONF_yarn_log___aggregation___enable=true
YARN_CONF_yarn_resourcemanager_recovery_enabled=true
YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate
YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/
YARN_CONF_yarn_timeline___service_enabled=true
YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true
YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
YARN_CONF_yarn_timeline___service_hostname=historyserver
YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031
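For context on the new env file above: the bde2020-based Hadoop/Hive images build their *-site.xml files from these prefixed variables at container start-up, with `___` mapping to `-` and `_` mapping to `.` in property names. A minimal sketch of that translation, assuming the stock bde2020 entrypoint convention rather than anything added in this PR:

```bash
#!/usr/bin/env bash
# Sketch only: mimics how a bde2020-style entrypoint would turn HIVE_SITE_CONF_*
# variables into hive-site.xml properties. Not code from this PR.
for var in $(compgen -e | grep '^HIVE_SITE_CONF_'); do
    key="${var#HIVE_SITE_CONF_}"
    key="${key//___/@}"   # protect '___' (dash) before the single-underscore pass
    key="${key//_/.}"     # hive_server2_thrift_port -> hive.server2.thrift.port
    key="${key//@/-}"     # restore dashes for names written with '___'
    printf '  <property><name>%s</name><value>%s</value></property>\n' "${key}" "${!var}"
done
```

The same scheme applies to the CORE_CONF_, HDFS_CONF_ and YARN_CONF_ groups, which land in core-site.xml, hdfs-site.xml and yarn-site.xml respectively.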
@@ -15,25 +15,24 @@
# limitations under the License.
#

HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://externalEnvIp:5432/metastore
HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore-postgresql/metastore
HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver
HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive
HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive
HIVE_SITE_CONF_datanucleus_autoCreateSchema=false
HIVE_SITE_CONF_hive_metastore_uris=thrift://externalEnvIp:9083
HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
HIVE_SITE_CONF_hive_metastore_uris=thrift://hive-metastore:9083
HIVE_SITE_CONF_hive_server2_thrift_bind_host=0.0.0.0
HIVE_SITE_CONF_hive_server2_thrift_port=10000
HIVE_SITE_CONF_hive_compactor_initiator_on=true
HIVE_SITE_CONF_hive_compactor_worker_threads=2
HIVE_SITE_CONF_metastore_storage_schema_reader_impl=org.apache.hadoop.hive.metastore.SerDeStorageSchemaReader

CORE_CONF_fs_defaultFS=hdfs://namenode:8020
CORE_CONF_hadoop_http_staticuser_user=root
CORE_CONF_hadoop_proxyuser_hue_hosts=*
CORE_CONF_hadoop_proxyuser_hue_groups=*
CORE_CONF_hadoop_proxyuser_hive_hosts=*

HDFS_CONF_dfs_webhdfs_enabled=true
HDFS_CONF_dfs_permissions_enabled=false
HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false

YARN_CONF_yarn_log___aggregation___enable=true
YARN_CONF_yarn_resourcemanager_recovery_enabled=true
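The substantive change in this env file is that the metastore JDBC URL and thrift URI now use compose service names (hive-metastore-postgresql, hive-metastore) instead of the substituted externalEnvIp; this works because the stack now runs on a user-defined bridge network, where Docker's embedded DNS resolves service names. A purely illustrative check from inside the running hive2 server container (names follow the template below):

```bash
# Illustrative: resolve the metastore services from inside the hive-server container.
docker exec "${CONTAINER_UID}hive2-server" getent hosts hive-metastore hive-metastore-postgresql
```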
70 changes: 33 additions & 37 deletions docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl
@@ -19,87 +19,83 @@
version: "3.8"

services:
doris--namenode:
namenode:
image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8
environment:
- CLUSTER_NAME=test
env_file:
- ./hadoop-hive.env
container_name: doris--namenode
expose:
- "50070"
- "8020"
- "9000"
container_name: ${CONTAINER_UID}hadoop2-namenode
ports:
- "${FS_PORT}:8020"
healthcheck:
test: [ "CMD", "curl", "http://localhost:50070/" ]
interval: 5s
timeout: 120s
retries: 120
network_mode: "host"

doris--datanode:
datanode:
image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8
env_file:
- ./hadoop-hive.env
environment:
SERVICE_PRECONDITION: "externalEnvIp:50070"
container_name: doris--datanode
expose:
- "50075"
SERVICE_PRECONDITION: "namenode:50070"
container_name: ${CONTAINER_UID}hadoop2-datanode
healthcheck:
test: [ "CMD", "curl", "http://localhost:50075" ]
interval: 5s
timeout: 60s
retries: 120
network_mode: "host"

doris--hive-server:
hive-server:
image: bde2020/hive:2.3.2-postgresql-metastore
env_file:
- ./hadoop-hive.env
- ./hadoop-hive-metastore.env
environment:
HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://externalEnvIp:5432/metastore"
SERVICE_PRECONDITION: "externalEnvIp:9083"
container_name: doris--hive-server
expose:
- "10000"
HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://hive-metastore/metastore"
SERVICE_PRECONDITION: "hive-metastore:9083"
container_name: ${CONTAINER_UID}hive2-server
ports:
- "${HS_PORT}:10000"
depends_on:
- doris--datanode
- doris--namenode
- datanode
- namenode
healthcheck:
test: beeline -u "jdbc:hive2://127.0.0.1:10000/default" -n health_check -e "show databases;"
interval: 10s
timeout: 120s
retries: 120
network_mode: "host"


doris--hive-metastore:
hive-metastore:
image: bde2020/hive:2.3.2-postgresql-metastore
env_file:
- ./hadoop-hive.env
- ./hadoop-hive-metastore.env
command: /bin/bash /mnt/scripts/hive-metastore.sh
# command: /opt/hive/bin/hive --service metastore
environment:
SERVICE_PRECONDITION: "externalEnvIp:50070 externalEnvIp:50075 externalEnvIp:5432"
container_name: doris--hive-metastore
expose:
- "9083"
SERVICE_PRECONDITION: "namenode:50070 datanode:50075 hive-metastore-postgresql:5432"
container_name: ${CONTAINER_UID}hive2-metastore
ports:
- "${HMS_PORT}:9083"
volumes:
- ./scripts:/mnt/scripts
depends_on:
- doris--hive-metastore-postgresql
network_mode: "host"
- hive-metastore-postgresql

doris--hive-metastore-postgresql:
hive-metastore-postgresql:
image: bde2020/hive-metastore-postgresql:2.3.0
restart: always
container_name: doris--hive-metastore-postgresql
expose:
- "5432"
container_name: ${CONTAINER_UID}hive2-metastore-postgresql
ports:
- "${PG_PORT}:5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres"]
interval: 5s
timeout: 60s
retries: 120
network_mode: "host"

# solve HiveServer2 connect error:
# java.net.URISyntaxException Illegal character in hostname :thrift://${CONTAINER_UID}hive2_default:9083
networks:
default:
name: ${CONTAINER_UID}hive2-default
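A note on SERVICE_PRECONDITION, which changes in several services above: the images use it to sequence start-up by blocking until every listed host:port accepts a TCP connection, so pointing it at service names instead of externalEnvIp keeps the ordering logic intact on the bridge network. A minimal sketch of such a wait loop (the real entrypoint in these images may differ):

```bash
#!/usr/bin/env bash
# Sketch of a SERVICE_PRECONDITION-style wait loop; not the actual image entrypoint.
for precondition in ${SERVICE_PRECONDITION}; do
    host="${precondition%:*}"
    port="${precondition#*:}"
    until (echo > "/dev/tcp/${host}/${port}") 2>/dev/null; do
        echo "waiting for ${host}:${port} ..."
        sleep 2
    done
done
```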
29 changes: 7 additions & 22 deletions ...arties/docker-compose/hive/gen_env.sh.tpl → .../docker-compose/hive/hive-2x_settings.env
100755 → 100644
@@ -16,26 +16,11 @@
# specific language governing permissions and limitations
# under the License.

####################################################################
# This script will generate hadoop-hive.env from hadoop-hive.env.tpl
####################################################################
# Change this to a specific string.
# Do not use "_" or other special characters, only numbers and letters.
# NOTICE: changing this uid will modify hive-*.yaml

set -eo pipefail

ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
FS_PORT=8020
HMS_PORT=9083

cp "${ROOT}"/hadoop-hive.env.tpl "${ROOT}"/hadoop-hive.env
# Need to set hostname of container to same as host machine's.
# Otherwise, the doris process can not connect to namenode directly.
HOST_NAME="doris--"

{
echo "FS_PORT=${FS_PORT}"
echo "HMS_PORT=${HMS_PORT}"
echo "CORE_CONF_fs_defaultFS=hdfs://${externalEnvIp}:${FS_PORT}"
echo "HOST_NAME=${HOST_NAME}"
echo "externalEnvIp=${externalEnvIp}"

} >>"${ROOT}"/hadoop-hive.env
export FS_PORT=8220 # should be the same as in regression-conf.groovy
export HMS_PORT=9283 # should be the same as in regression-conf.groovy
export HS_PORT=12000 # should be the same as in regression-conf.groovy
export PG_PORT=5632 # should be the same as in regression-conf.groovy
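Replacing the generated gen_env.sh.tpl with this static settings file means the ports are plain exported variables shared by the compose template and the regression configuration. A hedged sketch of how the template and settings file could be combined by hand; the repository's actual launcher script may do this differently, and CONTAINER_UID here is just an example value:

```bash
#!/usr/bin/env bash
set -euo pipefail
export CONTAINER_UID="doris-ci-"                 # example value, not defined in this file
source hive-2x_settings.env                      # FS_PORT, HMS_PORT, HS_PORT, PG_PORT
envsubst <hive-2x.yaml.tpl >hive-2x.yaml         # fill ${CONTAINER_UID}/${*_PORT} placeholders
docker compose -p "${CONTAINER_UID}hive2" -f hive-2x.yaml up -d
```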
102 changes: 102 additions & 0 deletions docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl
@@ -0,0 +1,102 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


version: "3.8"

services:
namenode:
image: bde2020/hadoop-namenode:2.0.0-hadoop3.2.1-java8
environment:
- CLUSTER_NAME=test
env_file:
- ./hadoop-hive.env
container_name: ${CONTAINER_UID}hadoop3-namenode
ports:
- "${FS_PORT}:8020"
healthcheck:
test: [ "CMD", "curl", "http://localhost:9870/" ]
interval: 5s
timeout: 120s
retries: 120

datanode:
image: bde2020/hadoop-datanode:2.0.0-hadoop3.2.1-java8
env_file:
- ./hadoop-hive.env
environment:
SERVICE_PRECONDITION: "namenode:9870"
container_name: ${CONTAINER_UID}hadoop3-datanode
healthcheck:
test: [ "CMD", "curl", "http://localhost:9864" ]
interval: 5s
timeout: 60s
retries: 120

hive-server:
image: lishizhen/hive:3.1.2-postgresql-metastore
env_file:
- ./hadoop-hive-metastore.env
environment:
HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://hive-metastore/metastore"
SERVICE_PRECONDITION: "hive-metastore:9083"
container_name: ${CONTAINER_UID}hive3-server
ports:
- "${HS_PORT}:10000"
depends_on:
- datanode
- namenode
healthcheck:
test: beeline -u "jdbc:hive2://127.0.0.1:10000/default" -n health_check -e "show databases;"
interval: 10s
timeout: 120s
retries: 120


hive-metastore:
image: lishizhen/hive:3.1.2-postgresql-metastore
env_file:
- ./hadoop-hive-metastore.env
command: /bin/bash /mnt/scripts/hive-metastore.sh
# command: /opt/hive/bin/hive --service metastore
environment:
SERVICE_PRECONDITION: "namenode:9870 datanode:9864 hive-metastore-postgresql:5432"
container_name: ${CONTAINER_UID}hive3-metastore
ports:
- "${HMS_PORT}:9083"
volumes:
- ./scripts:/mnt/scripts
depends_on:
- hive-metastore-postgresql

hive-metastore-postgresql:
image: bde2020/hive-metastore-postgresql:3.1.0
container_name: ${CONTAINER_UID}hive3-metastore-postgresql
ports:
- "${PG_PORT}:5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres"]
interval: 5s
timeout: 60s
retries: 120

# solve HiveServer2 connect error:
# java.net.URISyntaxException Illegal character in hostname :thrift://${CONTAINER_UID}hive3_default:9083

networks:
default:
name: ${CONTAINER_UID}hive3-default
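Compared with the 2.x template, the main functional difference here (besides image tags) is the web UI ports: Hadoop 3 moved the NameNode HTTP UI from 50070 to 9870 and the DataNode UI from 50075 to 9864, which is why the healthchecks and SERVICE_PRECONDITION values change while the fs.defaultFS RPC port 8020 stays the same. A small illustrative check against a running hive3 stack:

```bash
# Illustrative only: probe the Hadoop 3 web UIs on their upstream default ports.
docker exec "${CONTAINER_UID}hadoop3-namenode" curl -sf http://localhost:9870/ >/dev/null && echo "namenode UI up"
docker exec "${CONTAINER_UID}hadoop3-datanode" curl -sf http://localhost:9864/ >/dev/null && echo "datanode UI up"
```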
26 changes: 26 additions & 0 deletions docker/thirdparties/docker-compose/hive/hive-3x_settings.env
@@ -0,0 +1,26 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Change this to a specific string.
# Do not use "_" or other special characters, only numbers and letters.
# NOTICE: changing this uid will modify hive-*.yaml

export FS_PORT=8020 # should be the same as in regression-conf.groovy
export HMS_PORT=9083 # should be the same as in regression-conf.groovy
export HS_PORT=10000 # should be the same as in regression-conf.groovy
export PG_PORT=5432 # should be the same as in regression-conf.groovy
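Because hive-2x_settings.env remaps every host port (8220, 9283, 12000, 5632) away from the hive3 defaults above, and each template declares its own named network, the two stacks can run side by side, which is what the "support to run both hive and hive3" commit enables. Continuing the earlier sketch (again illustrative, not the repository's launcher):

```bash
# With the hive2 stack already up, the hive3 stack can be started alongside it.
source hive-3x_settings.env                      # FS_PORT=8020, HMS_PORT=9083, HS_PORT=10000, PG_PORT=5432
envsubst <hive-3x.yaml.tpl >hive-3x.yaml
docker compose -p "${CONTAINER_UID}hive3" -f hive-3x.yaml up -d
```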
@@ -601,6 +601,8 @@ CREATE TABLE `unsupported_type_table`(
k6 int
);

set hive.stats.column.autogather=false;

CREATE TABLE `schema_evo_test_text`(
id int,
name string
@@ -628,6 +630,8 @@ insert into `schema_evo_test_orc` select 1, "kaka";
alter table `schema_evo_test_orc` ADD COLUMNS (`ts` timestamp);
insert into `schema_evo_test_orc` select 2, "messi", from_unixtime(to_unix_timestamp('20230101 13:01:03','yyyyMMdd HH:mm:ss'));

set hive.stats.column.autogather=true;

-- Currently docker is hive 2.x version. Hive 2.x versioned full-acid tables need to run major compaction.
SET hive.support.concurrency=true;
SET hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;