diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml
index 96018b1fa..a3597bce7 100644
--- a/.github/workflows/ci-tests.yml
+++ b/.github/workflows/ci-tests.yml
@@ -50,7 +50,6 @@ jobs:
run: |
${{github.workspace}}/dev/ci/ci-local-test.sh
yarn-test:
- if: ${{ false }} # disable for now
name: Yarn Test for Examples (CPU)
runs-on: ubuntu-20.04
steps:
diff --git a/dev/ci/ci-yarn-test.sh b/dev/ci/ci-yarn-test.sh
index e9a93aee3..8b1a2f5c9 100755
--- a/dev/ci/ci-yarn-test.sh
+++ b/dev/ci/ci-yarn-test.sh
@@ -34,7 +34,7 @@ echo "========================================="
echo "Cluster Testing with Spark Version: $SPARK_VERSION"
echo "========================================="
-# Build and run all examples
+# Build and run all Scala examples (debug: the PySpark run below is commented out)
./build-all-scala.sh
./run-all-scala.sh
-./run-all-pyspark.sh
+#./run-all-pyspark.sh
diff --git a/dev/install-build-deps-ubuntu.sh b/dev/install-build-deps-ubuntu.sh
index 8ae9a4e2c..e4d417418 100755
--- a/dev/install-build-deps-ubuntu.sh
+++ b/dev/install-build-deps-ubuntu.sh
@@ -8,6 +8,7 @@ if [ ! -d /opt/intel/oneapi ]; then
| gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
sudo apt update
+ sudo -E apt-cache pkgnames intel | grep intel-oneapi-runtime
sudo apt-get install -y intel-oneapi-ccl-devel-2021.8.0 \
intel-oneapi-tbb-common-devel-2021.8.0 intel-oneapi-tbb-devel-2021.8.0 \
intel-oneapi-mpi-devel-2021.8.0 \
diff --git a/dev/test-cluster/yarn/env.sh b/dev/test-cluster/yarn/env.sh
index 3430d4c3e..94d468bc7 100755
--- a/dev/test-cluster/yarn/env.sh
+++ b/dev/test-cluster/yarn/env.sh
@@ -38,10 +38,10 @@ else
fi
# Set Spark resources, can be overwritten in example
-SPARK_DRIVER_MEMORY=1G
+SPARK_DRIVER_MEMORY=512M
SPARK_NUM_EXECUTORS=2
SPARK_EXECUTOR_CORES=1
-SPARK_EXECUTOR_MEMORY=1G
+SPARK_EXECUTOR_MEMORY=512M
SPARK_TOTAL_CORES=$((SPARK_NUM_EXECUTORS * SPARK_EXECUTOR_CORES))
SPARK_DEFAULT_PARALLELISM=$((SPARK_TOTAL_CORES * 2))
diff --git a/dev/test-cluster/yarn/hadoop-env.sh b/dev/test-cluster/yarn/hadoop-env.sh
index f60b65a0b..f6e93a3ce 100755
--- a/dev/test-cluster/yarn/hadoop-env.sh
+++ b/dev/test-cluster/yarn/hadoop-env.sh
@@ -96,3 +96,5 @@ export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}
# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER
+source /opt/intel/oneapi/setvars.sh --ccl-configuration=cpu
+export FI_TCP_IFACE=eth0
diff --git a/dev/test-cluster/yarn/load-spark-envs.sh b/dev/test-cluster/yarn/load-spark-envs.sh
index 7ee0cb452..19c7ad6b7 100755
--- a/dev/test-cluster/yarn/load-spark-envs.sh
+++ b/dev/test-cluster/yarn/load-spark-envs.sh
@@ -15,5 +15,6 @@ export PYTHONPATH=$SPARK_HOME/python:$PYTHONPATH
export PYSPARK_PYTHON=python3
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PATH
+export FI_TCP_IFACE=eth0
set +x
diff --git a/dev/test-cluster/yarn/setup-cluster.sh b/dev/test-cluster/yarn/setup-cluster.sh
index d57edf90c..18e9a6e15 100755
--- a/dev/test-cluster/yarn/setup-cluster.sh
+++ b/dev/test-cluster/yarn/setup-cluster.sh
@@ -40,6 +40,7 @@ cp ./yarn-site.xml ~/opt/hadoop-$HADOOP_VERSION/etc/hadoop/
cp ./hadoop-env.sh ~/opt/hadoop-$HADOOP_VERSION/etc/hadoop/
cp ../log4j.properties ~/opt/spark-$SPARK_VERSION-bin-$SPARK_HADOOP_VERSION/conf
cp ./spark-defaults.conf ~/opt/spark-$SPARK_VERSION-bin-$SPARK_HADOOP_VERSION/conf
+cp ~/opt/spark-$SPARK_VERSION-bin-$SPARK_HADOOP_VERSION/yarn/spark-*-yarn-shuffle.jar ~/opt/hadoop-$HADOOP_VERSION/share/hadoop/yarn/lib/
echo $HOST_IP > $HADOOP_HOME/etc/hadoop/slaves
echo $HOST_IP > $SPARK_HOME/conf/slaves
@@ -53,10 +54,20 @@ mkdir -p /tmp/run/hdfs/datanode
# hdfs format
$HADOOP_HOME/bin/hdfs namenode -format
+wget -P $HADOOP_HOME/share/hadoop/yarn/lib/ https://repo1.maven.org/maven2/javax/activation/activation/1.1.1/activation-1.1.1.jar
+
# start hdfs and yarn
$HADOOP_HOME/sbin/start-dfs.sh
$HADOOP_HOME/sbin/start-yarn.sh
+jps
+free -g
+df -h
+yarn application -list
+ls -ls $HADOOP_HOME/logs/
+cat $HADOOP_HOME/logs/hadoop-*-resourcemanager-*.log
+cat $HADOOP_HOME/logs/hadoop-*-nodemanager-*.log
+
hadoop fs -ls /
yarn node -list
diff --git a/dev/test-cluster/yarn/spark-defaults.conf b/dev/test-cluster/yarn/spark-defaults.conf
index 04ed10b97..19cba6e71 100644
--- a/dev/test-cluster/yarn/spark-defaults.conf
+++ b/dev/test-cluster/yarn/spark-defaults.conf
@@ -25,10 +25,3 @@
# spark.serializer org.apache.spark.serializer.KryoSerializer
# spark.driver.memory 5g
# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
-
-spark.master yarn
-spark.serializer org.apache.spark.serializer.KryoSerializer
-spark.driver.memory 1g
-spark.executor.num 2
-spark.executor.cores 1
-spark.executor.memory 2g
diff --git a/dev/test-cluster/yarn/yarn-site.xml b/dev/test-cluster/yarn/yarn-site.xml
index ff74d23a7..9f6627bef 100644
--- a/dev/test-cluster/yarn/yarn-site.xml
+++ b/dev/test-cluster/yarn/yarn-site.xml
@@ -33,7 +33,7 @@
yarn.nodemanager.resource.memory-mb
- 7168
+ 3072
yarn.nodemanager.resource.cpu-vcores
@@ -43,17 +43,21 @@
yarn.nodemanager.vmem-check-enabled
false
+
+ yarn.nodemanager.pmem-check-enabled
+ false
+
yarn.nodemanager.vmem-pmem-ratio
- 2
+ 1
yarn.scheduler.minimum-allocation-mb
- 1024
+ 256
yarn.scheduler.maximum-allocation-mb
- 7168
+ 3072
yarn.scheduler.minimum-allocation-vcores
@@ -63,5 +67,8 @@
yarn.scheduler.maximum-allocation-vcores
2
-
+
+ yarn.nodemanager.env-whitelist
+ JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,HADOOP_YARN_HOME,LD_LIBRARY_PATH,FI_TCP_IFACE,CMPLR_ROOT,DAALROOT,CCL_ROOT
+
diff --git a/examples/build-all-scala.sh b/examples/build-all-scala.sh
index 8babb9d97..ad507e031 100755
--- a/examples/build-all-scala.sh
+++ b/examples/build-all-scala.sh
@@ -1,6 +1,6 @@
#!/usr/bin/env bash
-exampleDirs=(kmeans pca als naive-bayes linear-regression correlation summarizer)
+exampleDirs=(kmeans)
for dir in ${exampleDirs[*]}
do
diff --git a/examples/run-all-scala.sh b/examples/run-all-scala.sh
index 04bab7f8a..241a861ca 100755
--- a/examples/run-all-scala.sh
+++ b/examples/run-all-scala.sh
@@ -1,6 +1,6 @@
#!/usr/bin/env bash
-exampleDirs=(kmeans pca als naive-bayes linear-regression correlation summarizer)
+exampleDirs=(kmeans)
for dir in ${exampleDirs[*]}
do