Vagrantfile for Hadoop (3.3) Cluster with Hive (4.0) and Spark (3.5)
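The gist consists of four files: a bring-up script (shown first), the master.sh and nodes.sh provisioning scripts that the Vagrantfile references, and the Vagrantfile itself. A minimal way to use it, assuming all four files are saved into one directory and the bring-up script is named up.sh (the gist does not give its filename):

# run on the host; requires Vagrant and VirtualBox
cd hadoop-cluster    # any directory holding Vagrantfile, master.sh, nodes.sh, up.sh
bash up.sh           # boots node1 and node2 first, then provisions master

The workers are booted first on purpose: master.sh copies SSH keys and the Hadoop tree to node1 and node2, so both must already be running when the master provisions.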
up.sh (bring-up script; the filename is assumed, see the note above):
#!/bin/bash
set -euxo pipefail
vagrant up node1
vagrant up node2
vagrant up master
master.sh (provisioning script for the master node; referenced by the Vagrantfile):
#!/bin/bash
set -euxo pipefail
HOSTNAME=$1
: "Set hostname" && {
sudo hostname $HOSTNAME
echo $HOSTNAME | sudo tee /etc/hostname > /dev/null
}
: "Edit hosts file" && {
if ! bash -c "grep 192.168.56.10 /etc/hosts" ; then
cat << 'EOF' | sudo tee -a /etc/hosts > /dev/null
192.168.56.10 master
192.168.56.11 node1
192.168.56.12 node2
EOF
fi
}
: "Install common packages" && {
sudo yum -y install epel-release
sudo yum -y install java-1.8.0-openjdk-devel openssh-clients rsync wget sshpass
}
: "Download Hadoop" && {
if ! ls hadoop-*.tar.gz > /dev/null 2>&1; then
wget http://ftp.riken.jp/net/apache/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz -nv
fi
tar xkf hadoop-3.3.6.tar.gz
}
: "Set environment variables to shell RC file" && { | |
if ! bash -c "grep JAVA_HOME ~/.bashrc"; then | |
cat << 'EOF' >> ~/.bashrc | |
export JAVA_HOME=/usr/lib/jvm/jre-1.8.0-openjdk | |
export HADOOP_HOME=~/hadoop-3.3.6 | |
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop | |
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$PATH | |
EOF | |
fi | |
set +u | |
source ~/.bashrc | |
set -u | |
} | |
: "Hadoop execution check" && { | |
hadoop version | |
} | |
: "Install SSH public key to all nodes" && { | |
ssh-keygen -t ed25519 -P '' -f ~/.ssh/id_ed25519 | |
for node in master node1 node2; do | |
sshpass -p "vagrant" ssh-copy-id -i ~/.ssh/id_ed25519.pub -o "StrictHostKeyChecking no" $node | |
done; | |
} | |
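# The passwordless key installed above is what later lets start-dfs.sh and
# start-yarn.sh ssh into master/node1/node2 to launch their daemons; sshpass is
# needed only this once, to seed the key with the Vagrant box's default password.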
: "Copy Hadoop directory to nodes" && { | |
for node in node1 node2; do | |
scp -r $HADOOP_HOME $node:~/ | |
done; | |
} | |
: "Setting configuration files" && { | |
: "etc/hadoop/workers" && { | |
cat << 'EOF' > $HADOOP_HOME/etc/hadoop/workers | |
node1 | |
node2 | |
EOF | |
} | |
: "etc/hadoop/core-site.xml" && { | |
if ! bash -c "grep fs.defaultFS $HADOOP_HOME/etc/hadoop/core-site.xml"; then | |
cat << 'EOF' > /tmp/core-site.xml.property | |
<property> | |
<name>fs.defaultFS</name> | |
<value>hdfs://master:9000</value> | |
</property> | |
EOF | |
sed -i -e ' | |
/^<configuration>$/r /tmp/core-site.xml.property | |
/^$/d | |
' $HADOOP_HOME/etc/hadoop/core-site.xml | |
fi | |
} | |
: "etc/hadoop/hdfs-site.xml" && { | |
if ! bash -c "grep dfs.replication $HADOOP_HOME/etc/hadoop/hdfs-site.xml" ; then | |
cat << 'EOF' > /tmp/hdfs-site.xml.property | |
<property> | |
<name>dfs.replication</name> | |
<value>2</value> | |
</property> | |
<property> | |
<name>dfs.namenode.secondary.http-address</name> | |
<value>master:50090</value> | |
</property> | |
EOF | |
sed -i -e ' | |
/^<configuration>$/r /tmp/hdfs-site.xml.property | |
/^$/d | |
' $HADOOP_HOME/etc/hadoop/hdfs-site.xml | |
fi | |
} | |
: "etc/hadoop/mapred-site.xml" && { | |
if ! bash -c "grep mapreduce.framework.nam $HADOOP_HOME/etc/hadoop/mapred-site.xml"; then | |
cat << 'EOF' > /tmp/mapred-site.xml.property
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.application.classpath</name>
<value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
</property>
EOF
sed -i -e '
/^<configuration>$/r /tmp/mapred-site.xml.property
/^$/d
' $HADOOP_HOME/etc/hadoop/mapred-site.xml
fi
}
: "etc/hadoop/yarn-site.xml" && { | |
if ! bash -c "grep yarn.nodemanager.aux-service $HADOOP_HOME/etc/hadoop/yarn-site.xml"; then | |
cat << 'EOF' > /tmp/yarn-site.xml.property
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,PATH,LANG,TZ,HADOOP_MAPRED_HOME</value>
</property>
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
EOF
sed -i -e '
/^<configuration>$/r /tmp/yarn-site.xml.property
/^$/d
' $HADOOP_HOME/etc/hadoop/yarn-site.xml
fi
}
: "Copy to workers" && {
for node in node1 node2; do
scp -r $HADOOP_HOME/etc/hadoop/* $node:$HADOOP_HOME/etc/hadoop/
done;
}
}
: "Format HDFS" && { | |
$HADOOP_HOME/bin/hdfs namenode -format -force | |
} | |
: "Start daemons" && { | |
: "HDFS" && { | |
if ! bash -c "jps | grep NameNode"; then | |
$HADOOP_HOME/sbin/start-dfs.sh | |
fi | |
} | |
: "YARN" && { | |
if ! bash -c "jps | grep ResourceManager"; then | |
$HADOOP_HOME/sbin/start-yarn.sh | |
fi | |
} | |
: "MapReduce JobHistory server" && { | |
if ! bash -c "jps | grep JobHistoryServer" ; then | |
$HADOOP_HOME/bin/mapred --daemon start historyserver | |
fi | |
} | |
: "Check YARN" && { | |
$HADOOP_HOME/bin/yarn app -list | |
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.6.jar pi 10 1000 | |
} | |
} | |
: "Setup Hive" && { | |
: "Download Hive" && { | |
if ! ls apache-hive-*.tar.gz > /dev/null 2>&1; then
wget http://ftp.riken.jp/net/apache/hive/hive-4.0.1/apache-hive-4.0.1-bin.tar.gz -nv
fi
tar xkf apache-hive-4.0.1-bin.tar.gz
}
: "Set environment variables to shell RC file" && { | |
if ! bash -c "grep HIVE_HOME ~/.bashrc" ; then | |
cat << 'EOF' >> ~/.bashrc | |
export HIVE_HOME=~/apache-hive-4.0.1-bin | |
export PATH=$HIVE_HOME/bin:$PATH | |
EOF | |
fi | |
set +u | |
source ~/.bashrc | |
set -u | |
} | |
: "Setup MetaStore backend RDB" && { | |
: "Install MariaDB" && { | |
sudo yum -y install mariadb-server mariadb-java-client | |
} | |
: "Add [mysqld] sectin if not exists" && { | |
MARIADB_CONFIG_FILE="/etc/my.cnf.d/mariadb-server.cnf" | |
if ! grep -q '^\[mysqld\]' "$MARIADB_CONFIG_FILE"; then | |
echo -e "\n[mysqld]" >> "$MARIADB_CONFIG_FILE" | |
fi | |
} | |
: "Add or replace 'sql_mode'" && { | |
MARIADB_SQL_MODE="STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION,ANSI_QUOTES,NO_BACKSLASH_ESCAPES" | |
if grep -q '^\s*sql_mode' "$MARIADB_CONFIG_FILE"; then | |
sudo sed -i "/^\s*sql_mode/c\sql_mode=${MARIADB_SQL_MODE}" "$MARIADB_CONFIG_FILE" | |
else | |
sudo sed -i "/^\[mysqld\]/a\sql_mode=${MARIADB_SQL_MODE}" "$MARIADB_CONFIG_FILE" | |
fi | |
} | |
: "Enable and start MariaDB" && { | |
sudo systemctl enable mariadb | |
sudo systemctl start mariadb | |
} | |
: "Create MariaDB user for Hive" && { | |
sudo mysql -e " | |
CREATE USER 'hive'@'%' IDENTIFIED BY 'hive'; | |
GRANT ALL PRIVILEGES ON *.* TO 'hive'@'%'; FLUSH PRIVILEGES; | |
" | |
} | |
: "Create JDBC driver symlink" && { | |
ln -s /usr/lib/java/mariadb-java-client.jar $HIVE_HOME/lib | |
} | |
}
: "Setting configuration files" && {
: "conf/hive-site.xml" && {
if ! bash -c "grep hive.server2.authentication $HIVE_HOME/conf/hive-site.xml"; then
cat << 'EOF' > $HIVE_HOME/conf/hive-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mariadb://localhost/metastore?createDatabaseIfNotExist=true</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>org.mariadb.jdbc.Driver</value>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>hive</value>
</property>
<property>
<name>hive.server2.authentication</name>
<value>NONE</value>
</property>
<property>
<name>hive.server2.enable.doAs</name>
<value>false</value>
</property>
</configuration>
EOF
fi
}
}
: "Setup HDFS working directory" && { | |
$HADOOP_HOME/bin/hadoop fs -mkdir -p /user/hive/warehouse | |
$HADOOP_HOME/bin/hadoop fs -chmod g+w /user/hive/warehouse | |
$HADOOP_HOME/bin/hadoop fs -mkdir -p /tmp | |
$HADOOP_HOME/bin/hadoop fs -chmod g+w /tmp | |
$HIVE_HOME/bin/schematool -dbType mysql -initSchema --verbose | |
$HIVE_HOME/bin/schematool -dbType mysql -info --verbose | |
} | |
: "Start server process" && { | |
nohup $HIVE_HOME/bin/hive --service metastore & | |
sleep 5 | |
nohup $HIVE_HOME/bin/hive --service hiveserver2 & | |
sleep 5 | |
} | |
: "Check Hive" && { | |
$HIVE_HOME/bin/beeline -u jdbc:hive2://localhost:10000 -e "select 1" | |
} | |
} | |
: "Setup Spark" && { | |
: "Download Spark" && { | |
if ! bash -c "ls | grep spark-*.tar.gz" ; then | |
wget -nv https://ftp.riken.jp/net/apache/spark/spark-3.5.3/spark-3.5.3-bin-hadoop3.tgz | |
fi | |
tar xkf spark-3.5.3-bin-hadoop3.tgz | |
} | |
: "Set environment variables to shell RC file" && { | |
if ! bash -c "grep SPARK_HOME ~/.bashrc" ; then | |
cat << 'EOF' >> ~/.bashrc | |
export SPARK_HOME=~/spark-3.5.3-bin-hadoop3 | |
export PATH=$SPARK_HOME/bin:$PATH | |
EOF | |
fi | |
set +u | |
source ~/.bashrc | |
set -u | |
} | |
: "Create JDBC driver symlink" && { | |
ln -s /usr/lib/java/mariadb-java-client.jar $SPARK_HOME/jars/ | |
} | |
: "Check Spark" && { | |
$SPARK_HOME/bin/spark-submit \ | |
--class org.apache.spark.examples.SparkPi \ | |
--master yarn \ | |
$SPARK_HOME/examples/jars/spark-examples_*.jar \ | |
10 | |
} | |
} |
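With provisioning done, HiveServer2 listens on port 10000 on the master, so a quick end-to-end check can be run from inside the VM (a sketch; the table name is illustrative, and these statements only touch the metastore, so no YARN job is launched):

vagrant ssh master
beeline -u jdbc:hive2://localhost:10000 -e "SHOW DATABASES; CREATE TABLE IF NOT EXISTS smoke_test (id INT); SHOW TABLES;"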
nodes.sh (provisioning script for the worker nodes; referenced by the Vagrantfile):
#!/bin/bash
set -euxo pipefail
HOSTNAME=$1
: "Set hostname" && {
sudo hostname $HOSTNAME
echo $HOSTNAME | sudo tee /etc/hostname > /dev/null
}
: "Edit hosts file" && {
if ! bash -c "grep 192.168.56.10 /etc/hosts" ; then
cat << 'EOF' | sudo tee -a /etc/hosts > /dev/null
192.168.56.10 master
192.168.56.11 node1
192.168.56.12 node2
EOF
fi
}
: "Install common packages" && {
sudo yum -y install java-1.8.0-openjdk-devel openssh-clients rsync wget
}
: "Set environment variables to shell RC file" && {
if ! bash -c "grep JAVA_HOME /etc/hosts" ; then | |
cat << 'EOF' >> ~/.bashrc
export JAVA_HOME=/usr/lib/jvm/jre-1.8.0-openjdk
export HADOOP_HOME=~/hadoop-3.3.6
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$PATH
EOF
fi
}
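nodes.sh deliberately installs only Java and the shell environment; the Hadoop tree itself arrives later, when master.sh copies it over in its "Copy Hadoop directory to nodes" step. Once the master has been provisioned, each worker should be running its two daemons (a sketch):

vagrant ssh node1 -c jps    # expect DataNode and NodeManager
vagrant ssh node2 -c jps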
Vagrantfile:
# -*- mode: ruby -*-
# vi: set ft=ruby :
# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
VAGRANTFILE_API_VERSION = "2"
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
  config.vm.define :master, primary: true do |master|
    master.vm.box = "bento/rockylinux-9"
    master.vm.network "private_network", ip: "192.168.56.10"
    master.vm.provider "virtualbox" do |vb|
      vb.memory = "8192"
    end
    master.vm.provision "shell", privileged: false do |s|
      s.path = "master.sh"
      s.args = "master"
    end
  end
  (1..2).each {|i|
    node_name = "node" + i.to_s
    config.vm.define node_name do |node|
      node.vm.box = "bento/rockylinux-9"
      node.vm.network "private_network", ip: "192.168.56.1" + i.to_s
      node.vm.provider "virtualbox" do |vb|
        vb.memory = "4096"
      end
      node.vm.provision "shell", privileged: false do |s|
        s.path = "nodes.sh"
        s.args = node_name
      end
    end
  }
end
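The private network also makes the standard web UIs reachable from the host; with the Hadoop 3 default ports these should respond once the cluster is up (a sketch, run on the host):

curl -s http://192.168.56.10:9870/ > /dev/null && echo "NameNode UI up"          # HDFS
curl -s http://192.168.56.10:8088/ > /dev/null && echo "ResourceManager UI up"   # YARN
curl -s http://192.168.56.10:19888/ > /dev/null && echo "JobHistory UI up"       # MapReduce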