Vagrantfile for Hadoop (3.3) Cluster with Hive (3.1)
The gist contains four files. First, a small wrapper that boots the workers before the master, since master.sh pushes SSH keys and the Hadoop tree out to node1 and node2:
#!/bin/sh
vagrant up node1 node2 master
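After the wrapper finishes, the state of the VMs can be checked from the host with standard Vagrant subcommands before logging in; a minimal sketch:

#!/bin/sh
# Confirm all three VMs are running, then open a shell on the master.
vagrant status
vagrant ssh master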
master.sh provisions the master node; the Vagrantfile invokes it with the hostname as its only argument:
#!/bin/bash
set -euxo pipefail

HOSTNAME=$1

: "Set hostname" && {
    sudo hostname "$HOSTNAME"
    echo "$HOSTNAME" | sudo tee /etc/hostname > /dev/null
}

: "Edit hosts file" && {
    if ! bash -c "grep 192.168.56.10 /etc/hosts" ; then
        cat << 'EOF' | sudo tee -a /etc/hosts > /dev/null
192.168.56.10 master
192.168.56.11 node1
192.168.56.12 node2
EOF
    fi
}

: "Install common packages" && {
    # sshpass comes from EPEL, so enable that repository first.
    sudo yum -y install epel-release
    sudo yum -y install java-1.8.0-openjdk-devel openssh-clients rsync wget sshpass
}

: "Download Hadoop" && {
    if ! ls hadoop-*.tar.gz > /dev/null 2>&1; then
        wget http://ftp.riken.jp/net/apache/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz -nv
        tar xf hadoop-3.3.6.tar.gz
    fi
}

: "Set environment variables to shell RC file" && {
    if ! bash -c "grep JAVA_HOME ~/.bashrc"; then
        cat << 'EOF' >> ~/.bashrc
export JAVA_HOME=/usr/lib/jvm/jre-1.8.0-openjdk
export HADOOP_HOME=~/hadoop-3.3.6
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$PATH
EOF
    fi
    # ~/.bashrc references unset variables, so relax nounset while sourcing it.
    set +u
    source ~/.bashrc
    set -u
}

: "Hadoop execution check" && {
    hadoop version
}

: "Install SSH public key to all nodes" && {
    # Generate the key only once so re-provisioning does not hang on the overwrite prompt.
    if [ ! -f ~/.ssh/id_ed25519 ]; then
        ssh-keygen -t ed25519 -P '' -f ~/.ssh/id_ed25519
    fi
    for node in master node1 node2; do
        sshpass -p "vagrant" ssh-copy-id -i ~/.ssh/id_ed25519.pub -o "StrictHostKeyChecking no" $node
    done
}
: "Copy Hadoop directory to nodes" && { | |
for node in node1 node2; do | |
scp -r $HADOOP_HOME $node:~/ | |
done; | |
} | |
: "Setting configuration files" && { | |
: "etc/hadoop/workers" && { | |
cat << 'EOF' > $HADOOP_HOME/etc/hadoop/workers | |
node1 | |
node2 | |
EOF | |
} | |
: "etc/hadoop/core-site.xml" && { | |
if ! bash -c "grep fs.defaultFS $HADOOP_HOME/etc/hadoop/core-site.xml"; then | |
cat << 'EOF' > /tmp/core-site.xml.property | |
<property> | |
<name>fs.defaultFS</name> | |
<value>hdfs://master:9000</value> | |
</property> | |
EOF | |
sed -i -e ' | |
/^<configuration>$/r /tmp/core-site.xml.property | |
/^$/d | |
' $HADOOP_HOME/etc/hadoop/core-site.xml | |
fi | |
} | |
: "etc/hadoop/hdfs-site.xml" && { | |
if ! bash -c "grep dfs.replication $HADOOP_HOME/etc/hadoop/hdfs-site.xml" ; then | |
cat << 'EOF' > /tmp/hdfs-site.xml.property | |
<property> | |
<name>dfs.replication</name> | |
<value>2</value> | |
</property> | |
<property> | |
<name>dfs.namenode.secondary.http-address</name> | |
<value>master:50090</value> | |
</property> | |
EOF | |
sed -i -e ' | |
/^<configuration>$/r /tmp/hdfs-site.xml.property | |
/^$/d | |
' $HADOOP_HOME/etc/hadoop/hdfs-site.xml | |
fi | |
} | |
: "etc/hadoop/mapred-site.xml" && { | |
if ! bash -c "grep mapreduce.framework.nam $HADOOP_HOME/etc/hadoop/mapred-site.xml"; then | |
cat << 'EOF' > /tmp/mapred-site.xml.property | |
<property> | |
<name>mapreduce.framework.name</name> | |
<value>yarn</value> | |
</property> | |
<property> | |
<name>mapreduce.application.classpath</name> | |
<value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value> | |
</property> | |
EOF | |
sed -i -e ' | |
/^<configuration>$/r /tmp/mapred-site.xml.property | |
/^$/d | |
' $HADOOP_HOME/etc/hadoop/mapred-site.xml | |
fi | |
} | |
: "etc/hadoop/yarn-site.xml" && { | |
if ! bash -c "grep yarn.nodemanager.aux-service $HADOOP_HOME/etc/hadoop/yarn-site.xml"; then | |
cat << 'EOF' > /tmp/yarn-site.xml.property | |
<property> | |
<name>yarn.nodemanager.aux-services</name> | |
<value>mapreduce_shuffle</value> | |
</property> | |
<property> | |
<name>yarn.resourcemanager.hostname</name> | |
<value>master</value> | |
</property> | |
<property> | |
<name>yarn.nodemanager.env-whitelist</name> | |
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,PATH,LANG,TZ,HADOOP_MAPRED_HOME</value> | |
</property> | |
EOF | |
sed -i -e ' | |
/^<configuration>$/r /tmp/yarn-site.xml.property | |
/^$/d | |
' $HADOOP_HOME/etc/hadoop/yarn-site.xml | |
fi | |
} | |
: "Copy to workers" && { | |
for node in node1 node2; do | |
scp $HADOOP_HOME/etc/hadoop/* $node:$HADOOP_HOME/etc/hadoop/ | |
done; | |
} | |
} | |
: "Format HDFS" && { | |
$HADOOP_HOME/bin/hdfs namenode -format -force | |
} | |
: "Start daemons" && { | |
: "HDFS" && { | |
if ! bash -c "jps | grep NameNode"; then | |
$HADOOP_HOME/sbin/start-dfs.sh | |
fi | |
} | |
: "YARN" && { | |
if ! bash -c "jps | grep ResourceManager"; then | |
$HADOOP_HOME/sbin/start-yarn.sh | |
fi | |
} | |
: "MapReduce JobHistory server" && { | |
if ! bash -c "jps | grep JobHistoryServer" ; then | |
$HADOOP_HOME/bin/mapred --daemon start historyserver | |
fi | |
} | |
: "Check YARN" && { | |
$HADOOP_HOME/bin/yarn app -list | |
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.6.jar pi 10 1000 | |
} | |
} | |
: "Setup Hive" && { | |
: "Download Hive (3.x)" && { | |
if ! bash -c "ls | grep apache-hive-*.tar.gz" ; then | |
wget http://ftp.riken.jp/net/apache/hive/hive-3.1.3/apache-hive-3.1.3-bin.tar.gz -nv | |
tar xf apache-hive-3.1.3-bin.tar.gz | |
fi | |
} | |
: "Set environment variables to shell RC file" && { | |
if ! bash -c "grep HIVE_HOME ~/.bashrc" ; then | |
cat << 'EOF' >> ~/.bashrc | |
export HIVE_HOME=~/apache-hive-3.1.3-bin | |
export PATH=$HIVE_HOME/bin:$PATH | |
EOF | |
fi | |
set +u | |
source ~/.bashrc | |
set -u | |
} | |
: "Setup HDFS working directory" && { | |
$HADOOP_HOME/bin/hadoop fs -mkdir -p /user/hive/warehouse | |
$HADOOP_HOME/bin/hadoop fs -chmod g+w /user/hive/warehouse | |
$HADOOP_HOME/bin/hadoop fs -mkdir -p /tmp | |
$HADOOP_HOME/bin/hadoop fs -chmod g+w /tmp | |
$HIVE_HOME/bin/schematool -dbType derby -initSchema --verbose | |
} | |
} |
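With provisioning done, a quick end-to-end smoke test of Hive from the master node might look like the sketch below. The table name t1 is arbitrary, and it assumes the session runs from the vagrant user's home directory, where schematool created the embedded Derby metastore_db/:

#!/bin/bash
# Round-trip a throwaway table through the Hive CLI; the INSERT runs as a
# MapReduce job on YARN, so this exercises HDFS, YARN, and the metastore.
cd ~
hive -e 'CREATE TABLE IF NOT EXISTS t1 (id INT)'
hive -e 'INSERT INTO t1 VALUES (1), (2)'
hive -e 'SELECT COUNT(*) FROM t1'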
nodes.sh provisions the two worker nodes the same way, except that it downloads no Hadoop itself; master.sh later copies the Hadoop tree and configuration over SSH:
#!/bin/bash
set -euxo pipefail

HOSTNAME=$1

: "Set hostname" && {
    sudo hostname "$HOSTNAME"
    echo "$HOSTNAME" | sudo tee /etc/hostname > /dev/null
}

: "Edit hosts file" && {
    if ! bash -c "grep 192.168.56.10 /etc/hosts" ; then
        cat << 'EOF' | sudo tee -a /etc/hosts > /dev/null
192.168.56.10 master
192.168.56.11 node1
192.168.56.12 node2
EOF
    fi
}

: "Install common packages" && {
    sudo yum -y install java-1.8.0-openjdk-devel openssh-clients rsync wget
}

: "Set environment variables to shell RC file" && {
    if ! bash -c "grep JAVA_HOME ~/.bashrc" ; then
        cat << 'EOF' >> ~/.bashrc
export JAVA_HOME=/usr/lib/jvm/jre-1.8.0-openjdk
export HADOOP_HOME=~/hadoop-3.3.6
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$PATH
EOF
    fi
}
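Once both workers are provisioned and master.sh has started the daemons, it is worth confirming from the master node that the workers actually joined the cluster. These are stock Hadoop admin commands, and each should list node1 and node2:

#!/bin/bash
# Cluster membership as seen by the NameNode and the ResourceManager.
hdfs dfsadmin -report   # expect "Live datanodes (2)"
yarn node -list         # expect two nodes in RUNNING state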
Finally, the Vagrantfile defines the master (8 GB RAM) and two workers (4 GB each) on a VirtualBox private network:
# -*- mode: ruby -*-
# vi: set ft=ruby :

# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
VAGRANTFILE_API_VERSION = "2"

Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
  config.vm.define :master, primary: true do |master|
    master.vm.box = "bento/rockylinux-9"
    master.vm.network "private_network", ip: "192.168.56.10"
    master.vm.provider "virtualbox" do |vb|
      vb.memory = "8192"
    end
    master.vm.provision "shell", privileged: false do |s|
      s.path = "master.sh"
      s.args = "master"
    end
  end

  (1..2).each do |i|
    node_name = "node" + i.to_s
    config.vm.define node_name do |node|
      node.vm.box = "bento/rockylinux-9"
      node.vm.network "private_network", ip: "192.168.56.1" + i.to_s
      node.vm.provider "virtualbox" do |vb|
        vb.memory = "4096"
      end
      node.vm.provision "shell", privileged: false do |s|
        s.path = "nodes.sh"
        s.args = node_name
      end
    end
  end

  if Vagrant.has_plugin?("vagrant-proxyconf") && ENV['http_proxy']
    config.proxy.http = ENV['http_proxy']
    config.proxy.https = ENV['https_proxy']
    config.proxy.no_proxy = "localhost,127.0.0.1"
  end
end
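With everything up, the Hadoop 3 web UIs are reachable from the host over the private network: the NameNode serves on port 9870 and the ResourceManager on 8088 (stock defaults). A quick probe, assuming curl is available on the host:

#!/bin/sh
# Expect HTTP 200 from both daemons' web UIs.
curl -s -o /dev/null -w '%{http_code}\n' http://192.168.56.10:9870/   # NameNode
curl -s -o /dev/null -w '%{http_code}\n' http://192.168.56.10:8088/   # ResourceManager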