Vagrantfile for Spark 2.3 on YARN with CentOS 7 and Hadoop 2.8 (3 hosts)
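The gist consists of four files: a one-line boot script that brings the virtual machines up, master.sh (provisioning for the master host), nodes.sh (provisioning for the two worker hosts), and the Vagrantfile that ties them together. The addresses baked into the scripts are 192.168.33.10 for master, 192.168.33.11 for node1 and 192.168.33.12 for node2.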
#!/bin/sh
vagrant up node1 node2 master
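The workers are brought up before master on purpose: master.sh pushes SSH keys and the Hadoop configuration to node1 and node2, so they have to be reachable by the time the master is provisioned. Once vagrant up has finished, a quick sanity check (a sketch, assuming provisioning completed without errors) is to confirm that the master-side daemons are running:

vagrant ssh master
jps
# NameNode, SecondaryNameNode, ResourceManager and JobHistoryServer
# should be listed if master.sh ran to completion.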
#!/bin/sh
# master.sh: provisions the master host (invoked from the Vagrantfile with "master" as its argument)
set -x
HOSTNAME=$1
: "Set hostname" && { | |
sudo hostname $HOSTNAME | |
echo $HOSTNAME | sudo tee /etc/hostname > /dev/null | |
} | |
: "Edit hosts file" && { | |
grep 192.168.33.10 /etc/hosts > /dev/null | |
if [ $? -ne 0 ] ; then | |
cat << 'EOF' | sudo tee -a /etc/hosts > /dev/null | |
192.168.33.10 master | |
192.168.33.11 node1 | |
192.168.33.12 node2 | |
EOF | |
fi | |
} | |
: "Install common packages" && { | |
sudo yum -y install epel-release | |
sudo yum -y install java-1.8.0-openjdk-devel openssh-clients rsync wget sshpass | |
} | |
: "Download Hadoop" && { | |
ls | grep hadoop-*.tar.gz > /dev/null | |
if [ $? -ne 0 ]; then | |
wget http://ftp.riken.jp/net/apache/hadoop/common/hadoop-2.8.3/hadoop-2.8.3.tar.gz -nv | |
tar xf hadoop-2.8.3.tar.gz | |
fi | |
} | |
: "Download Apache Spark" && { | |
ls | grep spark-*.tgz > /dev/null | |
if [ $? -ne 0 ]; then | |
wget http://ftp.riken.jp/net/apache/spark/spark-2.3.0/spark-2.3.0-bin-hadoop2.7.tgz -nv | |
tar xf spark-2.3.0-bin-hadoop2.7.tgz | |
fi | |
} | |
: "Set environment variables to shell RC file" && { | |
grep JAVA_HOME /etc/hosts > /dev/null | |
if [ $? -ne 0 ]; then | |
cat << 'EOF' >> ~/.bashrc | |
export JAVA_HOME=/usr/lib/jvm/jre-1.8.0-openjdk | |
export HADOOP_HOME=~/hadoop-2.8.3 | |
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop | |
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$PATH | |
export SPARK_HOME=~/spark-2.3.0-bin-hadoop2.7 | |
export PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$JAVA_HOME/bin:$PATH | |
EOF | |
fi | |
source ~/.bashrc | |
} | |
: "Hadoop execution check" && { | |
hadoop version | |
} | |
: "Install SSH public key to all nodes" && { | |
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa | |
for node in master node1 node2; do | |
sshpass -p "vagrant" ssh-copy-id -i ~/.ssh/id_rsa.pub -o "StrictHostKeyChecking no" $node | |
done; | |
} | |
: "Setting configuration files" && { | |
: "etc/hadoop/slaves" && { | |
cat << 'EOF' > $HADOOP_HOME/etc/hadoop/slaves | |
node1 | |
node2 | |
EOF | |
} | |
: "etc/hadoop/core-site.xml" && { | |
grep fs.defaultFS $HADOOP_HOME/etc/hadoop/core-site.xml | |
if [ $? -ne 0 ]; then | |
cat << 'EOF' > /tmp/core-site.xml.property | |
<property> | |
<name>fs.defaultFS</name> | |
<value>hdfs://192.168.33.10:9000</value> | |
</property> | |
EOF | |
sed -i -e ' | |
/^<configuration>$/r /tmp/core-site.xml.property | |
/^$/d | |
' $HADOOP_HOME/etc/hadoop/core-site.xml | |
fi | |
} | |
: "etc/hadoop/hdfs-site.xml" && { | |
grep dfs.replication $HADOOP_HOME/etc/hadoop/hdfs-site.xml | |
if [ $? -ne 0 ]; then | |
cat << 'EOF' > /tmp/hdfs-site.xml.property | |
<property> | |
<name>dfs.replication</name> | |
<value>2</value> | |
</property> | |
<property> | |
<name>dfs.namenode.secondary.http-address</name> | |
<value>192.168.33.10:50090</value> | |
</property> | |
EOF | |
sed -i -e ' | |
/^<configuration>$/r /tmp/hdfs-site.xml.property | |
/^$/d | |
' $HADOOP_HOME/etc/hadoop/hdfs-site.xml | |
fi | |
} | |
: "etc/hadoop/mapred-site.xml" && { | |
grep mapreduce.framework.nam $HADOOP_HOME/etc/hadoop/mapred-site.xml | |
if [ $? -ne 0 ]; then | |
cp $HADOOP_HOME/etc/hadoop/mapred-site.xml{.template,} | |
cat << 'EOF' > /tmp/mapred-site.xml.property | |
<property> | |
<name>mapreduce.framework.name</name> | |
<value>yarn</value> | |
</property> | |
EOF | |
sed -i -e ' | |
/^<configuration>$/r /tmp/mapred-site.xml.property | |
/^$/d | |
' $HADOOP_HOME/etc/hadoop/mapred-site.xml | |
fi | |
} | |
: "etc/hadoop/yarn-site.xml" && { | |
grep yarn.nodemanager.aux-service $HADOOP_HOME/etc/hadoop/yarn-site.xml | |
if [ $? -ne 0 ]; then | |
cat << 'EOF' > /tmp/yarn-site.xml.property | |
<property> | |
<name>yarn.nodemanager.aux-services</name> | |
<value>mapreduce_shuffle</value> | |
</property> | |
<property> | |
<name>yarn.resourcemanager.hostname</name> | |
<value>master</value> | |
</property> | |
<property> | |
<name>yarn.nodemanager.pmem-check-enabled</name> | |
<value>false</value> | |
</property> | |
<property> | |
<name>yarn.nodemanager.vmem-check-enabled</name> | |
<value>false</value> | |
</property> | |
EOF | |
sed -i -e ' | |
/^<configuration>$/r /tmp/yarn-site.xml.property | |
/^$/d | |
' $HADOOP_HOME/etc/hadoop/yarn-site.xml | |
fi | |
} | |
: "Copy to nodes" && { | |
for node in node1 node2; do | |
scp $HADOOP_HOME/etc/hadoop/* $node:$HADOOP_HOME/etc/hadoop/ | |
done; | |
} | |
} | |
: "Format HDFS" && { | |
$HADOOP_HOME/bin/hdfs namenode -format | |
} | |
: "Start daemons" && { | |
: "HDFS" && { | |
jps | grep NameNode | |
if [ $? -ne 0 ]; then | |
$HADOOP_HOME/sbin/start-dfs.sh | |
fi | |
} | |
: "YARN" && { | |
jps | grep ResourceManager | |
if [ $? -ne 0 ]; then | |
$HADOOP_HOME/sbin/start-yarn.sh | |
fi | |
} | |
: "MapReduce JobHistory server" && { | |
jps | grep JobHistoryServer | |
if [ $? -ne 0 ]; then | |
$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh --config $HADOOP_CONF_DIR start historyserver | |
fi | |
} | |
} | |
: "Setup Python3" && { | |
: "Install Python3" && { | |
sudo yum -y install https://centos7.iuscommunity.org/ius-release.rpm | |
sudo yum clean all | |
sudo yum -y install python36u-pip | |
} | |
: "Install Jupyter-Notebook and others" && { | |
sudo pip3.6 install pandas | |
} | |
: "Configure to launch pyspark" && { | |
cat << 'EOF' >> ~/.bashrc | |
export PYSPARK_PYTHON=/usr/bin/python3.6 | |
EOF | |
} | |
: "Prepare to launch PySpark script" && { | |
cat << 'EOF' > launch-pyspark.sh | |
#!/bin/sh | |
pyspark --master yarn | |
EOF | |
chmod +x launch-pyspark.sh | |
} | |
} |
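Once master.sh has run, a quick way to confirm that Spark can actually talk to YARN is to submit the bundled SparkPi example from the master node. This is a sketch, run as the vagrant user with the environment set up by master.sh; the exact examples jar name depends on the Spark build, hence the glob:

spark-submit \
  --master yarn \
  --deploy-mode cluster \
  --class org.apache.spark.examples.SparkPi \
  $SPARK_HOME/examples/jars/spark-examples_*.jar 10

The launch-pyspark.sh helper created above is the interactive equivalent: it starts a PySpark shell against the same YARN cluster.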
#!/bin/sh
# nodes.sh: provisions the worker hosts node1 and node2 (invoked from the Vagrantfile with the node name as its argument)
set -x
HOSTNAME=$1
: "Set hostname" && { | |
sudo hostname $HOSTNAME | |
echo $HOSTNAME | sudo tee /etc/hostname > /dev/null | |
} | |
: "Edit hosts file" && { | |
grep 192.168.33.10 /etc/hosts > /dev/null | |
if [ $? -ne 0 ] ; then | |
cat << 'EOF' | sudo tee -a /etc/hosts > /dev/null | |
192.168.33.10 master | |
192.168.33.11 node1 | |
192.168.33.12 node2 | |
EOF | |
fi | |
} | |
: "Install common packages" && { | |
sudo yum -y install java-1.8.0-openjdk-devel openssh-clients rsync wget | |
} | |
: "Download Hadoop" && { | |
ls | grep hadoop-*.tar.gz > /dev/null | |
if [ $? -ne 0 ]; then | |
wget http://ftp.riken.jp/net/apache/hadoop/common/hadoop-2.8.3/hadoop-2.8.3.tar.gz -nv | |
tar xf hadoop-2.8.3.tar.gz | |
fi | |
} | |
: "Set environment variables to shell RC file" && { | |
grep JAVA_HOME /etc/hosts > /dev/null | |
if [ $? -ne 0 ]; then | |
cat << 'EOF' >> ~/.bashrc | |
export JAVA_HOME=/usr/lib/jvm/jre-1.8.0-openjdk | |
export HADOOP_HOME=~/hadoop-2.8.3 | |
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop | |
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$PATH | |
EOF | |
fi | |
source ~/.bashrc | |
} | |
: "Setup Python3" && { | |
: "Install Python3" && { | |
sudo yum -y install https://centos7.iuscommunity.org/ius-release.rpm | |
sudo yum clean all | |
sudo yum -y install python36u-pip | |
} | |
} | |
: "Hadoop execution check" && { | |
hadoop version | |
} |
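After all three machines are provisioned, the workers should have registered with the master. A minimal check, run on master and assuming the daemons started by master.sh are up:

hdfs dfsadmin -report
# should report two live datanodes (node1 and node2)
yarn node -list
# should list node1 and node2 as RUNNING NodeManagers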
# -*- mode: ruby -*-
# vi: set ft=ruby :

# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
VAGRANTFILE_API_VERSION = "2"

Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
  config.vm.define :master, primary: true do |master|
    master.vm.box = "bento/centos-7.4"
    master.vm.network "private_network", ip: "192.168.33.10"
    master.vm.network :forwarded_port, host: 8888, guest: 8888
    master.vm.provider "virtualbox" do |vb|
      vb.memory = "1024"
    end
    master.vm.provision "shell", privileged: false do |s|
      s.path = "master.sh"
      s.args = "master"
    end
  end

  (1..2).each {|i|
    node_name = "node" + i.to_s
    config.vm.define node_name do |node|
      node.vm.box = "bento/centos-7.4"
      node.vm.network "private_network", ip: "192.168.33.1" + i.to_s
      node.vm.provider "virtualbox" do |vb|
        vb.memory = "1024"
      end
      node.vm.provision "shell", privileged: false do |s|
        s.path = "nodes.sh"
        s.args = node_name
      end
    end
  }

  if Vagrant.has_plugin?("vagrant-proxyconf") && ENV['http_proxy']
    config.proxy.http = ENV['http_proxy']
    config.proxy.https = ENV['https_proxy']
    config.proxy.no_proxy = "localhost,127.0.0.1"
  end
end
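With the cluster up, the standard Hadoop 2.x web UIs are reachable from the host over the private network, assuming the default ports: the YARN ResourceManager at http://192.168.33.10:8088 and the HDFS NameNode at http://192.168.33.10:50070. Port 8888 is additionally forwarded from the host to master (e.g. for a notebook server), though the provisioning scripts above do not start one. A quick reachability check from the host, assuming curl is available:

curl -sI http://192.168.33.10:8088/cluster | head -n 1
curl -sI http://192.168.33.10:50070/ | head -n 1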