
Building a Hadoop 2.7.1 and Spark 1.7 High-Availability Cluster with Docker on a Single Machine

[Date: 2016-01-22]  Source: Linux公社


Pull the Ubuntu image

sudo docker pull ubuntu

Download spark 1.7, hadoop 2.7.1, scala 1.1, zookeeper 3.4.6 and jdk 1.8, extract them, and place them in a local folder that will be mounted into the container.

In the same folder, create the files

authorized_keys

hosts

This example uses /home/docker/config as that directory.
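A sketch of this preparation step on the host (the archive file names below are assumptions; substitute the exact versions you downloaded):

mkdir -p /home/docker/config
cd /home/docker/config
# unpack the downloaded archives into this folder
tar xzf ~/Downloads/hadoop-2.7.1.tar.gz
tar xzf ~/Downloads/spark-1.x-bin-hadoop2.x.tgz
tar xzf ~/Downloads/scala-x.y.z.tgz
tar xzf ~/Downloads/zookeeper-3.4.6.tar.gz
tar xzf ~/Downloads/jdk-8uXX-linux-x64.tar.gz
# empty files that the containers will read at startup
touch authorized_keys hosts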

Start the container

sudo docker run --name installspark -v /home/docker/config/:/config -it ubuntu:14.04

After the container starts, the installation files placed above are visible under /config inside the container.
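The stock ubuntu:14.04 image ships without vim and without an SSH server, both of which the following steps rely on. Assuming the container has network access, a minimal sketch:

apt-get update
apt-get install -y vim openssh-server
# sshd needs its runtime directory, and root needs an .ssh directory
# for the authorized_keys file copied in ~/.bashrc below
mkdir -p /var/run/sshd /root/.ssh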

Install the JDK and Scala:

vim ~/.bashrc

Append:

/usr/sbin/sshd

cat /config/hosts > /etc/hosts

cat /config/authorized_keys > /root/.ssh/authorized_keys

export JAVA_HOME=/usr/lib/jvm/java-8-sun

export PATH=${JAVA_HOME}/bin:$PATH

export HADOOP_HOME=/opt/hadoop

export PATH=${HADOOP_HOME}/bin:$PATH

export SCALA_HOME=/opt/scala

export PATH=${SCALA_HOME}/bin:$PATH

export SPARK_HOME=/opt/spark

export PATH=${SPARK_HOME}/bin:$PATH

Copy spark, hadoop and zookeeper to /opt.
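A minimal sketch of putting everything where the ~/.bashrc variables above expect it (the unpacked directory names are assumptions; adjust them to the versions you downloaded):

mkdir -p /usr/lib/jvm
cp -r /config/jdk1.8.0_xx      /usr/lib/jvm/java-8-sun
cp -r /config/scala-x.y.z      /opt/scala
cp -r /config/spark-1.x        /opt/spark
cp -r /config/hadoop-2.7.1     /opt/hadoop
cp -r /config/zookeeper-3.4.6  /opt/zookeeper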

Install Hadoop:

Create the folders /opt/hadoop/namenode, /opt/hadoop/datanode, /opt/hadoop/tmp and /opt/hadoop/journal.
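For example:

mkdir -p /opt/hadoop/namenode /opt/hadoop/datanode /opt/hadoop/tmp /opt/hadoop/journal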

root@nn1:/opt/hadoop/etc/hadoop# vim hadoop-env.sh

Change:

export JAVA_HOME=/usr/lib/jvm/java-8-sun

root@nn1:/opt/hadoop/etc/hadoop# vim core-site.xml

Add:

<property>
  <name>fs.defaultFS</name>
  <value>hdfs://ns1</value>
</property>
<property>
  <name>hadoop.tmp.dir</name>
  <value>/opt/hadoop/tmp</value>
</property>
<property>
  <name>ha.zookeeper.quorum</name>
  <value>dnzk1:2181,dnzk2:2181,dnzk3:2181</value>
</property>

root@nn1:/opt/hadoop/etc/hadoop# vim hdfs-site.xml

Add:

<property>
  <name>dfs.datanode.data.dir</name>
  <value>file:/opt/hadoop/datanode</value>
</property>
<property>
  <name>dfs.namenode.name.dir</name>
  <value>file:/opt/hadoop/namenode</value>
</property>
<property>
  <name>dfs.nameservices</name>
  <value>ns1</value>
</property>
<property>
  <name>dfs.ha.namenodes.ns1</name>
  <value>nn1,nn2</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.ns1.nn1</name>
  <value>nn1:9000</value>
</property>
<property>
  <name>dfs.namenode.http-address.ns1.nn1</name>
  <value>nn1:50070</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.ns1.nn2</name>
  <value>nn2:9000</value>
</property>
<property>
  <name>dfs.namenode.http-address.ns1.nn2</name>
  <value>nn2:50070</value>
</property>
<property>
  <name>dfs.namenode.shared.edits.dir</name>
  <value>qjournal://dnzk1:8485;dnzk2:8485;dnzk3:8485/ns1</value>
</property>
<property>
  <name>dfs.journalnode.edits.dir</name>
  <value>/opt/hadoop/journal</value>
</property>
<property>
  <name>dfs.journalnode.http-address</name>
  <value>0.0.0.0:8480</value>
</property>
<property>
  <name>dfs.journalnode.rpc-address</name>
  <value>0.0.0.0:8485</value>
</property>
<property>
  <name>dfs.ha.automatic-failover.enabled</name>
  <value>true</value>
</property>
<property>
  <name>dfs.client.failover.proxy.provider.ns1</name>
  <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
  <name>dfs.ha.fencing.methods</name>
  <value>
    sshfence
    shell(/bin/true)
  </value>
</property>
<property>
  <name>dfs.ha.fencing.ssh.private-key-files</name>
  <value>/root/.ssh/id_rsa</value>
</property>
<property>
  <name>dfs.ha.fencing.ssh.connect-timeout</name>
  <value>30000</value>
</property>
<property>
  <name>dfs.permissions</name>
  <value>false</value>
</property>

root@nn1:/opt/hadoop/etc/hadoop# vim yarn-site.xml

Add:

<property>
  <name>yarn.resourcemanager.store.class</name>
  <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
  <name>yarn.resourcemanager.zk-address</name>
  <value>dnzk1:2181,dnzk2:2181,dnzk3:2181</value>
</property>
<property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle</value>
</property>
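Since this cluster runs two ResourceManagers (rm1 and rm2), yarn-site.xml also needs the stock YARN ResourceManager HA settings. A minimal sketch (the property names are the standard Hadoop 2.7 ones; the cluster-id value is an arbitrary example):

<property>
  <name>yarn.resourcemanager.ha.enabled</name>
  <value>true</value>
</property>
<property>
  <name>yarn.resourcemanager.cluster-id</name>
  <value>yrc</value>
</property>
<property>
  <name>yarn.resourcemanager.ha.rm-ids</name>
  <value>rm1,rm2</value>
</property>
<property>
  <name>yarn.resourcemanager.hostname.rm1</name>
  <value>rm1</value>
</property>
<property>
  <name>yarn.resourcemanager.hostname.rm2</name>
  <value>rm2</value>
</property>
<property>
  <name>yarn.resourcemanager.recovery.enabled</name>
  <value>true</value>
</property>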

root@nn1:/opt/hadoop# vim /opt/hadoop/etc/hadoop/slaves

Add:

dnzk1

dnzk2

dnzk3

Install Spark
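A fresh Spark 1.x unpacked distribution does not contain spark-env.sh; it can be created from the template that ships in conf/ before editing:

cp /opt/spark/conf/spark-env.sh.template /opt/spark/conf/spark-env.sh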

root@nn1:/opt/spark/conf# vim spark-env.sh

Add:

export SPARK_MASTER_IP=nn1

export SPARK_WORKER_MEMORY=256m

export JAVA_HOME=/usr/lib/jvm/java-8-sun

export SCALA_HOME=/opt/scala

export SPARK_HOME=/opt/spark

export HADOOP_CONF_DIR=/opt/hadoop/etc/hadoop

export SPARK_LIBRARY_PATH=$SPARK_HOME/lib

export SCALA_LIBRARY_PATH=$SPARK_LIBRARY_PATH

export SPARK_WORKER_CORES=1

export SPARK_WORKER_INSTANCES=1

export SPARK_MASTER_PORT=7077

root@nn1:/opt/spark/conf# vim slaves

Add:

Install ZooKeeper

Create the folder /opt/zookeeper/tmp

Create the file /opt/zookeeper/tmp/myid

echo 1 > /opt/zookeeper/tmp/myid
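If /opt/zookeeper/conf/zoo.cfg does not exist yet, it can be created from the sample file shipped with ZooKeeper 3.4.6 before editing:

cp /opt/zookeeper/conf/zoo_sample.cfg /opt/zookeeper/conf/zoo.cfg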

root@nn1:/opt/zookeeper/conf# vim zoo.cfg

Change:

dataDir=/opt/zookeeper/tmp

server.1=dnzk1:2888:3888

server.2=dnzk2:2888:3888

server.3=dnzk3:2888:3888

Generate an SSH key

ssh-keygen -t dsa

Append id_dsa.pub to the /home/docker/config/authorized_keys file on the host.

root@nn1:/opt/hadoop# cat ~/.ssh/id_dsa.pub
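Since /home/docker/config is mounted into the container at /config, the key can also be appended directly from inside the container; a minimal sketch:

cat ~/.ssh/id_dsa.pub >> /config/authorized_keys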

Run

sudo docker commit -m "namenode1" installspark ubuntu:ns1

Edit /home/docker/config/hosts on the host machine and add:

172.17.0.11    nn1
172.17.0.12    nn2
172.17.0.13    rm1
172.17.0.14    rm2
172.17.0.15    dnzk1
172.17.0.16    dnzk2
172.17.0.17    dnzk3

Start the containers

sudo docker run --name dnzk1 -h dnzk1 --net=none -p 2185:2181 -p 50075:50070 -p 9005:9000 -p 8485:8485 -p 7075:7077 -p 2885:2888 -v /home/docker/config/:/config -it spark1_7-hadoop2_7_1-scala1_1:basic

sudo docker run --name dnzk2 -h dnzk2 --net=none -p 2186:2181 -p 50076:50070 -p 9006:9000 -p 8486:8485 -p 7076:7077 -p 2886:2888 -v /home/docker/config/:/config -it spark1_7-hadoop2_7_1-scala1_1:basic

sudo docker run --name dnzk3 -h dnzk3 --net=none -p 2187:2181 -p 50077:50070 -p 9007:9000 -p 8487:8485 -p 7077:7077 -p 2887:2888 -v /home/docker/config/:/config -it spark1_7-hadoop2_7_1-scala1_1:basic

sudo docker run --name nn1 -h nn1 --net=none -p 2181:2181 -p 50071:50070 -p 9001:9000 -p 8481:8485 -p 7071:7077 -p 2881:2888 -v /home/docker/config/:/config -it spark1_7-hadoop2_7_1-scala1_1:basic

sudo docker run --name nn2 -h nn2 --net=none -p 2182:2181 -p 50072:50070 -p 9002:9000 -p 8482:8485 -p 7072:7077 -p 2882:2888 -v /home/docker/config/:/config -it spark1_7-hadoop2_7_1-scala1_1:basic

sudo docker run --name rm1 -h rm1 --net=none -p 2183:2181 -p 50073:50070 -p 9003:9000 -p 8483:8485 -p 7073:7077 -p 2883:2888 -v /home/docker/config/:/config -it spark1_7-hadoop2_7_1-scala1_1:basic

sudo docker run --name rm2 -h rm2 --net=none -p 2184:2181 -p 50074:50070 -p 9004:9000 -p 8484:8485 -p 7074:7077 -p 2884:2888 -v /home/docker/config/:/config -it spark1_7-hadoop2_7_1-scala1_1:basic

On dnzk2 run echo 2 > /opt/zookeeper/tmp/myid, and on dnzk3 run echo 3 > /opt/zookeeper/tmp/myid.

Configure the network

sudo pipework docker0 -i eth0 nn1 172.17.0.11/16

sudo pipework docker0 -i eth0 nn2 172.17.0.12/16

sudo pipework docker0 -i eth0 rm1 172.17.0.13/16

sudo pipework docker0 -i eth0 rm2 172.17.0.14/16

sudo pipework docker0 -i eth0 dnzk1 172.17.0.15/16

sudo pipework docker0 -i eth0 dnzk2 172.17.0.16/16

sudo pipework docker0 -i eth0 dnzk3 172.17.0.17/16
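A quick way to verify that pipework assigned the addresses and that the containers can reach each other (assuming a Docker version that supports docker exec):

sudo docker exec nn1 ip addr show eth0
sudo docker exec dnzk1 ping -c 1 172.17.0.11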

Start the Hadoop cluster. On dnzk1, dnzk2 and dnzk3, start ZooKeeper and the Hadoop JournalNode:

/opt/zookeeper/bin/zkServer.sh start

/opt/hadoop/sbin/hadoop-daemon.sh start journalnode

On nn1, format HDFS and the ZooKeeper failover state, then start HDFS:

/opt/hadoop/bin/hdfs namenode -format


scp -r /opt/hadoop/namenode/ nn2:/opt/hadoop/

/opt/hadoop/bin/hdfs namenode -bootstrapStandby

/opt/hadoop/bin/hdfs zkfc -formatZK

/opt/hadoop/sbin/start-dfs.sh

Start YARN on rm1

/opt/hadoop/sbin/start-yarn.sh

Start the ResourceManager on rm2

/opt/hadoop/sbin/yarn-daemon.sh start resourcemanager

Start Spark

/opt/spark/sbin/start-all.sh

Check:

http://172.17.0.11:50070 (active)

http://172.17.0.12:50070 (standby)
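A simple smoke test against the Spark master (the examples jar path below is where Spark 1.x prebuilt packages usually put it; adjust it for your build):

/opt/spark/bin/spark-submit --class org.apache.spark.examples.SparkPi --master spark://nn1:7077 /opt/spark/lib/spark-examples-*.jar 10

The Spark master web UI is normally served on port 8080 of nn1.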

Cluster services after startup:

Host     IP             Installed software         Running processes
nn1      172.17.0.11    jdk, hadoop                NameNode, DFSZKFailoverController (zkfc)
nn2      172.17.0.12    jdk, hadoop                NameNode, DFSZKFailoverController (zkfc)
rm1      172.17.0.13    jdk, hadoop                ResourceManager
rm2      172.17.0.14    jdk, hadoop                ResourceManager
dnzk1    172.17.0.15    jdk, hadoop, zookeeper     DataNode, NodeManager, JournalNode, QuorumPeerMain
dnzk2    172.17.0.16    jdk, hadoop, zookeeper     DataNode, NodeManager, JournalNode, QuorumPeerMain
dnzk3    172.17.0.17    jdk, hadoop, zookeeper     DataNode, NodeManager, JournalNode, QuorumPeerMain
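The running daemons on each node can be cross-checked against the table with jps, for example:

# on dnzk1: expect DataNode, NodeManager, JournalNode and QuorumPeerMain
jps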




