Hadoop Setup

Environment Setup

Part One

  1. Change the hostname to master

    ifconfig
    vim /etc/hostname
    vim /etc/hosts
    hostname master
    sudo passwd root
    ssh root@127.0.0.1
    su root
    sudo ssh root@127.0.0.1
    <!-- sudo ssh parallels@10.211.55.3 -->
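    The hosts file should map the hostname to the machine's address; a minimal sketch of /etc/hosts, assuming the VM's IP is 10.211.55.3 (the address used for the web UIs later in this post):

    # /etc/hosts
    127.0.0.1    localhost
    10.211.55.3  master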
  2. ssh-keygen

Check the SSH installation

rpm -qa | grep openssh
sudo yum install openssh-clients
sudo yum install openssh-server
ssh root@127.0.0.1
ssh-keygen -t rsa
ssh-copy-id master
ssh master
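To confirm that key-based login works before continuing, a quick check (BatchMode makes ssh fail instead of falling back to a password prompt):

ssh -o BatchMode=yes master 'hostname'   # should print "master" without asking for a password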
  3. Install Hadoop and the JDK
    tar -zxvf hadoop-2.6.0.tar.gz
    mv hadoop-2.6.0 /usr/local/hadoop

    tar -zxvf jdk-8u181-linux-x64.gz
    mv jdk1.8.0_181 /usr/local/jdk

    cd /usr/local/hadoop
    cd etc/hadoop
    # configure JAVA_HOME
    vim hadoop-env.sh
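    In hadoop-env.sh, point JAVA_HOME at the JDK that was just moved into place (sketch, using the /usr/local/jdk path from above):

    # hadoop-env.sh
    export JAVA_HOME=/usr/local/jdk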
  • core-site.xml
    vim core-site.xml
    <configuration>
      <property>
        <name>fs.default.name</name>
        <value>hdfs://master:9000</value>
      </property>
      <property>
        <name>hadoop.tmp.dir</name>
        <value>/usr/local/hadoop/tmp</value>
      </property>
    </configuration>
  • hdfs-site.xml
    vim hdfs-site.xml
    <configuration>
      <property>
        <name>dfs.replication</name>
        <value>1</value>
      </property>
      <property>
        <name>dfs.permissions</name>
        <value>false</value>
      </property>
    </configuration>
  • mapred-site.xml
    cp mapred-site.xml.template mapred-site.xml
    vim mapred-site.xml
    <configuration>
      <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
      </property>
      <property>
        <name>mapreduce.jobhistory.address</name>
        <value>master:10020</value>
      </property>
    </configuration>
  • yarn-site.xml
    vim yarn-site.xml

    <configuration>
      <!-- Site specific YARN configuration properties -->
      <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>master</value>
      </property>
      <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
      </property>
      <property>
        <name>mapreduce.job.ubertask.enable</name>
        <value>true</value>
      </property>
    </configuration>
  • Environment variables
    vim /etc/profile
    export HADOOP_HOME=/usr/local/hadoop
    export PATH=.:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
    source /etc/profile
    hadoop namenode -format
    cd hadoop/sbin
    start-dfs.sh
    start-yarn.sh
    systemctl stop firewalld.service
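    After start-dfs.sh and start-yarn.sh, jps should list the HDFS and YARN daemons. A rough sketch of the expected output on this single-node setup (process IDs will differ):

    jps
    # 2481 NameNode
    # 2602 DataNode
    # 2790 SecondaryNameNode
    # 3043 ResourceManager
    # 3151 NodeManager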
  • Hadoop web UI

10.211.55.3:50070

  • YARN web UI

10.211.55.3:8088

  • hbase

    tar -zxvf hadoop-2.6.0.tar.gz 
    rm -rf hadoop-2.6.0
    tar -zxvf hbase-1.0.0-cdh5.5.1.tar.gz
    mv hbase-1.0.0-cdh5.5.1 /usr/local/hbase
  • Enter the installation directory

    cd /usr/local/hbase
    cd conf/
    vim hbase-site.xml
  • hbase-site.xml

    <configuration>
    <property>
      <name>hbase.rootdir</name>
      <value>hdfs://master:9000/hbase</value>
    </property>
    <property>
      <name>hbase.cluster.distributed</name>
      <value>true</value>
    </property>
    <property>
      <name>hbase.zookeeper.quorum</name>
      <value>master</value>
    </property>
    <property>
      <name>dfs.replication</name>
      <value>1</value>
    </property>
    </configuration>
  • hbase-env.sh

    vim hbase-env.sh 
    export JAVA_HOME=/usr/local/jdk
  • regionservers

    # regionservers should contain only the hostname of this single node
    vim regionservers
    master

    # after start-hbase.sh, verify the HBase-managed ZooKeeper and the RegionServer are running
    ps aux | grep zookeeper
    ps aux | grep regionserver
  • profile

    vim /etc/profile
    export HBASE_HOME=/usr/local/hbase
    export HADOOP_HOME=/usr/local/hadoop
    export JAVA_HOME=/usr/local/jdk
    export ZK_HOME=/usr/local/zk
    export PATH=.:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$ZK_HOME/bin:$HBASE_HOME/bin:$PATH

    source /etc/profile
  • Verify

    jps
    start-hbase.sh
    10.211.55.3:60010
  • hbase shell

    hbase shell

    create 'userscanlog','info'
    create 'pindaoanaly','info'

    list
  • create 'userscanlog','info'

    hbase(main):001:0> create 'userscanlog','info'

    ERROR: Can't get master address from ZooKeeper; znode data == null

    Here is some help for this command:
    Creates a table. Pass a table name, and a set of column family
    specifications (at least one), and, optionally, table configuration.
    Column specification can be a simple string (name), or a dictionary
    (dictionaries are described below in main help output), necessarily
    including NAME attribute.
    Examples:

    Create a table with namespace=ns1 and table qualifier=t1
    hbase> create 'ns1:t1', {NAME => 'f1', VERSIONS => 5}

    Create a table with namespace=default and table qualifier=t1
    hbase> create 't1', {NAME => 'f1'}, {NAME => 'f2'}, {NAME => 'f3'}
    hbase> # The above in shorthand would be the following:
    hbase> create 't1', 'f1', 'f2', 'f3'
    hbase> create 't1', {NAME => 'f1', VERSIONS => 1, TTL => 2592000, BLOCKCACHE => true}
    hbase> create 't1', {NAME => 'f1', CONFIGURATION => {'hbase.hstore.blockingStoreFiles' => '10'}}

    Table configuration options can be put at the end.
    Examples:

    hbase> create 'ns1:t1', 'f1', SPLITS => ['10', '20', '30', '40']
    hbase> create 't1', 'f1', SPLITS => ['10', '20', '30', '40']
    hbase> create 't1', 'f1', SPLITS_FILE => 'splits.txt', OWNER => 'johndoe'
    hbase> create 't1', {NAME => 'f1', VERSIONS => 5}, METADATA => { 'mykey' => 'myvalue' }
    hbase> # Optionally pre-split the table into NUMREGIONS, using
    hbase> # SPLITALGO ("HexStringSplit", "UniformSplit" or classname)
    hbase> create 't1', 'f1', {NUMREGIONS => 15, SPLITALGO => 'HexStringSplit'}
    hbase> create 't1', 'f1', {NUMREGIONS => 15, SPLITALGO => 'HexStringSplit', REGION_REPLICATION => 2, CONFIGURATION => {'hbase.hregion.scan.loadColumnFamiliesOnDemand' => 'true'}}

    You can also keep around a reference to the created table:

    hbase> t1 = create 't1', 'f1'

    Which gives you a reference to the table named 't1', on which you can then
    call methods.
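    The "Can't get master address from ZooKeeper; znode data == null" error above usually means the HMaster process is not running (or died right after start-hbase.sh), so ZooKeeper has no master znode to hand out. A rough troubleshooting sketch, using the paths from earlier in this post:

    # HMaster, HRegionServer and HQuorumPeer should all show up
    jps
    # check the master log for the real cause (assumes HBASE_HOME=/usr/local/hbase)
    tail -n 100 /usr/local/hbase/logs/hbase-*-master-*.log
    # if HDFS is healthy but HBase is not, restart HBase
    stop-hbase.sh
    start-hbase.sh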


  • MySQL-5.5.53
  1. Remove any MySQL-related packages already installed on the Linux host: rpm -e xxxxxxx --nodeps
    Run rpm -qa | grep mysql afterwards to check that everything was removed cleanly.
    rpm -qa | grep mysql                 # check for leftover MySQL packages
    rpm -ivh mysql.rpm
    rpm -qa | grep mariadb               # mariadb-libs conflicts with the MySQL server RPM
    rpm -e --nodeps mariadb-libs
  2. Install the MySQL server: rpm -i MySQL-server-****
    tar -xvf MySQL-5.5.53-1.linux2.6.x86_64.rpm-bundle.tar
    rpm -ivh MySQL-server-5.5.53-1.linux2.6.x86_64.rpm
    # the server RPM's post-install message suggests setting a root password:
    /usr/bin/mysqladmin -u root password 'new-password'
    /usr/bin/mysqladmin -u root -h master password 'new-password'

    Alternatively you can run:
    /usr/bin/mysql_secure_installation
  3. Start the MySQL server: mysqld_safe &
    (Make sure the server is running before installing the client and before step 5; otherwise step 5 will fail.)
    mysqld_safe &
  4. Install the MySQL client: rpm -i MySQL-client-****
    rpm -ivh MySQL-client-5.5.53-1.linux2.6.x86_64.rpm
  5. Run mysql_secure_installation to set the root password
    mysql_secure_installation

    mysql -u root -p        # log in with the root password (or simply: mysql -p)
    grant all on hive.* to 'root'@'%' identified by 'my81527';
    flush privileges;
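    A quick way to confirm the grant took effect is to connect over TCP as 'root'@'%' (sketch; the password is the one set in the grant above):

    mysql -u root -pmy81527 -h master -e "show grants for 'root'@'%';"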

Hive Installation

  1. Unpack

    tar -zxvf apache-hive-1.0.0-bin.tar.gz
  2. Move to /usr/local/hive

    mv apache-hive-1.0.0-bin /usr/local/hive
    cd /usr/local/hive/
    cd conf/
  3. hive-site.xml

    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://master:3306/hive?createDatabaseIfNotExist=true</value>
      </property>
      <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
      </property>
      <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>root</value>
      </property>
      <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>my81527</value>
      </property>
      <property>
        <name>hive.metastore.warehouse.dir</name>
        <value>/user/hive/warehouse</value>
      </property>
      <property>
        <name>hive.metastore.local</name>
        <value>true</value>
      </property>
    </configuration>
  4. Upload the MySQL connector JAR

    cp mysql-connector-java-5.1.40-bin.jar /usr/local/hive/lib/
  5. Environment variables

    vim /etc/profile

    export HBASE_HOME=/usr/local/hbase
    export HADOOP_HOME=/usr/local/hadoop
    export JAVA_HOME=/usr/local/jdk
    export ZK_HOME=/usr/local/zk
    export HIVE_HOME=/usr/local/hive
    export PATH=.:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$ZK_HOME/bin:$HBASE_HOME/bin:$HIVE_HOME/bin:$PATH

    source /etc/profile
  6. Start Hive

    cd bin
    hive

    show tables;
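    A small smoke test to confirm that the metastore connection works end to end (sketch; the table name is arbitrary):

    hive -e "create table smoke_test(id int, name string); show tables; drop table smoke_test;"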

Recommendation System: Sqoop Environment Setup

  • Extract and install
    tar -zxvf sqoop-1.4.6.bin__hadoop-2.0.4-alpha.tar.gz
    mv sqoop-1.4.6.bin__hadoop-2.0.4-alpha /usr/local/sqoop
    cd /usr/local/sqoop
    # copy a MySQL JDBC driver into Sqoop's lib directory (one of these is enough)
    cp mysql-connector-java-5.1.40-bin.jar /usr/local/sqoop/lib
    cp mysql-connector-java-5.1.35.jar /usr/local/sqoop/lib
    vim /etc/profile
    export SQOOP_HOME=/usr/local/sqoop
    export PATH=.:$SQOOP_HOME/bin:$PATH
    source /etc/profile
  1. Copy tables from MySQL into HDFS/Hive:
    sqoop                                      ## the sqoop command
    import                                     ## import data (MySQL -> Hadoop)
    --connect jdbc:mysql://ip:3306/sqoop       ## JDBC URL of the source MySQL database
    --username root                            ## MySQL user name
    --password admin                           ## MySQL password
    --table mysql1                             ## the MySQL table to export
    --fields-terminated-by '\t'                ## field delimiter for the output records
    -m 1                                       ## run the copy with a single map task
    --hive-import                              ## load the data into Hive; without this flag it only lands in HDFS

    Incremental import parameters:
    --check-column (col)    the column used to decide which rows are new, e.g. id
    --incremental (mode)    append: import rows whose check column is greater than --last-value; lastmodified: import rows modified after the --last-value timestamp
    --last-value (value)    the largest value seen in the previous import (only rows above it are imported); it can also be set by hand
    For --incremental, use lastmodified when the check is based on a date, otherwise use append. (See the sketch after this block for a complete example.)

    Import into Hive:
    sqoop import --connect jdbc:mysql://master:3306/hive --username root --password my81527 --table TBLS --fields-terminated-by '\t' --null-string '**' --m 1 --append --hive-import

    CREATE TABLE t2(id int, name string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

    --query 'SELECT a.*, b.* FROM a JOIN b on (a.id == b.id) WHERE $CONDITIONS'

    hdfs dfs -rm -r /user/hive/warehouse/*
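    Putting the incremental parameters together, a sketch of an append-mode import (illustrative only; it assumes the MySQL ordermain table has a numeric id column to use as the check column):

    sqoop import \
      --connect jdbc:mysql://master:3306/cargocn-cloud \
      --username root --password my81527 \
      --table ordermain \
      --check-column id \
      --incremental append \
      --last-value 0 \
      --fields-terminated-by '\t' \
      -m 1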

Database Creation

hive
create database cargocn-cloud;
use cargocn-cloud;

CREATE TABLE user(id string, name string,age string,address string,telphone string,qq string,weixin string,email string,sex string,birthday string,account string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

CREATE TABLE producttype(id string, producttypename string,producttypedescription string,typegrade string,parentid string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';


CREATE TABLE productdetail(id string,proudctid string,productplace string,productdescription string,productbrand string,productweight string,productspecification string,productdetaipicurl string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';


CREATE TABLE product(id string,producttypeid string,producttitle string,productprice string,mechartid string,createtime string,audittime string,auditstate string,stocknum string,sellnum string,productpicurl string,proudctstatus string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';


CREATE TABLE ordermain(id string,payamount string,userid string,createtime string,paytime string,paystatus string,consigneeadress string,consigneephone string,consigneename string,tradenumber string,paytype string,orderstatus string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

CREATE TABLE orderdetail(id string,orderid string,productid string,mechartid string,createtime string,tradenum string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

CREATE TABLE mechant(id string,merchantname string,merchantshopname string,merchantaccount string,mechantscope string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
  • Sqoop sync commands
    sqoop import --connect jdbc:mysql://10.211.55.3:3306/cargocn-cloud --username root --password my81527 --query 'SELECT id,name,age,address,telphone,qq,weixin,email,sex,birthday,account FROM user WHERE $CONDITIONS' --fields-terminated-by '\t' --null-string '**' --target-dir /user/hive/warehouse/cargocn-cloud.db/user --hive-table cargocn-cloud.user --m 1 --hive-import

    sqoop import --connect jdbc:mysql://10.211.55.3:3306/cargocn-cloud --username root --password my81527 --table producttype --fields-terminated-by '\t' --null-string '**' --target-dir /user/hive/warehouse/cargocn-cloud.db/producttype --hive-table cargocn-cloud.producttype --m 1 --hive-import

    sqoop import --connect jdbc:mysql://10.211.55.3:3306/cargocn-cloud --username root --password my81527 --table productdetail --fields-terminated-by '\t' --null-string '**' --target-dir /user/hive/warehouse/cargocn-cloud.db/productdetail --hive-table cargocn-cloud.productdetail --m 1 --hive-import

    sqoop import --connect jdbc:mysql://10.211.55.3:3306/cargocn-cloud --username root --password my81527 --table product --fields-terminated-by '\t' --null-string '**' --m 1 --target-dir /user/hive/warehouse/cargocn-cloud.db/product --hive-table cargocn-cloud.product --hive-import

    sqoop import --connect jdbc:mysql://10.211.55.3:3306/cargocn-cloud --username root --password my81527 --table ordermain --fields-terminated-by '\t' --null-string '**' --m 1 --target-dir /user/hive/warehouse/cargocn-cloud.db/ordermain --hive-table cargocn-cloud.ordermain --hive-import

    sqoop import --connect jdbc:mysql://10.211.55.3:3306/cargocn-cloud --username root --password my81527 --table orderdetail --fields-terminated-by '\t' --null-string '**' --m 1 --target-dir /user/hive/warehouse/cargocn-cloud.db/orderdetail --hive-table cargocn-cloud.orderdetail --hive-import

    sqoop import --connect jdbc:mysql://10.211.55.3:3306/cargocn-cloud --username root --password my81527 --query 'SELECT id ,merchantname ,merchantshopname ,merchantaccount ,mechantscope FROM mechant WHERE $CONDITIONS' --fields-terminated-by '\t' --null-string '**' --m 1 --target-dir /user/hive/warehouse/cargocn-cloud.db/mechant --hive-table cargocn-cloud.mechant --hive-import
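    To check that the imports landed where expected (sketch):

    hdfs dfs -ls /user/hive/warehouse/cargocn-cloud.db
    hdfs dfs -cat /user/hive/warehouse/cargocn-cloud.db/user/* | head -5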

Flume Collection

tar -zxvf apache-flume-1.6.0-bin.tar.gz 
mv apache-flume-1.6.0-bin /usr/local/flume
cd /usr/local/flume/conf
  • Edit the configuration file flume-conf.properties

    # agent "productinfo": Kafka source -> memory channel -> HDFS sink
    # (the source and the sink both happen to be named s1; Flume keeps them in separate namespaces)
    productinfo.sources  = s1
    productinfo.channels = c1
    productinfo.sinks = s1

    # Kafka source: consume the productscanlogflume topic via the master ZooKeeper
    productinfo.sources.s1.type = org.apache.flume.source.kafka.KafkaSource
    productinfo.sources.s1.zookeeperConnect = master:2181
    productinfo.sources.s1.topic = productscanlogflume
    productinfo.sources.s1.groupId = ty1
    productinfo.sources.s1.channels = c1
    productinfo.sources.s1.interceptors = i1
    productinfo.sources.s1.interceptors.i1.type = timestamp
    productinfo.sources.s1.kafka.consumer.timeout.ms = 1000

    # in-memory channel
    productinfo.channels.c1.type = memory
    productinfo.channels.c1.capacity = 1000
    productinfo.channels.c1.transactionCapacity = 1000

    # HDFS sink: write plain-text files, rolled every 30 seconds
    productinfo.sinks.s1.type = hdfs
    productinfo.sinks.s1.hdfs.path = /data/kafka/productinfo/%y-%m-%d
    productinfo.sinks.s1.hdfs.fileType = DataStream
    productinfo.sinks.s1.hdfs.rollSize = 0
    productinfo.sinks.s1.hdfs.rollCount = 0
    productinfo.sinks.s1.hdfs.rollInterval = 30
    productinfo.sinks.s1.channel = c1
  • Start the agent

    /usr/local/flume/bin/flume-ng agent -f /usr/local/flume/conf/flume-conf.properties -n productinfo > productinfo.txt
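    Once the agent is running, events published to the productscanlogflume topic should end up under the HDFS path from the sink config. A rough check; the producer command assumes Kafka's command line tools are on the PATH and the broker listens on master:9092 (installing Kafka is not covered in this post), and the JSON event is made up:

    echo '{"productid":"1","userid":"1"}' | kafka-console-producer.sh --broker-list master:9092 --topic productscanlogflume
    # after the 30 second roll interval, a file should appear under the dated directory
    hdfs dfs -ls /data/kafka/productinfo/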

Recommendation System: Implementing Recommendations with Spark MLlib

  • Maven dependency
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-mllib_${scala.binary.version}</artifactId>
      <version>${project.version}</version>
      <scope>provided</scope>
    </dependency>
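    As written, ${scala.binary.version} and ${project.version} only resolve inside Spark's own build; in a standalone project they would be replaced with concrete values, and the provided scope assumes the Spark jars come from the cluster at run time. A sketch of building and submitting such a job to the YARN cluster set up above (the jar name and main class are placeholders, and a Spark distribution with HADOOP_CONF_DIR pointing at /usr/local/hadoop/etc/hadoop is assumed):

    mvn -DskipTests package
    spark-submit \
      --master yarn \
      --class com.example.RecommendJob \
      target/recommend-1.0.jar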