Environment Setup
Part 1
Change the hostname to master
ifconfig
vim /etc/hostname
vim /etc/hosts
hostname master
sudo passwd root
ssh root@127.0.0.1
su root
sudo ssh root@127.0.0.1
# sudo ssh parallels@10.211.55.3
ssh-keygen
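The last command sets up key-based login. A minimal sketch of passwordless SSH to the local master node (assuming root and the default key path):
ssh-keygen -t rsa                                   # generate a key pair; accept the default path and an empty passphrase
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys     # authorize the key locally so start-dfs.sh can ssh without a password
chmod 600 ~/.ssh/authorized_keys
ssh root@master                                     # should now log in without a password prompt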
Check that SSH is installed:
rpm -qa | grep openssh
- hadoop
tar -zxvf hadoop-2.6.0.tar.gz
mv hadoop-2.6.0 /usr/local/hadoop
tar -zxvf jdk-8u181-linux-x64.gz
mv jdk1.8.0_181 /usr/local/jdk
cd /usr/local/hadoop
cd etc/hadoop
# configure JAVA_HOME
vim hadoop-env.sh
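In hadoop-env.sh, JAVA_HOME has to point at the JDK unpacked above. A minimal sketch, assuming the /usr/local/jdk path used in this guide:
# hadoop-env.sh
export JAVA_HOME=/usr/local/jdk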
- core-site.xml
vim core-site.xml
<configuration>
    <property>
        <name>fs.default.name</name>
        <value>hdfs://master:9000</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/usr/local/hadoop/tmp</value>
    </property>
</configuration>
- hdfs-site.xml
vim hdfs-site.xml
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.permissions</name>
        <value>false</value>
    </property>
</configuration>
- mapred-site.xml
cp mapred-site.xml.template mapred-site.xml
vim mapred-site.xml
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>master:10020</value>
    </property>
</configuration>
- yarn-site.xml
vim yarn-site.xml
<configuration>
    <!-- Site specific YARN configuration properties -->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>master</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>mapreduce.job.ubertask.enable</name>
        <value>true</value>
    </property>
</configuration>
- Environment configuration
vim /etc/profile
export HADOOP_HOME=/usr/local/hadoop
export PATH=.:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
source /etc/profile
hadoop namenode -format
cd /usr/local/hadoop/sbin
start-dfs.sh
start-yarn.sh
systemctl stop firewalld.service
- hadoop web UI: 10.211.55.3:50070
- yarn web UI: 10.211.55.3:8088
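Before moving on, it helps to confirm the daemons actually started (a suggested check, not part of the original notes):
jps            # expect NameNode, DataNode, SecondaryNameNode, ResourceManager and NodeManager
hdfs dfs -ls / # the HDFS root should be reachable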
hbase
tar -zxvf hadoop-2.6.0.tar.gz
rm -rf hadoop-2.6.0
tar -zxvf hbase-1.0.0-cdh5.5.1.tar.gz
mv hbase-1.0.0-cdh5.5.1 /usr/local/hbase
Enter the installation directory:
cd /usr/local/hbase
cd conf/
vim hbase-site.xml
hbase-site.xml
<configuration>
    <property>
        <name>hbase.rootdir</name>
        <value>hdfs://master:9000/hbase</value>
    </property>
    <property>
        <name>hbase.cluster.distributed</name>
        <value>true</value>
    </property>
    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>master</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
</configuration>
hbase-env.sh
vim hbase-env.sh
export JAVA_HOME=/usr/local/jdk
regionservers
vim regionservers
master
ps aux | grep zookeeper
ps aux | grep regionserver
profile
vim /etc/profile
export HBASE_HOME=/usr/local/hbase
export HADOOP_HOME=/usr/local/hadoop
export JAVA_HOME=/usr/local/jdk
export ZK_HOME=/usr/local/zk
export PATH=.:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$ZK_HOME/bin:$HBASE_HOME/bin:$PATH
source /etc/profile
Check:
jps
start-hbase.sh
HBase web UI: 10.211.55.3:60010
hbase shell
hbase shell
create 'userscanlog','info'
create 'pindaoanaly','info'
list
Error encountered when running create 'userscanlog','info':
hbase(main):001:0> create 'userscanlog','info'
ERROR: Can't get master address from ZooKeeper; znode data == null
Here is some help for this command:
Creates a table. Pass a table name, and a set of column family
specifications (at least one), and, optionally, table configuration.
Column specification can be a simple string (name), or a dictionary
(dictionaries are described below in main help output), necessarily
including NAME attribute.
Examples:
Create a table with namespace=ns1 and table qualifier=t1
hbase> create 'ns1:t1', {NAME => 'f1', VERSIONS => 5}
Create a table with namespace=default and table qualifier=t1
hbase> create 't1', {NAME => 'f1'}, {NAME => 'f2'}, {NAME => 'f3'}
hbase> # The above in shorthand would be the following:
hbase> create 't1', 'f1', 'f2', 'f3'
hbase> create 't1', {NAME => 'f1', VERSIONS => 1, TTL => 2592000, BLOCKCACHE => true}
hbase> create 't1', {NAME => 'f1', CONFIGURATION => {'hbase.hstore.blockingStoreFiles' => '10'}}
Table configuration options can be put at the end.
Examples:
hbase> create 'ns1:t1', 'f1', SPLITS => ['10', '20', '30', '40']
hbase> create 't1', 'f1', SPLITS => ['10', '20', '30', '40']
hbase> create 't1', 'f1', SPLITS_FILE => 'splits.txt', OWNER => 'johndoe'
hbase> create 't1', {NAME => 'f1', VERSIONS => 5}, METADATA => { 'mykey' => 'myvalue' }
hbase> # Optionally pre-split the table into NUMREGIONS, using
hbase> # SPLITALGO ("HexStringSplit", "UniformSplit" or classname)
hbase> create 't1', 'f1', {NUMREGIONS => 15, SPLITALGO => 'HexStringSplit'}
hbase> create 't1', 'f1', {NUMREGIONS => 15, SPLITALGO => 'HexStringSplit', REGION_REPLICATION => 2, CONFIGURATION => {'hbase.hregion.scan.loadColumnFamiliesOnDemand' => 'true'}}
You can also keep around a reference to the created table:
hbase> t1 = create 't1', 'f1'
Which gives you a reference to the table named 't1', on which you can then
call methods.
1) Cause: the user running HBase (and its bundled ZooKeeper) cannot write to the ZooKeeper data directory, so the znode data ends up null.
Fix: in hbase-site.xml, set the ZooKeeper data directory to a directory the HBase user can write to, then restart HBase (sketched below), for example:
<property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>/data/zk_data</value>
</property>
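A hedged sketch of applying the fix (the /data/zk_data path is simply the one chosen above); after the restart, the create statement should succeed:
mkdir -p /data/zk_data
stop-hbase.sh
start-hbase.sh
hbase shell    # retry: create 'userscanlog','info'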
- MySQL-5.5.53
- Remove any MySQL-related packages already installed on Linux: rpm -e xxxxxxx --nodeps
Run rpm -qa | grep mysql to check that they are all gone.
rpm -qa | grep mysql
rpm -ivh mysql.rpm
rpm -qa | grep mariadb
rpm -e --nodeps mariadb-libs
- Run rpm -i mysql-server-**** to install the MySQL server
tar -xvf MySQL-5.5.53-1.linux2.6.x86_64.rpm-bundle.tar
rpm -ivh MySQL-server-5.5.53-1.linux2.6.x86_64.rpm
/usr/bin/mysqladmin -u root password 'new-password'
/usr/bin/mysqladmin -u root -h master password 'new-password'
Alternatively you can run:
/usr/bin/mysql_secure_installation
- Start the MySQL server with mysqld_safe &
(Note: start the server before installing the client and before step 5; otherwise step 5 will fail.)
mysqld_safe &
- Run rpm -i mysql-client-**** to install the MySQL client
rpm -ivh MySQL-client-5.5.53-1.linux2.6.x86_64.rpm
- Run mysql_secure_installation to set the root password
mysql_secure_installation
mysql -u root -padmin    # or simply: mysql -u root -p
grant all on hive.* to 'root'@'%' identified by 'my81527';
flush privileges;
Hive installation
unzip
tar -zxvf apache-hive-1.0.0-bin.tar.gz
mv
mv apache-hive-1.0.0-bin /usr/local/hive
cd /usr/local/hive/
cd conf/
hive-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://master:3306/hive?createDatabaseIfNotExist=true</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>root</value>
    </property>
    <!-- use the MySQL root password set earlier -->
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>123456</value>
    </property>
    <property>
        <name>hive.metastore.warehouse.dir</name>
        <value>/user/hive/warehouse</value>
    </property>
    <property>
        <name>hive.metastore.local</name>
        <value>true</value>
    </property>
</configuration>
Upload the MySQL connector JAR:
cp mysql-connector-java-5.1.40-bin.jar /usr/local/hive/lib/
Environment configuration
vim /etc/profile
export HBASE_HOME=/usr/local/hbase
export HADOOP_HOME=/usr/local/hadoop
export JAVA_HOME=/usr/local/jdk
export ZK_HOME=/usr/local/zk
export HIVE_HOME=/usr/local/hive
export PATH=.:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$ZK_HOME/bin:$HBASE_HOME/bin:$HIVE_HOME/bin:$PATH
source /etc/profile
Start:
cd /usr/local/hive/bin
hive
show tables;
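A quick way to confirm the MySQL-backed metastore works is to create and drop a throwaway table (my own hedged check; t_test is an arbitrary name):
hive -e "create table t_test(id int, name string) row format delimited fields terminated by '\t';"
hive -e "show tables;"
hive -e "drop table t_test;"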
Recommendation system: Sqoop environment setup
- Extract and install
tar -zxvf sqoop-1.4.6.bin__hadoop-2.0.4-alpha.tar.gz
mv sqoop-1.4.6.bin__hadoop-2.0.4-alpha /usr/local/sqoop
cd /usr/local/sqoop
cp mysql-connector-java-5.1.40-bin.jar /usr/local/sqoop/lib
cp mysql-connector-java-5.1.35.jar /usr/local/sqoop/lib
vim /etc/profile
export SQOOP_HOME=/usr/local/sqoop
export PATH=.:$SQOOP_HOME/bin:$PATH
source /etc/profile
- Copy a MySQL table into HDFS/Hive:
sqoop                                    ## the sqoop command
import                                   ## import
--connect jdbc:mysql://ip:3306/sqoop     ## JDBC URL for the MySQL connection
--username root                          ## MySQL username
--password admin                         ## MySQL password
--table mysql1                           ## name of the table to export from MySQL
--fields-terminated-by '\t'              ## field separator for rows in the output files
-m 1                                     ## use 1 map task for the copy
--hive-import                            ## copy the MySQL table into Hive; without this option the data only goes to HDFS
Incremental import
- --check-column (col): the column used to decide which rows to import, e.g. id
- --incremental (mode): append imports rows whose check column is greater than the value given by --last-value; lastmodified imports rows modified after the date given by --last-value
- --last-value (value): the maximum value of the check column from the previous import (only rows greater than it are imported); it can also be set to an arbitrary value
For --incremental, use lastmodified when the import is driven by a date; otherwise use append. An example is sketched below.
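A hedged example of an append-mode incremental import, reusing the connection settings from this setup (the user table and id check column are illustrative):
sqoop import \
  --connect jdbc:mysql://10.211.55.3:3306/cargocn-cloud \
  --username root --password my81527 \
  --table user \
  --check-column id \
  --incremental append \
  --last-value 0 \
  --fields-terminated-by '\t' \
  -m 1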
Import into Hive:
sqoop import --connect jdbc:mysql://hadoop0:3306/hive --username root --password my81527 --table TBLS --fields-terminated-by '\t' --null-string '**' --m 1 --append --hive-import
CREATE TABLE t2(id int, name string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
--query 'SELECT a.*, b.* FROM a JOIN b on (a.id = b.id) WHERE $CONDITIONS'
hdfs dfs -rm -r /user/hive/warehouse/*
Database creation
hive
- Sqoop sync commands
sqoop import --connect jdbc:mysql://10.211.55.3:3306/cargocn-cloud --username root --password my81527 --query 'SELECT id,name,age,address,telphone,qq,weixin,email,sex,birthday,account FROM user WHERE $CONDITIONS' --fields-terminated-by '\t' --null-string '**' --target-dir /user/hive/warehouse/cargocn-cloud.db/user --hive-table cargocn-cloud.user --m 1 --hive-import
sqoop import --connect jdbc:mysql://10.211.55.3:3306/cargocn-cloud --username root --password my81527 --table producttype --fields-terminated-by '\t' --null-string '**' --target-dir /user/hive/warehouse/cargocn-cloud.db/producttype --hive-table cargocn-cloud.producttype --m 1 --hive-import
sqoop import --connect jdbc:mysql://10.211.55.3:3306/cargocn-cloud --username root --password my81527 --table productdetail --fields-terminated-by '\t' --null-string '**' --target-dir /user/hive/warehouse/cargocn-cloud.db/productdetail --hive-table cargocn-cloud.productdetail --m 1 --hive-import
sqoop import --connect jdbc:mysql://10.211.55.3:3306/cargocn-cloud --username root --password my81527 --table product --fields-terminated-by '\t' --null-string '**' --m 1 --target-dir /user/hive/warehouse/cargocn-cloud.db/product --hive-table cargocn-cloud.product --hive-import
sqoop import --connect jdbc:mysql://10.211.55.3:3306/cargocn-cloud --username root --password my81527 --table ordermain --fields-terminated-by '\t' --null-string '**' --m 1 --target-dir /user/hive/warehouse/cargocn-cloud.db/ordermain --hive-table cargocn-cloud.ordermain --hive-import
sqoop import --connect jdbc:mysql://10.211.55.3:3306/cargocn-cloud --username root --password my81527 --table orderdetail --fields-terminated-by '\t' --null-string '**' --m 1 --target-dir /user/hive/warehouse/cargocn-cloud.db/orderdetail --hive-table cargocn-cloud.orderdetail --hive-import
sqoop import --connect jdbc:mysql://10.211.55.3:3306/cargocn-cloud --username root --password my81527 --query 'SELECT id ,merchantname ,merchantshopname ,merchantaccount ,mechantscope FROM mechant WHERE $CONDITIONS' --fields-terminated-by '\t' --null-string '**' --m 1 --target-dir /user/hive/warehouse/cargocn-cloud.db/mechant --hive-table cargocn-cloud.mechant --hive-import
Flume collection
tar -zxvf apache-flume-1.6.0-bin.tar.gz
Edit the configuration file flume-conf.properties:
productinfo.sources = s1
productinfo.channels = c1
productinfo.sinks = s1
productinfo.sources.s1.type = org.apache.flume.source.kafka.KafkaSource
productinfo.sources.s1.zookeeperConnect = master:2181
productinfo.sources.s1.topic = productscanlogflume
productinfo.sources.s1.groupId = ty1
productinfo.sources.s1.channels = c1
productinfo.sources.s1.interceptors = i1
productinfo.sources.s1.interceptors.i1.type = timestamp
productinfo.sources.s1.kafka.consumer.timeout.ms = 1000
productinfo.channels.c1.type = memory
productinfo.channels.c1.capacity = 1000
productinfo.channels.c1.transactionCapacity = 1000
productinfo.sinks.s1.type = hdfs
productinfo.sinks.s1.hdfs.path = /data/kafka/productinfo/%y-%m-%d
productinfo.sinks.s1.hdfs.fileType = DataStream
productinfo.sinks.s1.hdfs.rollSize = 0
productinfo.sinks.s1.hdfs.rollCount = 0
productinfo.sinks.s1.hdfs.rollInterval = 30
productinfo.sinks.s1.channel = c1
Start:
/usr/local/flume/bin/flume-ng agent -f /usr/local/flume/conf/flume-conf.properties -n productinfo > productinfo.txt
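To check that events are reaching HDFS (a suggested verification; the path matches hdfs.path above and a dated subdirectory is created per day):
hdfs dfs -ls /data/kafka/productinfo/   # a %y-%m-%d subdirectory should appear once events flow
tail -f productinfo.txt                 # agent output redirected by the start command above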
Recommendation system: writing recommendation code with Spark MLlib
- Maven dependency (note: ${scala.binary.version} and ${project.version} are placeholders taken from the Spark build; replace them with your Scala binary version and the Spark release you installed)
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-mllib_${scala.binary.version}</artifactId>
    <version>${project.version}</version>
    <scope>provided</scope>
</dependency>