centos7在线datahub搭建

1.安装vmware

2.安装centos7

设置虚拟机为8g内存,2核处理器,80g硬盘

3.环境准备

关闭并禁用防火墙

1
2
3
4
5
6
su root
systemctl stop firewalld
systemctl disable firewalld
#关闭selinux鉴权
vim /etc/selinux/config
SELINUX=disabled

然后xshell连接虚拟机,方便操作

挂载共享文件夹

在vmware里挂载对应目录

然后在虚拟机内

1
2
3
cd /mnt/hgfs
yum install open-vm-tools
vmhgfs-fuse .host:/ /mnt/hgfs

yum换源

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#安装wget
yum install wget -y
#备份原来的源
sudo mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.bk
#下载阿里源
cd /etc/yum.repos.d
sudo wget -nc http://mirrors.aliyun.com/repo/Centos-7.repo
#更改阿里yum源为默认源
sudo mv Centos-7.repo CentOS-Base.repo
#更新本地yum缓存
# 全部清除
yum clean all
# 更新列表
yum list
# 缓存yum包信息到本机,提高搜索速度
yum makecache

安装docker

外面下好docker-19.03.9.tgz,并放在虚拟机的共享文件夹里

1
2
3
cd /mnt/hgfs/tmp
tar zxvf docker-19.03.9.tgz
cp docker/* /usr/bin

systemd管理docker

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
cat > /usr/lib/systemd/system/docker.service << 'EOF'
[Unit]
Description=Docker Application Container Engine
Documentation=https://docs.docker.com
After=network-online.target firewalld.service
Wants=network-online.target
[Service]
Type=notify
ExecStart=/usr/bin/dockerd
ExecReload=/bin/kill -s HUP $MAINPID
LimitNOFILE=infinity
LimitNPROC=infinity
LimitCORE=infinity
TimeoutStartSec=0
Delegate=yes
KillMode=process
Restart=on-failure
StartLimitBurst=3
StartLimitInterval=60s
[Install]
WantedBy=multi-user.target
EOF

创建配置文件

1
2
3
4
5
6
7
8
mkdir /etc/docker
cat > /etc/docker/daemon.json << EOF
{
"registry-mirrors": ["https://b9pmyelo.mirror.aliyuncs.com"],
"log-driver":"json-file",
"log-opts": {"max-size":"500m", "max-file":"3"}
}
EOF

启动并设置开机启动

1
2
3
systemctl daemon-reload
systemctl start docker
systemctl enable docker

安装docker compose v2

外面下好docker-compose-linux-x86_64,并放在虚拟机的共享文件夹里

1
2
3
4
5
mkdir -p ~/.docker/cli-plugins/
cd /mnt/hgfs/tmp
cp docker-compose-linux-x86_64 ~/.docker/cli-plugins/docker-compose
chmod +x ~/.docker/cli-plugins/docker-compose
docker compose version

安装python3

1
2
3
4
yum -y groupinstall "Development tools"
yum -y install zlib-devel bzip2-devel openssl-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel db4-devel libpcap-devel xz-devel
yum install -y libffi-devel zlib-devel
yum install zlib* -y

外面下好Python-3.10.10.tar.xz,并放在虚拟机的共享文件夹里

安装openssl-1.1.1

1
2
3
4
5
6
7
8
wget http://www.openssl.org/source/openssl-1.1.1.tar.gz --no-check-certificate
tar -zxvf openssl-1.1.1.tar.gz
cp openssl-1.1.1 /root/openssl-1.1.1 -r
cd /root/openssl-1.1.1
./config --prefix=$HOME/openssl shared zlib
make && make install
echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/openssl/lib" >> $HOME/.bash_profile
source $HOME/.bash_profile

安装Python

1
2
3
4
5
6
7
tar -xvf Python-3.10.10.tar.xz
mkdir /usr/local/python3
cd Python-3.10.10
./configure --prefix=/usr/local/python3 --with-ssl --with-openssl=$HOME/openssl
#第一个指定安装的路径,不指定的话,安装过程中可能软件所需要的文件复制到其他不同目录,删除软件很不方便,复制软件也不方便.
#第三个是为了安装pip需要用到ssl,后面报错会有提到.
make && make install

创建软链接

1
2
ln -s /usr/local/python3/bin/python3 /usr/local/bin/python3
ln -s /usr/local/python3/bin/pip3 /usr/local/bin/pip3

验证

1
2
python3 -V
pip3 -V

import ssl

1
2
python3
import ssl

修改pip安装源

1
2
3
4
5
6
7
cd ~
mkdir .pip
cd .pip
vim pip.conf
#进入后添加以下内容,保存退出.
[global]
index-url = https://mirrors.aliyun.com/pypi/simple

datahub安装

1
2
3
4
5
6
7

python3 -m pip install --upgrade pip wheel setuptools
python3 -m pip uninstall datahub acryl-datahub || true # sanity check - ok if it fails
python3 -m pip install --upgrade acryl-datahub

python3 -m datahub version
python3 -m datahub check plugins
1
2
3
4
#192.168.184.2的安装需要额外的命令
GRANT USAGE ON *.* TO 'datahub'@'%' IDENTIFIED BY 'datahub' WITH GRANT OPTION;
GRANT USAGE ON *.* TO 'datahub'@'localhost' IDENTIFIED BY 'datahub' WITH GRANT OPTION;
FLUSH PRIVILEGES;

启动datahub

1
2
3
4
python3 -m datahub docker quickstart
python3 -m datahub docker quickstart --quickstart-compose-file /mnt/hgfs/tmp/datahub/datahub-master/docker/quickstart/docker-compose.quickstart.yml
python3 -m datahub docker quickstart --quickstart-compose-file /mnt/hgfs/tmp/datahub/docker-compose-without-neo4j.quickstart.yml
python3 -m datahub docker quickstart --stop

引入mysql数据源

1
2
python3 -m pip install 'acryl-datahub[mysql]'
python3 -m datahub ingest -c mysql.yaml

mysql.yaml

1
2
3
4
5
6
7
8
9
10
source:
  type: mysql
  config:
    # Coordinates
    host_port: 172.31.70.133:3306
    database: supervision-safety

    # Credentials
    username: root
    password: xjkaiya123,.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
source:
  type: mysql
  config:
    host_port: '172.31.70.196:3306'
    database: snowlotus
    username: root
    password: 'xjkaiya123,.'
    include_tables: true
    include_views: true
    profiling:
      enabled: true
      profile_table_level_only: true
    stateful_ingestion:
      enabled: true
    password: 'xjkaiya123,.'
1
{"operationName":"createIngestionSource","variables":{"input":{"type":"mysql","name":"70.196-necares_auth","config":{"recipe":"{\"source\":{\"type\":\"mysql\",\"config\":{\"host_port\":\"172.31.70.196:3306\",\"database\":\"necares_auth\",\"username\":\"root\",\"include_tables\":true,\"include_views\":true,\"profiling\":{\"enabled\":true,\"profile_table_level_only\":true},\"stateful_ingestion\":{\"enabled\":true},\"password\":\"xjkaiya123,.\"}}}","executorId":"default","debugMode":false},"schedule":{"interval":"0 0 * * *","timezone":"Asia/Shanghai"}}},"query":"mutation createIngestionSource($input: UpdateIngestionSourceInput!) {\n  createIngestionSource(input: $input)\n}\n"}