一. 问题描述
在之前的文章中,我们已经完成了Spark的安装,并配置了Spark相关的环境变量,但是运行pyspark时报错:
[root@hp7 ~]# pyspark
env: python3: 没有那个文件或目录
从报错可以看到,pyspark启动时需要调用python3命令,而centos 7.8默认只安装了python 2版本,系统中找不到python3。
二. 升级python
升级python
-- 下载python
wget https://npm.taobao.org/mirrors/python/3.8.0/Python-3.8.0.tar.xz
-- 解压
tar -xf Python-3.8.0.tar.xz
-- 编译
cd Python-3.8.0
./configure --prefix=/usr/local/python3 --with-openssl=/usr/local/python3 --with-http_ssl_module && make && make install
-- 备份 2.7
mv /usr/bin/python /usr/bin/python_2.7
-- 软链
ln -sv /usr/local/python3/bin/python3.8 /usr/bin/python
ln -sv /usr/local/python3/bin/python3.8 /usr/bin/python3
--验证
输入 python, 查看是否已经升级到 3.8 版本。
-- 解决升级后, yum 无法正常使用的问题(yum 依赖 python 2, 而 /usr/bin/python 现在指向 3.8)
vi /usr/libexec/urlgrabber-ext-down
将顶部的 /usr/bin/python 改为 /usr/bin/python2
vi /usr/bin/yum
将顶部的 /usr/bin/python 改为 /usr/bin/python2
-- 将python的bin目录加入到环境变量
cd
vi .bash_profile
在 PATH 变量末尾追加 /usr/local/python3/bin (例如 export PATH=$PATH:/usr/local/python3/bin), 保存后执行 source .bash_profile 使其生效
重新运行依旧报错:
[root@hp7 ~]# pyspark
Python 3.8.0 (default, Nov 14 2022, 17:51:20)
[GCC 4.8.5 20150623 (Red Hat 4.8.5-39)] on linux
Type "help", "copyright", "credits" or "license" for more information.
Traceback (most recent call last):
File "/home/spark-3.2.2-bin-hadoop3.2/python/pyspark/shell.py", line 29, in <module>
from pyspark.context import SparkContext
File "/home/spark-3.2.2-bin-hadoop3.2/python/pyspark/__init__.py", line 121, in <module>
from pyspark.sql import SQLContext, HiveContext, Row # noqa: F401
File "/home/spark-3.2.2-bin-hadoop3.2/python/pyspark/sql/__init__.py", line 42, in <module>
from pyspark.sql.types import Row
File "/home/spark-3.2.2-bin-hadoop3.2/python/pyspark/sql/types.py", line 27, in <module>
import ctypes
File "/usr/local/python3/lib/python3.8/ctypes/__init__.py", line 7, in <module>
from _ctypes import Union, Structure, Array
ModuleNotFoundError: No module named '_ctypes'
>>>
安装openssl:
-- 下载软件
wget https://github.com/openssl/openssl/archive/OpenSSL_1_1_1d.tar.gz
-- 解压
tar -zxvf OpenSSL_1_1_1d.tar.gz
cd openssl-OpenSSL_1_1_1d
-- 编译安装
mkdir /usr/local/openssl
./config --prefix=/usr/local/openssl
make
make install
-- 备份老版本openssl,创建新版本软链接
mv /usr/bin/openssl /usr/bin/openssl.old
mv /usr/include/openssl /usr/include/openssl.old
ln -s /usr/local/openssl/bin/openssl /usr/bin/openssl
ln -s /usr/local/openssl/include/openssl /usr/include/openssl
-- 把openssl lib路径 /usr/local/openssl/lib 追加到下面的文件中
# vi /usr/local/openssl/lib
cp /home/software/openssl-OpenSSL_1_1_1d/libcrypto.so.1.1 /usr/local/lib/
cp /home/software/openssl-OpenSSL_1_1_1d/libssl.so.1.1 /usr/local/lib/
-- 创建软链接
ln -s /usr/local/lib/libssl.so.1.1 /usr/lib/libssl.so.1.1
ln -s /usr/local/lib/libcrypto.so.1.1 /usr/lib/libcrypto.so.1.1
ln -s /usr/local/lib/libcrypto.so.1.1 /usr/lib64/libcrypto.so.1.1
ln -s /usr/local/lib/libssl.so.1.1 /usr/lib64/libssl.so.1.1
-- 验证
openssl version
安装依赖包
-- 安装依赖包
yum -y install libffi-devel
yum -y install zlib*
yum -y install gcc
-- 重新安装python3 (_ctypes 模块在编译时依赖 libffi-devel, 因此安装依赖包之后必须重新编译 python 才能生成该模块)
-- 编译
cd Python-3.8.0
./configure --prefix=/usr/local/python3 --with-openssl=/usr/local/python3 --with-http_ssl_module && make && make install
终于搞定: