Getting Started with KubeSphere

[TOC]

Getting Started with KubeSphere

A full-stack Kubernetes container-cloud PaaS solution: https://kubesphere.io/zh/

KubeSphere is an application-centric, multi-tenant container platform built on top of Kubernetes. It provides full-stack automated IT operations and simplifies enterprise DevOps workflows. KubeSphere offers an operator-friendly, wizard-style web console that helps enterprises quickly build a powerful and feature-rich container cloud platform.

Installation

KubeSphere can be installed in two ways:

  • on an existing Kubernetes cluster
  • directly on Linux hosts

KubeSphere's installation process is already quite streamlined: whichever method you use, it works out of the box and can be completed with just a few commands.

Installing on an existing Kubernetes cluster

Prerequisites: https://kubesphere.io/zh/docs/v3.4/quick-start/minimal-kubesphere-on-k8s/

  • Before installing, you need to configure a default storage class (StorageClass) in the Kubernetes cluster.

# Run on all machines
yum install -y nfs-utils

# Run only on the master machine: it acts as the NFS server; rw = read/write
echo "/nfs/data/ *(insecure,rw,sync,no_root_squash)" > /etc/exports

mkdir -p /nfs/data
systemctl enable rpcbind --now
systemctl enable nfs-server --now

# Make the export configuration take effect
exportfs -r
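
To confirm the export is active on the master, the current export list can be printed (a quick sanity check, not part of the original steps):

# On the master: list the directories currently exported by the NFS server
exportfs -v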

Join the worker nodes to this NFS share:

# Check the exports; the IP below is the master's IP
showmount -e 192.168.27.251

# Run on both worker servers: create the local path /nfs/data that will hold the mount
mkdir -p /nfs/data

# Run on both worker servers: mount the remote shared directory onto the local one
mount -t nfs 192.168.27.251:/nfs/data /nfs/data
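
If the mount succeeded, the share should show up in the worker's mount table (another quick check, not part of the original steps):

# On a worker: confirm the NFS share is mounted at /nfs/data
df -hT | grep nfs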

Configure the default storage class so that it supports dynamic provisioning.

## Create a StorageClass (set as the cluster default)
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: nfs-storage
  annotations:
    storageclass.kubernetes.io/is-default-class: "true"
provisioner: k8s-sigs.io/nfs-subdir-external-provisioner
parameters:
  archiveOnDelete: "true"  ## whether to archive the PV's contents when the PV is deleted

---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nfs-client-provisioner
  labels:
    app: nfs-client-provisioner
  # replace with namespace where provisioner is deployed
  namespace: default
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: nfs-client-provisioner
  template:
    metadata:
      labels:
        app: nfs-client-provisioner
    spec:
      serviceAccountName: nfs-client-provisioner
      containers:
        - name: nfs-client-provisioner
          image: registry.cn-hangzhou.aliyuncs.com/lfy_k8s_images/nfs-subdir-external-provisioner:v4.0.2
          # resources:
          #   limits:
          #     cpu: 10m
          #   requests:
          #     cpu: 10m
          volumeMounts:
            - name: nfs-client-root
              mountPath: /persistentvolumes
          env:
            - name: PROVISIONER_NAME
              value: k8s-sigs.io/nfs-subdir-external-provisioner
            - name: NFS_SERVER
              value: 172.31.0.4  ## set this to your own NFS server address
            - name: NFS_PATH
              value: /nfs/data  ## the directory shared by the NFS server
      volumes:
        - name: nfs-client-root
          nfs:
            server: 172.31.0.4
            path: /nfs/data
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: nfs-client-provisioner
  # replace with namespace where provisioner is deployed
  namespace: default
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: nfs-client-provisioner-runner
rules:
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["persistentvolumes"]
    verbs: ["get", "list", "watch", "create", "delete"]
  - apiGroups: [""]
    resources: ["persistentvolumeclaims"]
    verbs: ["get", "list", "watch", "update"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["create", "update", "patch"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: run-nfs-client-provisioner
subjects:
  - kind: ServiceAccount
    name: nfs-client-provisioner
    # replace with namespace where provisioner is deployed
    namespace: default
roleRef:
  kind: ClusterRole
  name: nfs-client-provisioner-runner
  apiGroup: rbac.authorization.k8s.io
---
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: leader-locking-nfs-client-provisioner
  # replace with namespace where provisioner is deployed
  namespace: default
rules:
  - apiGroups: [""]
    resources: ["endpoints"]
    verbs: ["get", "list", "watch", "create", "update", "patch"]
---
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: leader-locking-nfs-client-provisioner
  # replace with namespace where provisioner is deployed
  namespace: default
subjects:
  - kind: ServiceAccount
    name: nfs-client-provisioner
    # replace with namespace where provisioner is deployed
    namespace: default
roleRef:
  kind: Role
  name: leader-locking-nfs-client-provisioner
  apiGroup: rbac.authorization.k8s.io
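
After applying the manifest above, dynamic provisioning can be verified with a throwaway PVC (a minimal sketch; the claim name nfs-test-pvc is arbitrary, and the claim simply relies on the default StorageClass created above):

# Create a small test PVC against the default StorageClass
kubectl apply -f - <<EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: nfs-test-pvc
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 200Mi
EOF

# The PVC should become Bound and a matching PV should be created automatically
kubectl get pvc nfs-test-pvc
kubectl get pv

# Clean up the test claim when done
kubectl delete pvc nfs-test-pvc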

Cluster monitoring component (metrics-server)

apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: metrics-server
    rbac.authorization.k8s.io/aggregate-to-admin: "true"
    rbac.authorization.k8s.io/aggregate-to-edit: "true"
    rbac.authorization.k8s.io/aggregate-to-view: "true"
  name: system:aggregated-metrics-reader
rules:
- apiGroups:
  - metrics.k8s.io
  resources:
  - pods
  - nodes
  verbs:
  - get
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: metrics-server
  name: system:metrics-server
rules:
- apiGroups:
  - ""
  resources:
  - pods
  - nodes
  - nodes/stats
  - namespaces
  - configmaps
  verbs:
  - get
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server-auth-reader
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server:system:auth-delegator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:auth-delegator
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: system:metrics-server
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:metrics-server
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: v1
kind: Service
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
spec:
  ports:
  - name: https
    port: 443
    protocol: TCP
    targetPort: https
  selector:
    k8s-app: metrics-server
---
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
spec:
  selector:
    matchLabels:
      k8s-app: metrics-server
  strategy:
    rollingUpdate:
      maxUnavailable: 0
  template:
    metadata:
      labels:
        k8s-app: metrics-server
    spec:
      containers:
      - args:
        - --cert-dir=/tmp
        - --kubelet-insecure-tls
        - --secure-port=4443
        - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
        - --kubelet-use-node-status-port
        image: registry.cn-hangzhou.aliyuncs.com/lfy_k8s_images/metrics-server:v0.4.3
        imagePullPolicy: IfNotPresent
        livenessProbe:
          failureThreshold: 3
          httpGet:
            path: /livez
            port: https
            scheme: HTTPS
          periodSeconds: 10
        name: metrics-server
        ports:
        - containerPort: 4443
          name: https
          protocol: TCP
        readinessProbe:
          failureThreshold: 3
          httpGet:
            path: /readyz
            port: https
            scheme: HTTPS
          periodSeconds: 10
        securityContext:
          readOnlyRootFilesystem: true
          runAsNonRoot: true
          runAsUser: 1000
        volumeMounts:
        - mountPath: /tmp
          name: tmp-dir
      nodeSelector:
        kubernetes.io/os: linux
      priorityClassName: system-cluster-critical
      serviceAccountName: metrics-server
      volumes:
      - emptyDir: {}
        name: tmp-dir
---
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
  labels:
    k8s-app: metrics-server
  name: v1beta1.metrics.k8s.io
spec:
  group: metrics.k8s.io
  groupPriorityMinimum: 100
  insecureSkipTLSVerify: true
  service:
    name: metrics-server
    namespace: kube-system
  version: v1beta1
  versionPriority: 100
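
Once metrics-server is running, a couple of quick checks confirm that the metrics API is being served (verification commands only, not part of the manifest):

# The APIService should report AVAILABLE=True once metrics-server is ready
kubectl get apiservice v1beta1.metrics.k8s.io

# Node and pod resource usage should now be queryable
kubectl top nodes
kubectl top pods -A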

Once your machines meet the prerequisites, you can install KubeSphere by following the steps below.

  1. Run the following commands to start the installation (you can also download these two files with wget first and then apply them with kubectl):

    kubectl apply -f https://github.com/kubesphere/ks-installer/releases/download/v3.4.1/kubesphere-installer.yaml

    # This configuration file controls which pluggable components are enabled
    # https://kubesphere.io/zh/docs/v3.4/pluggable-components/
    kubectl apply -f https://github.com/kubesphere/ks-installer/releases/download/v3.4.1/cluster-configuration.yaml
  2. Check the installation logs:

    kubectl logs -n kubesphere-system $(kubectl get pod -n kubesphere-system -l 'app in (ks-install, ks-installer)' -o jsonpath='{.items[0].metadata.name}') -f
  3. Use kubectl get pod --all-namespaces to check whether all Pods in the KubeSphere-related namespaces are running normally. If they are, check the console's port (30880 by default) with the following command:

    kubectl get svc/ks-console -n kubesphere-system
  4. Make sure port 30880 is open in your security group, then access the web console through the NodePort (IP:30880) with the default account and password (admin/P@88w0rd); see the sketch after this list for a quick way to read the NodePort directly from the Service.

  5. After logging in to the console, you can check the status of the individual components under System Components. You may need to wait for some components to be up and running before the related services become usable.
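
If you prefer to read the console's NodePort straight from the Service instead of assuming 30880, something like this works (a small convenience, not part of the official steps):

# Print only the NodePort of the ks-console Service
kubectl -n kubesphere-system get svc ks-console -o jsonpath='{.spec.ports[0].nodePort}'

# The console is then reachable at http://<any-node-IP>:<that-port>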

Fixing the missing etcd monitoring certificate

kubectl -n kubesphere-monitoring-system create secret generic kube-etcd-client-certs  --from-file=etcd-client-ca.crt=/etc/kubernetes/pki/etcd/ca.crt  --from-file=etcd-client.crt=/etc/kubernetes/pki/apiserver-etcd-client.crt  --from-file=etcd-client.key=/etc/kubernetes/pki/apiserver-etcd-client.key
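
To confirm the secret was created:

# The secret should now be listed in the kubesphere-monitoring-system namespace
kubectl -n kubesphere-monitoring-system get secret kube-etcd-client-certs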

Installing on Linux hosts

Set the hostnames

[root@kubeshpere-master ~]# hostnamectl set-hostname master


[root@kubeshpere-node ~]# hostnamectl set-hostname node

Download KubeKey

This only needs to be run on the master node:

# Use the China download zone
export KKZONE=cn

# Download and unpack the KubeKey installer (kk)
curl -sfL https://get-kk.kubesphere.io | VERSION=v1.1.1 sh -

chmod +x kk
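
To double-check the download, kk can print its own version (assuming the binary unpacked correctly into the current directory):

# Print the KubeKey version to confirm the binary works
./kk version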

Create the cluster configuration file

Use the kk tool to create the cluster configuration file; running it generates a config-sample.yaml file.

[root@master ~]# ./kk create config --with-kubernetes v1.20.4 --with-kubesphere v3.1.1
[root@master ~]# ls
anaconda-ks.cfg config-sample.yaml kk kubekey-v1.1.1-linux-amd64.tar.gz original-ks.cfg README.md README_zh-CN.md

Edit the cluster information in the configuration file:

apiVersion: kubekey.kubesphere.io/v1alpha1
kind: Cluster
metadata:
  name: sample
spec:
  hosts: # the machines in the cluster
  - {name: master, address: 10.211.55.17, internalAddress: 10.211.55.17, user: root, password: jjm7560808}
  - {name: node, address: 10.211.55.18, internalAddress: 10.211.55.18, user: root, password: jjm7560808}
  roleGroups:
    etcd: # which machine hosts etcd
    - master
    master: # which machine is the master node
    - master
    worker: # which machines are worker nodes
    - master # the master also acts as a worker node
    - node
  controlPlaneEndpoint:
    domain: lb.kubesphere.local
    address: ""
    port: 6443
  kubernetes:
    version: v1.20.4
    imageRepo: kubesphere
    clusterName: cluster.local
  network:
    plugin: calico
    kubePodsCIDR: 10.233.64.0/18
    kubeServiceCIDR: 10.233.0.0/18
  registry:
    registryMirrors: []
    insecureRegistries: []
  addons: []

Create the cluster

[root@master ~]# ./kk create cluster -f config-sample.yaml
+--------+------+------+---------+----------+-------+-------+-----------+--------+------------+-------------+------------------+--------------+
| name | sudo | curl | openssl | ebtables | socat | ipset | conntrack | docker | nfs client | ceph client | glusterfs client | time |
+--------+------+------+---------+----------+-------+-------+-----------+--------+------------+-------------+------------------+--------------+
| node | y | y | y | y | | y | | | | | | CST 22:52:17 |
| master | y | y | y | y | | y | | | y | | y | CST 20:07:22 |
+--------+------+------+---------+----------+-------+-------+-----------+--------+------------+-------------+------------------+--------------+
node: conntrack is required.
master: conntrack is required.

# After running, kk checks each machine for required dependencies that are not yet installed
[root@master ~]# yum install -y conntrack
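
The preflight table also shows socat missing on both nodes, and the later kubeadm preflight warns about it, so it is worth installing alongside conntrack:

# Run on every node
yum install -y conntrack socat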

The problem I ran into during the installation:

Please, check the contents of the $HOME/.kube/config file.
ERRO[20:36:11 CST] Failed to add worker to cluster: Failed to exec command: sudo env PATH=$PATH /bin/sh -c "/usr/local/bin/kubeadm join lb.kubesphere.local:6443 --token cizh27.yysycm95alsr418r --discovery-token-ca-cert-hash sha256:babe3bc05ea57c183e800628d06c42587cc6425822eea5db652b3c28312f944b"
[preflight] Running pre-flight checks
[WARNING FileExisting-socat]: socat not found in system path
[WARNING SystemVerification]: this Docker version is not on the list of validated versions: 26.1.4. Latest validated version: 19.03
error execution phase preflight: couldn't validate the identity of the API Server: Get "https://lb.kubesphere.local:6443/api/v1/namespaces/kube-public/configmaps/cluster-info?timeout=10s": x509: certificate has expired or is not yet valid: current time 2024-08-06T23:23:26+08:00 is before 2024-08-07T12:20:40Z
To see the stack trace of this error execute with --v=5 or higher: Process exited with status 1 node=10.211.55.18
WARN[20:36:11 CST] Task failed ...
WARN[20:36:11 CST] error: interrupted by error
Error: Failed to join node: interrupted by error
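
Reading the x509 error closely ("current time 2024-08-06T23:23:26+08:00 is before 2024-08-07T12:20:40Z") and comparing the mismatched timestamps in the preflight table above (CST 22:52 on node vs. CST 20:07 on master), the node's clock appears to be out of sync with the master. Synchronizing time on every machine before retrying the join may also be worth a try; a minimal sketch, assuming chrony is available on the hosts:

# Run on every node: install and start a time-sync daemon, then verify
yum install -y chrony
systemctl enable chronyd --now
chronyc tracking
date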

I searched through a lot of material without finding a solution. Based on my earlier experience of installing the components one by one, it felt like a problem with the Calico installation. Rather than trying to install Calico on the machines first and reinstalling, I switched versions and went straight to the latest release.

# Tear down the old cluster first
./kk delete cluster -f config-sample.yaml

# Download the newer KubeKey, then regenerate the config and recreate the cluster
curl -sfL https://get-kk.kubesphere.io | VERSION=v3.0.13 sh -

./kk create config --with-kubesphere v3.4.1 --with-kubernetes v1.22.12

./kk create cluster -f config-sample.yaml

I expected everything to go smoothly with the latest version, but the installation got stuck at:

Please wait for the installation to complete:     <---<< 

After exiting the process, check the status of the cluster Pods:

[root@master docker]# kubectl get pods -A
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-system calico-kube-controllers-5d5bbb5dc-5qpg5 1/1 Running 0 78m
kube-system calico-node-b28p6 1/1 Running 0 78m
kube-system calico-node-kslgp 1/1 Running 2 (6m11s ago) 78m
kube-system coredns-5495dd7c88-czhp4 1/1 Running 0 78m
kube-system coredns-5495dd7c88-jdk5j 1/1 Running 0 78m
kube-system kube-apiserver-master 1/1 Running 2 (6m1s ago) 78m
kube-system kube-controller-manager-master 1/1 Running 7 (6m11s ago) 78m
kube-system kube-proxy-2zgvn 1/1 Running 2 (6m11s ago) 78m
kube-system kube-proxy-jhdpm 1/1 Running 0 78m
kube-system kube-scheduler-master 1/1 Running 7 (6m11s ago) 78m
kube-system nodelocaldns-2h8bc 1/1 Running 0 78m
kube-system nodelocaldns-bwkwf 1/1 Running 2 (6m10s ago) 78m
kube-system openebs-localpv-provisioner-58d9ff469c-j94fg 1/1 Running 6 (6m ago) 78m
kube-system snapshot-controller-0 0/1 ErrImagePull 0 76m
kubesphere-controls-system default-http-backend-5bf68ff9b8-l4qwm 0/1 ErrImagePull 0 74m
kubesphere-monitoring-system kube-state-metrics-554c8c5d65-bthnt 0/3 ErrImagePull 0 68m
kubesphere-monitoring-system node-exporter-l8245 0/2 ErrImagePull 0 68m
kubesphere-monitoring-system node-exporter-vxcqg 0/2 ContainerCreating 0 68m
kubesphere-monitoring-system notification-manager-operator-8694799c76-l6zkf 0/2 ContainerCreating 0 63m
kubesphere-monitoring-system prometheus-operator-8955bbd98-84fml 0/2 ErrImagePull 0 68m
kubesphere-system ks-apiserver-7fd66f7885-dfsrm 0/1 ContainerCreating 0 74m
kubesphere-system ks-console-85c97b6d7d-d4g7w 0/1 ErrImagePull 0 74m
kubesphere-system ks-controller-manager-798444f496-gqk2w 0/1 ImagePullBackOff 0 74m
kubesphere-system ks-installer-5594ffc86d-kl8g6 1/1 Running 0 78m

Many of the containers are clearly in a bad state. Checking one with kubectl describe shows that the image pulls are failing, so I modified the Docker daemon configuration to add an Alibaba Cloud registry mirror (see the sketch after the describe output below).

[root@master docker]# kubectl describe pod -n kubesphere-monitoring-system node-exporter-vxcqg

Warning Failed 26m kubelet Failed to pull image "kubesphere/kube-rbac-proxy:v0.11.0": rpc error: code = Unknown desc = error pulling image configuration: download failed after attempts=6: dial tcp 111.243.214.169:443: i/o timeout
Warning Failed 26m (x3 over 62m) kubelet Error: ErrImagePull
Normal Pulling 26m (x4 over 69m) kubelet Pulling image "prom/node-exporter:v1.3.1"
Warning Failed 17m (x4 over 66m) kubelet Error: ErrImagePull
Warning Failed 17m (x2 over 56m) kubelet Failed to pull image "prom/node-exporter:v1.3.1": rpc error: code = Unknown desc = context canceled
Normal Pulling 17m (x4 over 66m) kubelet Pulling image "kubesphere/kube-rbac-proxy:v0.11.0"
Normal SandboxChanged 11m kubelet Pod sandbox changed, it will be killed and re-created.
Normal Pulling 11m kubelet Pulling image "prom/node-exporter:v1.3.1"
Warning Failed 8m55s kubelet Failed to pull image "prom/node-exporter:v1.3.1": rpc error: code = Unknown desc = error pulling image configuration: download failed after attempts=6: dial tcp 128.121.243.228:443: i/o timeout
Warning Failed 8m55s kubelet Error: ErrImagePull
Normal Pulling 8m55s kubelet Pulling image "kubesphere/kube-rbac-proxy:v0.11.0"
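
For reference, adding a registry mirror means putting a "registry-mirrors" entry into /etc/docker/daemon.json and restarting Docker. The mirror URL below is a placeholder; replace it with the accelerator address from your own Alibaba Cloud console, and merge the key into any existing daemon.json rather than overwriting other settings:

# Run on every node: merge the "registry-mirrors" key into /etc/docker/daemon.json
# (keep any existing keys, e.g. "exec-opts", if they are already there)
vi /etc/docker/daemon.json
# {
#   "registry-mirrors": ["https://<your-id>.mirror.aliyuncs.com"]
# }

# Restart Docker so the mirror takes effect; kubelet will retry the failed pulls
systemctl daemon-reload
systemctl restart docker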