mirror of
https://github.com/TECHNOFAB11/zfs-localpv.git
synced 2025-12-12 06:20:11 +01:00
feat(topology): adding support for custom topology keys (#94)
This commit adds support for users to specify custom labels on the Kubernetes nodes and use them in the allowedTopologies section of the StorageClass. A few notes: - This PR depends on the CSI driver's capability to support custom topology keys. - Labels should be added to the nodes first, and the driver deployed afterwards, so that it is aware of all the labels each node has. If labels are added after the ZFS-LocalPV driver has been deployed, a restart of all the node CSI driver agents is required so that the driver can pick up the labels and add them as supported topology keys. - If the storageclass is using the Immediate binding mode and no topology key is mentioned, then all the nodes should be labeled using the same key, that means: - the same key should be present on all nodes; nodes can have different values for those keys. - If nodes are labeled with different keys, i.e. some nodes have keys that others do not, then ZFSPV's default scheduler cannot effectively do the volume count based scheduling. In this case the CSI provisioner will pick keys from any random node and then prepare the preferred topology list using the nodes which have those keys defined, and the ZFSPV scheduler will schedule the PV among those nodes only. Signed-off-by: Pawan <pawan@mayadata.io>
This commit is contained in:
parent
f65575e447
commit
de9b302083
7 changed files with 184 additions and 13 deletions
|
|
@ -562,7 +562,7 @@ spec:
|
|||
- "--leader-election=true"
|
||||
imagePullPolicy: IfNotPresent
|
||||
- name: csi-provisioner
|
||||
image: quay.io/k8scsi/csi-provisioner:v1.5.0
|
||||
image: quay.io/k8scsi/csi-provisioner:v1.6.0
|
||||
imagePullPolicy: IfNotPresent
|
||||
args:
|
||||
- "--csi-address=$(ADDRESS)"
|
||||
|
|
|
|||
|
|
@ -1019,7 +1019,7 @@ spec:
|
|||
- "--leader-election=true"
|
||||
imagePullPolicy: IfNotPresent
|
||||
- name: csi-provisioner
|
||||
image: quay.io/k8scsi/csi-provisioner:v1.5.0
|
||||
image: quay.io/k8scsi/csi-provisioner:v1.6.0
|
||||
imagePullPolicy: IfNotPresent
|
||||
args:
|
||||
- "--csi-address=$(ADDRESS)"
|
||||
|
|
|
|||
85
docs/faq.md
85
docs/faq.md
|
|
@ -37,7 +37,7 @@ openebs-zfs-node-twmx8 2/2 Running 0 5h28m
|
|||
|
||||
### 3. How to upgrade the driver to newer version
|
||||
|
||||
In the [operator file](../deploy/zfs-operator.yaml), change the zfs-driver image to the required tag which you want (like for tag v0.2 use `quay.io/openebs/zfs-driver:v0.2`), and then apply the yaml, there are two places where we need to change the image, one for the controller and once for the node agent. By default, the operator uses the `ci` tag which always points to development image not the release tag, so if you want to test the development image you can use ci tag. Please note that the default ImagePullPolicy is IfNotPresent, that means if `ci` image is already there on the node, it will not be pulled again.
|
||||
Follow the instructions here https://github.com/openebs/zfs-localpv/tree/master/upgrade.
|
||||
|
||||
### 4. ZFS Pools are there on certain nodes only, how can I create the storage class.
|
||||
|
||||
|
|
@ -67,7 +67,7 @@ The above storage class tells that ZFS pool "zfspv-pool" is available on nodes z
|
|||
Please note that the provisioner name for ZFS driver is "zfs.csi.openebs.io", we have to use this while creating the storage class so that the volume provisioning/deprovisioning request can come to ZFS driver.
|
||||
|
||||
|
||||
### 3. How to install the provisioner in HA
|
||||
### 5. How to install the provisioner in HA
|
||||
|
||||
To have HA for the provisioner(controller), we can update the replica count to 2(or more as per need) and deploy the yaml. Once yaml is deployed, you can see 2(or more) controller pod running. At a time only one will be active and once it is down, the other will take over. They will use lease mechanism to decide who is active/master. Please note that it has anti affinity rules, so on one node only one pod will be running, that means, if you are using 2 replicas on a single node cluster, the other pod will be in pending state because of the anti-affinity rule. So, before changing the replica count, please make sure you have sufficient nodes.
|
||||
|
||||
|
|
@ -88,3 +88,84 @@ spec:
|
|||
replicas: 2
|
||||
---
|
||||
```
|
||||
|
||||
### 6. How to add custom topology key
|
||||
|
||||
To add a custom topology key, we can label all the nodes with the required key and value:
|
||||
|
||||
```sh
|
||||
$ kubectl label node pawan-node-1 openebs.io/rack=rack1
|
||||
node/pawan-node-1 labeled
|
||||
|
||||
$ kubectl get nodes pawan-node-1 --show-labels
|
||||
NAME STATUS ROLES AGE VERSION LABELS
|
||||
pawan-node-1 Ready worker 16d v1.17.4 beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/arch=amd64,kubernetes.io/hostname=pawan-node-1,kubernetes.io/os=linux,node-role.kubernetes.io/worker=true,openebs.io/rack=rack1
|
||||
|
||||
```
|
||||
It is recommended to label all the nodes with the same key. The nodes can have different values for a given key, but every key should be present on all the worker nodes.
|
||||
|
||||
Once we have labeled the node, we can install the zfs driver. The driver will pick up the node labels and add them as supported topology keys. If the driver is already installed and you want to add new topology information, you can label the node with the topology information and then restart the ZFSPV CSI driver daemon set (openebs-zfs-node) pods, so that the driver can pick up the labels and add them as supported topology keys. The pods to restart are the ones in the kube-system namespace named openebs-zfs-node-[xxxxx], which are the node agent pods of the ZFS-LocalPV driver.
|
||||
|
||||
```sh
|
||||
$ kubectl get pods -n kube-system -l role=openebs-zfs
|
||||
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
openebs-zfs-controller-0 4/4 Running 0 5h28m
|
||||
openebs-zfs-node-4d94n 2/2 Running 0 5h28m
|
||||
openebs-zfs-node-gssh8 2/2 Running 0 5h28m
|
||||
openebs-zfs-node-twmx8 2/2 Running 0 5h28m
|
||||
```
|
||||
|
||||
We can verify that key has been registered successfully with the ZFSPV CSI Driver by checking the CSI node object yaml :-
|
||||
|
||||
```yaml
|
||||
$ kubectl get csinodes pawan-node-1 -oyaml
|
||||
apiVersion: storage.k8s.io/v1
|
||||
kind: CSINode
|
||||
metadata:
|
||||
creationTimestamp: "2020-04-13T14:49:59Z"
|
||||
name: pawan-node-1
|
||||
ownerReferences:
|
||||
- apiVersion: v1
|
||||
kind: Node
|
||||
name: pawan-node-1
|
||||
uid: fe268f4b-d9a9-490a-a999-8cde20c4dadb
|
||||
resourceVersion: "4586341"
|
||||
selfLink: /apis/storage.k8s.io/v1/csinodes/pawan-node-1
|
||||
uid: 522c2110-9d75-4bca-9879-098eb8b44e5d
|
||||
spec:
|
||||
drivers:
|
||||
- name: zfs.csi.openebs.io
|
||||
nodeID: pawan-node-1
|
||||
topologyKeys:
|
||||
- beta.kubernetes.io/arch
|
||||
- beta.kubernetes.io/os
|
||||
- kubernetes.io/arch
|
||||
- kubernetes.io/hostname
|
||||
- kubernetes.io/os
|
||||
- node-role.kubernetes.io/worker
|
||||
- openebs.io/rack
|
||||
```
|
||||
|
||||
We can see that "openebs.io/rack" is listed as topology key. Now we can create a storageclass with the topology key created :
|
||||
|
||||
```yaml
|
||||
apiVersion: storage.k8s.io/v1
|
||||
kind: StorageClass
|
||||
metadata:
|
||||
name: openebs-zfspv
|
||||
allowVolumeExpansion: true
|
||||
parameters:
|
||||
fstype: "zfs"
|
||||
poolname: "zfspv-pool"
|
||||
provisioner: zfs.csi.openebs.io
|
||||
allowedTopologies:
|
||||
- matchLabelExpressions:
|
||||
- key: openebs.io/rack
|
||||
values:
|
||||
- rack1
|
||||
```
|
||||
|
||||
The ZFSPV CSI driver will schedule the PV to the nodes where the label "openebs.io/rack" is set to "rack1". If multiple nodes satisfy this requirement, it will pick the node which has the fewest volumes provisioned for the given ZFS Pool.
|
||||
|
||||
Note that if the storageclass is using the Immediate binding mode and no topology key is mentioned, then all the nodes should be labeled using the same key; that is, the same key should be present on all nodes, though nodes can have different values for those keys. If nodes are labeled with different keys, i.e. some nodes have keys that others do not, then ZFSPV's default scheduler cannot effectively do the volume count based scheduling. In this case the CSI provisioner will pick keys from any random node and then prepare the preferred topology list using the nodes which have those keys defined, and the ZFSPV scheduler will schedule the PV among those nodes only.
|
||||
|
|
|
|||
|
|
@ -68,6 +68,28 @@ func NumberOfNodes() (int, error) {
|
|||
}
|
||||
}
|
||||
|
||||
// GetNode returns a node instance from kubernetes cluster
|
||||
func GetNode(name string) (*corev1.Node, error) {
|
||||
n := Node()
|
||||
node, err := n.Get(name, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "failed to get node")
|
||||
} else {
|
||||
return node, nil
|
||||
}
|
||||
}
|
||||
|
||||
// ListNodes returns list of node instance from kubernetes cluster
|
||||
func ListNodes(options metav1.ListOptions) (*corev1.NodeList, error) {
|
||||
n := Node()
|
||||
nodelist, err := n.List(options)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "failed to list node")
|
||||
} else {
|
||||
return nodelist, nil
|
||||
}
|
||||
}
|
||||
|
||||
// GetOSAndKernelVersion gets us the OS,Kernel version
|
||||
func GetOSAndKernelVersion() (string, error) {
|
||||
nodes := Node()
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ import (
|
|||
"github.com/container-storage-interface/spec/lib/go/csi"
|
||||
apis "github.com/openebs/zfs-localpv/pkg/apis/openebs.io/zfs/v1alpha1"
|
||||
"github.com/openebs/zfs-localpv/pkg/builder/volbuilder"
|
||||
k8sapi "github.com/openebs/zfs-localpv/pkg/client/k8s/v1alpha1"
|
||||
"github.com/openebs/zfs-localpv/pkg/mgmt/snapshot"
|
||||
"github.com/openebs/zfs-localpv/pkg/mgmt/volume"
|
||||
"github.com/openebs/zfs-localpv/pkg/zfs"
|
||||
|
|
@ -171,7 +172,34 @@ func (ns *node) NodeGetInfo(
|
|||
req *csi.NodeGetInfoRequest,
|
||||
) (*csi.NodeGetInfoResponse, error) {
|
||||
|
||||
topology := map[string]string{zfs.ZFSTopologyKey: ns.driver.config.NodeID}
|
||||
node, err := k8sapi.GetNode(ns.driver.config.NodeID)
|
||||
if err != nil {
|
||||
logrus.Errorf("failed to get the node %s", ns.driver.config.NodeID)
|
||||
return nil, err
|
||||
}
|
||||
/*
|
||||
* The driver will support all the keys and values defined in the node's label.
|
||||
* if nodes are labeled with the below keys and values
|
||||
* map[beta.kubernetes.io/arch:amd64 beta.kubernetes.io/os:linux kubernetes.io/arch:amd64 kubernetes.io/hostname:pawan-node-1 kubernetes.io/os:linux node-role.kubernetes.io/worker:true openebs.io/zone:zone1 openebs.io/zpool:ssd]
|
||||
* The driver will support below key and values
|
||||
* {
|
||||
* beta.kubernetes.io/arch:amd64
|
||||
* beta.kubernetes.io/os:linux
|
||||
* kubernetes.io/arch:amd64
|
||||
* kubernetes.io/hostname:pawan-node-1
|
||||
* kubernetes.io/os:linux
|
||||
* node-role.kubernetes.io/worker:true
|
||||
* openebs.io/zone:zone1
|
||||
* openebs.io/zpool:ssd
|
||||
* }
|
||||
*/
|
||||
|
||||
// support all the keys that node has
|
||||
topology := node.Labels
|
||||
|
||||
// add driver's topology key
|
||||
topology[zfs.ZFSTopologyKey] = ns.driver.config.NodeID
|
||||
|
||||
return &csi.NodeGetInfoResponse{
|
||||
NodeId: ns.driver.config.NodeID,
|
||||
AccessibleTopology: &csi.Topology{
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ import (
|
|||
|
||||
"github.com/container-storage-interface/spec/lib/go/csi"
|
||||
"github.com/openebs/zfs-localpv/pkg/builder/volbuilder"
|
||||
k8sapi "github.com/openebs/zfs-localpv/pkg/client/k8s/v1alpha1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
zfs "github.com/openebs/zfs-localpv/pkg/zfs"
|
||||
|
|
@ -34,10 +35,39 @@ const (
|
|||
VolumeWeighted = "VolumeWeighted"
|
||||
)
|
||||
|
||||
// GetNodeList gets the nodelist which satisfies the topology info
|
||||
func GetNodeList(topo *csi.TopologyRequirement) ([]string, error) {
|
||||
|
||||
var nodelist []string
|
||||
|
||||
list, err := k8sapi.ListNodes(metav1.ListOptions{})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, node := range list.Items {
|
||||
for _, prf := range topo.Preferred {
|
||||
nodeFiltered := false
|
||||
for key, value := range prf.Segments {
|
||||
if node.Labels[key] != value {
|
||||
nodeFiltered = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if nodeFiltered == false {
|
||||
nodelist = append(nodelist, node.Name)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nodelist, nil
|
||||
}
|
||||
|
||||
// volumeWeightedScheduler goes through all the pools on the nodes mentioned
|
||||
// in the topology and picks the node which has less volume on
|
||||
// the given zfs pool.
|
||||
func volumeWeightedScheduler(topo *csi.TopologyRequirement, pool string) string {
|
||||
func volumeWeightedScheduler(nodelist []string, pool string) string {
|
||||
var selected string
|
||||
|
||||
zvlist, err := volbuilder.NewKubeclient().
|
||||
|
|
@ -62,8 +92,7 @@ func volumeWeightedScheduler(topo *csi.TopologyRequirement, pool string) string
|
|||
|
||||
// schedule it on the node which has less
|
||||
// number of volume for the given pool
|
||||
for _, prf := range topo.Preferred {
|
||||
node := prf.Segments[zfs.ZFSTopologyKey]
|
||||
for _, node := range nodelist {
|
||||
if volmap[node] < numVol {
|
||||
selected = node
|
||||
numVol = volmap[node]
|
||||
|
|
@ -78,19 +107,29 @@ func scheduler(topo *csi.TopologyRequirement, schld string, pool string) string
|
|||
|
||||
if topo == nil ||
|
||||
len(topo.Preferred) == 0 {
|
||||
logrus.Errorf("topology information not provided")
|
||||
logrus.Errorf("scheduler: topology information not provided")
|
||||
return ""
|
||||
}
|
||||
|
||||
nodelist, err := GetNodeList(topo)
|
||||
if err != nil {
|
||||
logrus.Errorf("scheduler: can not get the nodelist err : %v", err.Error())
|
||||
return ""
|
||||
} else if len(nodelist) == 0 {
|
||||
logrus.Errorf("scheduler: nodelist is empty")
|
||||
return ""
|
||||
}
|
||||
|
||||
// if there is a single node, schedule it on that
|
||||
if len(topo.Preferred) == 1 {
|
||||
return topo.Preferred[0].Segments[zfs.ZFSTopologyKey]
|
||||
if len(nodelist) == 1 {
|
||||
return nodelist[0]
|
||||
}
|
||||
|
||||
switch schld {
|
||||
case VolumeWeighted:
|
||||
return volumeWeightedScheduler(topo, pool)
|
||||
return volumeWeightedScheduler(nodelist, pool)
|
||||
default:
|
||||
return volumeWeightedScheduler(topo, pool)
|
||||
return volumeWeightedScheduler(nodelist, pool)
|
||||
}
|
||||
|
||||
return ""
|
||||
|
|
|
|||
1
unreleased/94-pawanpraka1
Normal file
1
unreleased/94-pawanpraka1
Normal file
|
|
@ -0,0 +1 @@
|
|||
adding support to configure custom topology key
|
||||
Loading…
Add table
Add a link
Reference in a new issue