From a10dedbd5e79817e41fea500e9fca443075722b4 Mon Sep 17 00:00:00 2001
From: Pawan Prakash Sharma
Date: Wed, 6 Nov 2019 21:20:49 +0530
Subject: [PATCH] feat(ZFSPV): volume count based scheduler for ZFSPV (#8)

This is an initial scheduler implementation for ZFS Local PV.

* adding scheduler as a configurable option
* adding volumeWeightedScheduler as the scheduling logic

The volumeWeightedScheduler goes through all the nodes listed in the
topology information and picks the node that has the fewest volumes
provisioned in the given pool. Let's say there are two nodes, node1 and
node2, with the pool configuration below:

```
node1
|
|-----> pool1
|         |
|         |------> pvc1
|         |------> pvc2
|-----> pool2
          |------> pvc3
node2
|
|-----> pool1
|         |
|         |------> pvc4
|-----> pool2
          |------> pvc5
          |------> pvc6
```

If the application uses pool1, as shown in the storage class below, the
ZFS driver will schedule the volume on node2, since node2 has only one
volume in pool1 compared to node1, which has two.

```yaml
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: openebs-zfspv
provisioner: zfs.csi.openebs.io
parameters:
  blocksize: "4k"
  compression: "on"
  dedup: "on"
  thinprovision: "yes"
  poolname: "pool1"
```

Similarly, if the application uses pool2, as shown in the storage class
below, the ZFS driver will schedule the volume on node1, since node1 has
only one volume in pool2 compared to node2, which has two.

```yaml
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: openebs-zfspv
provisioner: zfs.csi.openebs.io
parameters:
  blocksize: "4k"
  compression: "on"
  dedup: "on"
  thinprovision: "yes"
  poolname: "pool2"
```

If all the nodes have the same number of volumes for the given pool, the
scheduler can pick any of them and place the PV there.
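
The scheduler can also be requested explicitly through the storage class.
A minimal sketch is shown below; it assumes the `scheduler` parameter key
read in CreateVolume and the `VolumeWeighted` value defined in
pkg/driver/scheduler.go, while the storage class name and pool name are
only illustrative. Leaving the parameter out falls back to the same
volume-weighted logic.

```yaml
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: openebs-zfspv
provisioner: zfs.csi.openebs.io
parameters:
  # illustrative example: "scheduler" is the parameter key read in
  # CreateVolume; "VolumeWeighted" is the algorithm added in this change
  scheduler: "VolumeWeighted"
  poolname: "pool1"
```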

Signed-off-by: Pawan
---
 cmd/main.go                |  2 +-
 deploy/sample/fio.yaml     |  3 +-
 deploy/sample/percona.yaml |  9 +---
 deploy/zfs-operator.yaml   |  4 ++
 pkg/driver/controller.go   | 17 ++++---
 pkg/driver/scheduler.go    | 96 ++++++++++++++++++++++++++++++++++++++
 pkg/zfs/volume.go          |  1 -
 7 files changed, 114 insertions(+), 18 deletions(-)
 create mode 100644 pkg/driver/scheduler.go

diff --git a/cmd/main.go b/cmd/main.go
index 8e6d3b6..df02c7f 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -51,7 +51,7 @@ func main() {
 	)
 
 	cmd.PersistentFlags().StringVar(
-		&config.DriverName, "name", "zfs-localpv", "Name of this driver",
+		&config.DriverName, "name", "zfs.csi.openebs.io", "Name of this driver",
 	)
 
 	cmd.PersistentFlags().StringVar(
diff --git a/deploy/sample/fio.yaml b/deploy/sample/fio.yaml
index c75c257..5ef5c60 100644
--- a/deploy/sample/fio.yaml
+++ b/deploy/sample/fio.yaml
@@ -12,8 +12,7 @@ parameters:
   #keyformat: "raw"
   #keylocation: "file:///home/pawan/key"
   poolname: "zfspv-pool"
-provisioner: zfs-localpv
-volumeBindingMode: WaitForFirstConsumer
+provisioner: zfs.csi.openebs.io
 allowedTopologies:
 - matchLabelExpressions:
   - key: kubernetes.io/hostname
diff --git a/deploy/sample/percona.yaml b/deploy/sample/percona.yaml
index 1d83fcf..04609cc 100644
--- a/deploy/sample/percona.yaml
+++ b/deploy/sample/percona.yaml
@@ -9,14 +9,7 @@ parameters:
   dedup: "on"
   thinprovision: "yes"
   poolname: "zfspv-pool"
-provisioner: zfs-localpv
-volumeBindingMode: WaitForFirstConsumer
-allowedTopologies:
-- matchLabelExpressions:
-  - key: kubernetes.io/hostname
-    values:
-      - gke-zfspv-pawan-default-pool-c8929518-cgd4
-      - gke-zfspv-pawan-default-pool-c8929518-dxzc
+provisioner: zfs.csi.openebs.io
 ---
 kind: PersistentVolumeClaim
 apiVersion: v1
diff --git a/deploy/zfs-operator.yaml b/deploy/zfs-operator.yaml
index 8a6d3ac..4da936e 100644
--- a/deploy/zfs-operator.yaml
+++ b/deploy/zfs-operator.yaml
@@ -23,6 +23,10 @@ spec:
     - zvol
     - zv
   additionalPrinterColumns:
+  - JSONPath: .spec.poolName
+    name: ZPool
+    description: ZFS Pool where the volume is created
+    type: string
   - JSONPath: .spec.ownerNodeID
     name: Node
     description: Node where the volume is created
diff --git a/pkg/driver/controller.go b/pkg/driver/controller.go
index 52bc4fd..1f5d600 100644
--- a/pkg/driver/controller.go
+++ b/pkg/driver/controller.go
@@ -61,7 +61,6 @@ func (cs *controller) CreateVolume(
 	req *csi.CreateVolumeRequest,
 ) (*csi.CreateVolumeResponse, error) {
 
-	logrus.Infof("received request to create volume {%s} vol{%v}", req.GetName(), req)
 	var err error
 
 	if err = cs.validateVolumeCreateReq(req); err != nil {
@@ -78,9 +77,15 @@ func (cs *controller) CreateVolume(
 	kl := req.GetParameters()["keylocation"]
 	pool := req.GetParameters()["poolname"]
 	tp := req.GetParameters()["thinprovision"]
+	schld := req.GetParameters()["scheduler"]
 
-	// setting first in preferred list as the ownernode of this volume
-	OwnerNode := req.AccessibilityRequirements.Preferred[0].Segments[zvol.ZFSTopologyKey]
+	selected := scheduler(req.AccessibilityRequirements, schld, pool)
+
+	if len(selected) == 0 {
+		return nil, status.Error(codes.Internal, "scheduler failed")
+	}
+
+	logrus.Infof("scheduled the volume %s/%s on node %s", pool, volName, selected)
 
 	volObj, err := builder.NewBuilder().
 		WithName(volName).
@@ -92,7 +97,7 @@ func (cs *controller) CreateVolume(
 		WithKeyFormat(kf).
 		WithKeyLocation(kl).
 		WithThinProv(tp).
-		WithOwnerNode(OwnerNode).
+		WithOwnerNode(selected).
 		WithCompression(compression).Build()
 
 	if err != nil {
@@ -101,10 +106,10 @@ func (cs *controller) CreateVolume(
 
 	err = zvol.ProvisionVolume(size, volObj)
 	if err != nil {
-		return nil, status.Error(codes.Internal, err.Error())
+		return nil, status.Error(codes.Internal, "not able to provision the volume")
 	}
 
-	topology := map[string]string{zvol.ZFSTopologyKey: OwnerNode}
+	topology := map[string]string{zvol.ZFSTopologyKey: selected}
 
 	return csipayload.NewCreateVolumeResponseBuilder().
 		WithName(volName).
diff --git a/pkg/driver/scheduler.go b/pkg/driver/scheduler.go
new file mode 100644
index 0000000..184da51
--- /dev/null
+++ b/pkg/driver/scheduler.go
@@ -0,0 +1,96 @@
+/*
+Copyright © 2019 The OpenEBS Authors
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package driver
+
+import (
+	"github.com/Sirupsen/logrus"
+	"math"
+
+	"github.com/container-storage-interface/spec/lib/go/csi"
+	"github.com/openebs/zfs-localpv/pkg/builder"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	zvol "github.com/openebs/zfs-localpv/pkg/zfs"
+)
+
+// scheduling algorithm constants
+const (
+	// pick the node where fewer volumes are provisioned for the given pool;
+	// this is the default scheduler when none is provided
+	VolumeWeighted = "VolumeWeighted"
+)
+
+// volumeWeightedScheduler goes through all the volumes provisioned on the
+// nodes mentioned in the topology and picks the node which has the fewest
+// volumes in the given ZFS pool.
+func volumeWeightedScheduler(topo *csi.TopologyRequirement, pool string) string {
+	var selected string
+
+	zvlist, err := builder.NewKubeclient().
+		WithNamespace(zvol.OpenEBSNamespace).
+		List(metav1.ListOptions{})
+
+	if err != nil {
+		return ""
+	}
+
+	volmap := map[string]int{}
+
+	// create the map of the volume count
+	// for the given pool
+	for _, zv := range zvlist.Items {
+		if zv.Spec.PoolName == pool {
+			volmap[zv.Spec.OwnerNodeID]++
+		}
+	}
+
+	var numVol int = math.MaxInt32
+
+	// schedule it on the node which has the least
+	// number of volumes for the given pool
+	for _, prf := range topo.Preferred {
+		node := prf.Segments[zvol.ZFSTopologyKey]
+		if volmap[node] < numVol {
+			selected = node
+			numVol = volmap[node]
+		}
+	}
+	return selected
+}
+
+// scheduler schedules the PV as per topology constraints for
+// the given zfs pool.
+func scheduler(topo *csi.TopologyRequirement, schld string, pool string) string {
+
+	if len(topo.Preferred) == 0 {
+		logrus.Errorf("topology information not provided")
+		return ""
+	}
+	// if there is a single node, schedule it on that
+	if len(topo.Preferred) == 1 {
+		return topo.Preferred[0].Segments[zvol.ZFSTopologyKey]
+	}
+
+	switch schld {
+	case VolumeWeighted:
+		return volumeWeightedScheduler(topo, pool)
+	default:
+		return volumeWeightedScheduler(topo, pool)
+	}
+
+	return ""
+}
diff --git a/pkg/zfs/volume.go b/pkg/zfs/volume.go
index a0e03c0..b1b3362 100644
--- a/pkg/zfs/volume.go
+++ b/pkg/zfs/volume.go
@@ -118,7 +118,6 @@ func UpdateZvolInfo(vol *apis.ZFSVolume) error {
 	}
 
 	newVol, err := builder.BuildFrom(vol).
-		WithNodename(NodeID).
 		WithFinalizer(finalizers).
 		WithLabels(labels).Build()