feat(scheduling): add zfs pool capacity tracking (#335)

Signed-off-by: shubham <shubham.bajpai@mayadata.io>
Shubham Bajpai 2021-05-31 18:59:59 +05:30 committed by GitHub
parent 4fce22afb5
commit 3eb2c9e894
26 changed files with 2284 additions and 7 deletions

pkg/mgmt/zfsnode/builder.go (new file, 157 lines)

@@ -0,0 +1,157 @@
/*
Copyright © 2021 The OpenEBS Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package zfsnode
import (
"time"
clientset "github.com/openebs/zfs-localpv/pkg/generated/clientset/internalclientset"
openebsScheme "github.com/openebs/zfs-localpv/pkg/generated/clientset/internalclientset/scheme"
informers "github.com/openebs/zfs-localpv/pkg/generated/informer/externalversions"
listers "github.com/openebs/zfs-localpv/pkg/generated/lister/zfs/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/workqueue"
"k8s.io/klog"
)
const controllerAgentName = "zfsnode-controller"
// NodeController is the controller implementation for zfs node resources
type NodeController struct {
// kubeclientset is a standard kubernetes clientset
kubeclientset kubernetes.Interface
// clientset is a clientset generated for the OpenEBS custom API group.
clientset clientset.Interface
// NodeLister can list/get ZFSNode resources from the shared informer's store.
NodeLister listers.ZFSNodeLister
// NodeSynced reports whether the ZFSNode informer cache has been populated.
NodeSynced cache.InformerSynced
// workqueue is a rate limited work queue. This is used to queue work to be
// processed instead of performing it as soon as a change happens. This
// means we can ensure we only process a fixed amount of resources at a
// time, and makes it easy to ensure we are never processing the same item
// simultaneously in two different workers.
workqueue workqueue.RateLimitingInterface
// recorder is an event recorder for recording Event resources to the
// Kubernetes API.
recorder record.EventRecorder
// pollInterval controls the polling frequency for syncing the pool metadata.
pollInterval time.Duration
// ownerRef is the owner reference to be set on ZFSNode objects.
ownerRef metav1.OwnerReference
}
// NodeControllerBuilder is the builder object for controller.
type NodeControllerBuilder struct {
NodeController *NodeController
}
// NewNodeControllerBuilder returns an empty instance of controller builder.
func NewNodeControllerBuilder() *NodeControllerBuilder {
return &NodeControllerBuilder{
NodeController: &NodeController{},
}
}
// withKubeClient sets the kube client on the controller object.
func (cb *NodeControllerBuilder) withKubeClient(ks kubernetes.Interface) *NodeControllerBuilder {
cb.NodeController.kubeclientset = ks
return cb
}
// withOpenEBSClient sets the openebs client on the controller object.
func (cb *NodeControllerBuilder) withOpenEBSClient(cs clientset.Interface) *NodeControllerBuilder {
cb.NodeController.clientset = cs
return cb
}
// withNodeLister sets the ZFSNode lister on the controller object.
func (cb *NodeControllerBuilder) withNodeLister(sl informers.SharedInformerFactory) *NodeControllerBuilder {
NodeInformer := sl.Zfs().V1().ZFSNodes()
cb.NodeController.NodeLister = NodeInformer.Lister()
return cb
}
// withNodeSynced sets the informer's HasSynced func on the controller object.
func (cb *NodeControllerBuilder) withNodeSynced(sl informers.SharedInformerFactory) *NodeControllerBuilder {
NodeInformer := sl.Zfs().V1().ZFSNodes()
cb.NodeController.NodeSynced = NodeInformer.Informer().HasSynced
return cb
}
// withWorkqueueRateLimiting adds a rate-limited workqueue to the controller object.
func (cb *NodeControllerBuilder) withWorkqueueRateLimiting() *NodeControllerBuilder {
cb.NodeController.workqueue = workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "Node")
return cb
}
// withRecorder adds an event recorder to the controller object.
func (cb *NodeControllerBuilder) withRecorder(ks kubernetes.Interface) *NodeControllerBuilder {
klog.Infof("Creating event broadcaster")
eventBroadcaster := record.NewBroadcaster()
eventBroadcaster.StartLogging(klog.Infof)
eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: ks.CoreV1().Events("")})
recorder := eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: controllerAgentName})
cb.NodeController.recorder = recorder
return cb
}
// withEventHandler adds event handlers to the controller object.
func (cb *NodeControllerBuilder) withEventHandler(nodeInformerFactory informers.SharedInformerFactory) *NodeControllerBuilder {
nodeInformer := nodeInformerFactory.Zfs().V1().ZFSNodes()
// Set up an event handler for when the zfs node's pool information changes.
// Note: rather than setting up the resync period at the informer level,
// we control the syncing based on pollInterval. See the
// NodeController#Run func for more details.
nodeInformer.Informer().AddEventHandlerWithResyncPeriod(cache.ResourceEventHandlerFuncs{
AddFunc: cb.NodeController.addNode,
UpdateFunc: cb.NodeController.updateNode,
DeleteFunc: cb.NodeController.deleteNode,
}, 0)
return cb
}
// withPollInterval sets the poll interval on the controller object.
func (cb *NodeControllerBuilder) withPollInterval(interval time.Duration) *NodeControllerBuilder {
cb.NodeController.pollInterval = interval
return cb
}
// withOwnerReference sets the owner reference to be applied on ZFSNode objects.
func (cb *NodeControllerBuilder) withOwnerReference(ownerRef metav1.OwnerReference) *NodeControllerBuilder {
cb.NodeController.ownerRef = ownerRef
return cb
}
// Build returns a controller instance.
func (cb *NodeControllerBuilder) Build() (*NodeController, error) {
err := openebsScheme.AddToScheme(scheme.Scheme)
if err != nil {
return nil, err
}
return cb.NodeController, nil
}
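
For reference, a minimal sketch of exercising this builder chain from a unit test. It assumes the code-generated fake clientset lives at pkg/generated/clientset/internalclientset/fake (the standard k8s.io/code-generator layout); that path, the test name, and the owner reference values are illustrative assumptions, not part of this commit:

package zfsnode

import (
	"testing"
	"time"

	internalfake "github.com/openebs/zfs-localpv/pkg/generated/clientset/internalclientset/fake" // assumed path
	informers "github.com/openebs/zfs-localpv/pkg/generated/informer/externalversions"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	k8sfake "k8s.io/client-go/kubernetes/fake"
)

func TestBuilderChain(t *testing.T) {
	// Fake clientsets stand in for a live cluster connection.
	kubeClient := k8sfake.NewSimpleClientset()
	openebsClient := internalfake.NewSimpleClientset()
	factory := informers.NewSharedInformerFactory(openebsClient, 0)

	// Chain every with* option, mirroring the wiring done in start.go.
	ctrl, err := NewNodeControllerBuilder().
		withKubeClient(kubeClient).
		withOpenEBSClient(openebsClient).
		withNodeSynced(factory).
		withNodeLister(factory).
		withRecorder(kubeClient).
		withEventHandler(factory).
		withPollInterval(60 * time.Second).
		withOwnerReference(metav1.OwnerReference{Name: "node-1"}). // illustrative owner
		withWorkqueueRateLimiting().Build()
	if err != nil || ctrl == nil {
		t.Fatalf("building controller: %v", err)
	}
}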

pkg/mgmt/zfsnode/start.go (new file, 109 lines)

@@ -0,0 +1,109 @@
/*
Copyright © 2021 The OpenEBS Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package zfsnode
import (
"context"
"sync"
"time"
k8sapi "github.com/openebs/lib-csi/pkg/client/k8s"
clientset "github.com/openebs/zfs-localpv/pkg/generated/clientset/internalclientset"
informers "github.com/openebs/zfs-localpv/pkg/generated/informer/externalversions"
"github.com/openebs/zfs-localpv/pkg/zfs"
"github.com/pkg/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/client-go/kubernetes"
)
// Start starts the zfsnode controller.
func Start(controllerMtx *sync.RWMutex, stopCh <-chan struct{}) error {
// Get the in-cluster config
cfg, err := k8sapi.Config().Get()
if err != nil {
return errors.Wrap(err, "error building kubeconfig")
}
// Building Kubernetes Clientset
kubeClient, err := kubernetes.NewForConfig(cfg)
if err != nil {
return errors.Wrap(err, "error building kubernetes clientset")
}
// Building OpenEBS Clientset
openebsClient, err := clientset.NewForConfig(cfg)
if err != nil {
return errors.Wrap(err, "error building openebs clientset")
}
// Set up a watch only on the ZFSNode object we are interested in.
nodeInformerFactory := informers.NewSharedInformerFactoryWithOptions(
openebsClient, 0, informers.WithNamespace(zfs.OpenEBSNamespace),
informers.WithTweakListOptions(func(options *metav1.ListOptions) {
options.FieldSelector = fields.OneTermEqualSelector("metadata.name", zfs.NodeID).String()
}))
k8sNode, err := kubeClient.CoreV1().Nodes().Get(context.TODO(), zfs.NodeID, metav1.GetOptions{})
if err != nil {
return errors.Wrapf(err, "fetch k8s node %s", zfs.NodeID)
}
isTrue := true
// The object returned by client-go has all TypeMeta cleared, so
// construct the node's GVK explicitly for the owner reference.
nodeGVK := &schema.GroupVersionKind{
Group: "", Version: "v1", Kind: "Node",
}
ownerRef := metav1.OwnerReference{
APIVersion: nodeGVK.GroupVersion().String(),
Kind: nodeGVK.Kind,
Name: k8sNode.Name,
UID: k8sNode.GetUID(),
Controller: &isTrue,
}
// The Build() fn of every controller calls AddToScheme to add all the types
// of this clientset to the given scheme.
// If multiple controllers happen to call AddToScheme at the same time,
// it causes a panic due to concurrent map access.
// This lock is used to serialize the AddToScheme calls of all controllers.
controllerMtx.Lock()
controller, err := NewNodeControllerBuilder().
withKubeClient(kubeClient).
withOpenEBSClient(openebsClient).
withNodeSynced(nodeInformerFactory).
withNodeLister(nodeInformerFactory).
withRecorder(kubeClient).
withEventHandler(nodeInformerFactory).
withPollInterval(60 * time.Second).
withOwnerReference(ownerRef).
withWorkqueueRateLimiting().Build()
// controller.Run below is a blocking call, so we can't use defer to release the lock
controllerMtx.Unlock()
if err != nil {
return errors.Wrapf(err, "error building controller instance")
}
nodeInformerFactory.Start(stopCh)
// Threadiness defines the number of workers launched by the Run function
return controller.Run(1, stopCh)
}
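
For context, a minimal sketch of how an agent entrypoint might invoke Start; the repository's actual main package differs, and the signal handling below is an illustrative assumption:

package main

import (
	"os"
	"os/signal"
	"sync"
	"syscall"

	"github.com/openebs/zfs-localpv/pkg/mgmt/zfsnode"
	"k8s.io/klog"
)

func main() {
	// The shared mutex serializes scheme registration across controllers.
	var controllerMtx sync.RWMutex
	stopCh := make(chan struct{})

	// Close stopCh on SIGINT/SIGTERM so Run can drain and shut down the workqueue.
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
	go func() {
		<-sigCh
		close(stopCh)
	}()

	// Blocks until stopCh is closed or controller construction fails.
	if err := zfsnode.Start(&controllerMtx, stopCh); err != nil {
		klog.Fatalf("starting zfsnode controller: %v", err)
	}
}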

pkg/mgmt/zfsnode/zfsnode.go (new file, 307 lines)

@@ -0,0 +1,307 @@
/*
Copyright © 2021 The OpenEBS Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package zfsnode
import (
"fmt"
"reflect"
"time"
apis "github.com/openebs/zfs-localpv/pkg/apis/openebs.io/zfs/v1"
"github.com/openebs/zfs-localpv/pkg/builder/nodebuilder"
"github.com/openebs/zfs-localpv/pkg/equality"
"github.com/openebs/zfs-localpv/pkg/zfs"
k8serror "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/tools/cache"
"k8s.io/klog"
)
// listZFSPool lists the zfs pools present on this node.
func (c *NodeController) listZFSPool() ([]apis.Pool, error) {
return zfs.ListZFSPool()
}
// syncHandler compares the actual state with the desired, and attempts to
// converge the two.
func (c *NodeController) syncHandler(key string) error {
// Convert the namespace/name string into a distinct namespace and name
namespace, name, err := cache.SplitMetaNamespaceKey(key)
if err != nil {
runtime.HandleError(fmt.Errorf("invalid resource key: %s", key))
return nil
}
return c.syncNode(namespace, name)
}
// syncNode is the function which tries to converge to a desired state for the
// ZFSNode
func (c *NodeController) syncNode(namespace string, name string) error {
// Get the node resource with this namespace/name
cachedNode, err := c.NodeLister.ZFSNodes(namespace).Get(name)
if err != nil && !k8serror.IsNotFound(err) {
return err
}
var node *apis.ZFSNode
if cachedNode != nil {
node = cachedNode.DeepCopy()
}
pools, err := c.listZFSPool()
if err != nil {
return err
}
if node == nil { // if it doesn't exist, create the zfs node object
if node, err = nodebuilder.NewBuilder().
WithNamespace(namespace).WithName(name).
WithPools(pools).
WithOwnerReferences(c.ownerRef).
Build(); err != nil {
return err
}
klog.Infof("zfs node controller: creating new node object for %+v", node)
if node, err = nodebuilder.NewKubeclient().WithNamespace(namespace).Create(node); err != nil {
return fmt.Errorf("create zfs node %s/%s: %v", namespace, name, err)
}
klog.Infof("zfs node controller: created node object %s/%s", namespace, name)
return nil
}
// zfs node already exists; check if we need to update it.
var updateRequired bool
// validate whether the owner references need an update.
if ownerRefs, req := c.isOwnerRefsUpdateRequired(node.OwnerReferences); req {
klog.Infof("zfs node controller: node owner references updated current=%+v, required=%+v",
node.OwnerReferences, ownerRefs)
node.OwnerReferences = ownerRefs
updateRequired = true
}
// validate if the node pools are up to date.
if !equality.Semantic.DeepEqual(node.Pools, pools) {
klog.Infof("zfs node controller: node pools updated current=%+v, required=%+v",
node.Pools, pools)
node.Pools = pools
updateRequired = true
}
if !updateRequired {
return nil
}
klog.Infof("zfs node controller: updating node object with %+v", node)
if _, err = nodebuilder.NewKubeclient().WithNamespace(namespace).Update(node); err != nil {
return fmt.Errorf("update zfs node %s/%s: %v", namespace, name, err)
}
klog.Infof("zfs node controller: updated node object %s/%s", namespace, name)
return nil
}
// addNode is the add event handler for ZFSNode
func (c *NodeController) addNode(obj interface{}) {
node, ok := obj.(*apis.ZFSNode)
if !ok {
runtime.HandleError(fmt.Errorf("Couldn't get node object %#v", obj))
return
}
klog.Infof("Got add event for zfs node %s/%s", node.Namespace, node.Name)
c.enqueueNode(node)
}
// updateNode is the update event handler for ZFSNode
func (c *NodeController) updateNode(oldObj, newObj interface{}) {
newNode, ok := newObj.(*apis.ZFSNode)
if !ok {
runtime.HandleError(fmt.Errorf("Couldn't get node object %#v", newNode))
return
}
klog.Infof("Got update event for zfs node %s/%s", newNode.Namespace, newNode.Name)
c.enqueueNode(newNode)
}
// deleteNode is the delete event handler for ZFSNode
func (c *NodeController) deleteNode(obj interface{}) {
node, ok := obj.(*apis.ZFSNode)
if !ok {
tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
if !ok {
runtime.HandleError(fmt.Errorf("Couldn't get object from tombstone %#v", obj))
return
}
node, ok = tombstone.Obj.(*apis.ZFSNode)
if !ok {
runtime.HandleError(fmt.Errorf("Tombstone contained object that is not a ZFSNode %#v", obj))
return
}
}
klog.Infof("Got delete event for node %s/%s", node.Namespace, node.Name)
c.enqueueNode(node)
}
// enqueueNode takes a ZFSNode resource and converts it into a namespace/name
// string which is then put onto the work queue. This method should *not* be
// passed resources of any type other than ZFSNode.
func (c *NodeController) enqueueNode(node *apis.ZFSNode) {
// The node object must exist in the openebs namespace and its name must match this node's id.
if node.Namespace != zfs.OpenEBSNamespace ||
node.Name != zfs.NodeID {
klog.Warningf("skipping zfs node object %s/%s", node.Namespace, node.Name)
return
}
key, err := cache.MetaNamespaceKeyFunc(node)
if err != nil {
runtime.HandleError(err)
return
}
c.workqueue.Add(key)
}
// Run will set up the event handlers for types we are interested in, as well
// as syncing informer caches and starting workers. It will block until stopCh
// is closed, at which point it will shutdown the workqueue and wait for
// workers to finish processing their current work items.
func (c *NodeController) Run(threadiness int, stopCh <-chan struct{}) error {
defer runtime.HandleCrash()
defer c.workqueue.ShutDown()
// Start the informer factories to begin populating the informer caches
klog.Info("Starting Node controller")
// Wait for the k8s caches to be synced before starting workers
klog.Info("Waiting for informer caches to sync")
if ok := cache.WaitForCacheSync(stopCh, c.NodeSynced); !ok {
return fmt.Errorf("failed to wait for caches to sync")
}
klog.Info("Starting Node workers")
// Launch worker to process Node resources
// Threadiness decides the number of workers launched to process work items from the queue
for i := 0; i < threadiness; i++ {
go wait.Until(c.runWorker, time.Second, stopCh)
}
klog.Info("Started Node workers")
// The timer fires immediately at startup and is then reset to pollInterval,
// driving the periodic re-sync of the pool metadata.
timer := time.NewTimer(0)
defer timer.Stop()
for {
select {
case <-timer.C:
case <-stopCh:
klog.Info("Shutting down Node controller")
return nil
}
item := zfs.OpenEBSNamespace + "/" + zfs.NodeID
c.workqueue.Add(item) // add the item to worker queue.
timer.Reset(c.pollInterval)
}
}
// runWorker is a long-running function that will continually call the
// processNextWorkItem function in order to read and process a message on the
// workqueue.
func (c *NodeController) runWorker() {
for c.processNextWorkItem() {
}
}
// processNextWorkItem will read a single work item off the workqueue and
// attempt to process it, by calling the syncHandler.
func (c *NodeController) processNextWorkItem() bool {
obj, shutdown := c.workqueue.Get()
if shutdown {
return false
}
// We wrap this block in a func so we can defer c.workqueue.Done.
err := func(obj interface{}) error {
// We call Done here so the workqueue knows we have finished
// processing this item. We also must remember to call Forget if we
// do not want this work item being re-queued. For example, we do
// not call Forget if a transient error occurs, instead the item is
// put back on the workqueue and attempted again after a back-off
// period.
defer c.workqueue.Done(obj)
var key string
var ok bool
// We expect strings to come off the workqueue. These are of the
// form namespace/name. We do this as the delayed nature of the
// workqueue means the items in the informer cache may actually be
// more up to date than when the item was initially put onto the
// workqueue.
if key, ok = obj.(string); !ok {
// As the item in the workqueue is actually invalid, we call
// Forget here else we'd go into a loop of attempting to
// process a work item that is invalid.
c.workqueue.Forget(obj)
runtime.HandleError(fmt.Errorf("expected string in workqueue but got %#v", obj))
return nil
}
// Run the syncHandler, passing it the namespace/name string of the
// Node resource to be synced.
if err := c.syncHandler(key); err != nil {
// Put the item back on the workqueue to handle any transient errors.
c.workqueue.AddRateLimited(key)
return fmt.Errorf("error syncing '%s': %s, requeuing", key, err.Error())
}
// Finally, if no error occurs we Forget this item so it does not
// get queued again until another change happens.
c.workqueue.Forget(obj)
klog.V(5).Infof("Successfully synced '%s'", key)
return nil
}(obj)
if err != nil {
runtime.HandleError(err)
return true
}
return true
}
// isOwnerRefsUpdateRequired validates whether the relevant owner reference is
// set for the zfs node. If not, it returns the final owner references that
// need to be set.
func (c *NodeController) isOwnerRefsUpdateRequired(ownerRefs []metav1.OwnerReference) ([]metav1.OwnerReference, bool) {
updated := false
reqOwnerRef := c.ownerRef
for idx := range ownerRefs {
if ownerRefs[idx].UID != reqOwnerRef.UID {
continue
}
// the owner reference already exists; validate
// whether the controller field is set correctly.
if !reflect.DeepEqual(ownerRefs[idx].Controller, reqOwnerRef.Controller) {
updated = true
ownerRefs[idx].Controller = reqOwnerRef.Controller
}
return ownerRefs, updated
}
updated = true
ownerRefs = append(ownerRefs, reqOwnerRef)
return ownerRefs, updated
}
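
A minimal sketch exercising the owner-reference reconciliation above; the UID and names are illustrative:

package zfsnode

import (
	"testing"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
)

func TestIsOwnerRefsUpdateRequired(t *testing.T) {
	isController := true
	c := &NodeController{ownerRef: metav1.OwnerReference{
		APIVersion: "v1", Kind: "Node", Name: "node-1",
		UID: types.UID("uid-1"), Controller: &isController,
	}}

	// Missing owner ref: it should be appended and flagged as an update.
	refs, updated := c.isOwnerRefsUpdateRequired(nil)
	if !updated || len(refs) != 1 {
		t.Fatalf("expected owner ref to be appended, got %+v", refs)
	}

	// Matching UID with the correct Controller field: no update required.
	if _, updated = c.isOwnerRefsUpdateRequired(refs); updated {
		t.Fatal("expected no further update once the owner ref matches")
	}
}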