feat(migration): add support for migrating a PV to a new node (#304)

Use case: A node in the Kubernetes cluster is replaced with a new node. The
new node gets a different `kubernetes.io/hostname`, and the storage devices
that were attached to the old node are re-attached to the new node.

Fix: Instead of using the default `kubernetes.io/hostname` as the node affinity
label, this commit switches to `openebs.io/nodeid`. The ZFS LocalPV driver
picks the value of this label from the node and sets it as the volume's node
affinity.
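
With this change, the node affinity recorded on the PV should use the
`openebs.io/nodeid` key instead of `kubernetes.io/hostname`. As a quick check
(the PV name below is a placeholder), the affinity can be inspected with:

```sh
# Inspect the node affinity the driver has set on a ZFS LocalPV volume.
kubectl get pv <pv-name> -o jsonpath='{.spec.nodeAffinity}'
```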

Even after the old node is removed from the cluster, the K8s scheduler will
keep trying to schedule the application pods to the old node only (because of
the volume's node affinity), leaving them unschedulable.

The user can now set the `openebs.io/nodeid` label on the new node to the value
that was present on the old node. This ensures that the pods/volumes are
scheduled to the new node, as sketched below.
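
For illustration, a minimal sketch with hypothetical node names, assuming the
old node carried no explicit `openebs.io/nodeid` label (in which case the
driver falls back to the node name, here `node-old`, as the nodeid):

```sh
# Reuse the old nodeid on the new node so existing volumes follow it.
kubectl label node node-new openebs.io/nodeid=node-old --overwrite

# Verify the label is in place.
kubectl get node node-new -L openebs.io/nodeid
```

Since the node driver resolves the nodeid in `init()`, the ZFS LocalPV node pod
on the new node may need a restart to pick up a label applied after it started.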


Note: To migrate a PV to another node, we have to move the disks to that node,
remove the old node from the cluster, and set the same label (same key, same
value) on the new node, as shown above; the K8s scheduler can then schedule the
pods to that node.

Other updates: 
* adding an FAQ doc
* renaming the config variable to `nodename`

Signed-off-by: Pawan <pawan@mayadata.io>
Co-authored-by: Akhil Mohan <akhilerm@gmail.com>

* Update docs/faq.md

Co-authored-by: Akhil Mohan <akhilerm@gmail.com>

@@ -21,6 +21,7 @@ import (
 	"strconv"
 	"time"

+	k8sapi "github.com/openebs/lib-csi/pkg/client/k8s"
 	apis "github.com/openebs/zfs-localpv/pkg/apis/openebs.io/zfs/v1"
 	"github.com/openebs/zfs-localpv/pkg/builder/bkpbuilder"
 	"github.com/openebs/zfs-localpv/pkg/builder/restorebuilder"
@@ -49,7 +50,7 @@ const (
 	// ZFSNodeKey will be used to insert Label in ZfsVolume CR
 	ZFSNodeKey string = "kubernetes.io/nodename"
 	// ZFSTopologyKey is supported topology key for the zfs driver
-	ZFSTopologyKey string = "openebs.io/nodename"
+	ZFSTopologyKey string = "openebs.io/nodeid"
 	// ZFSStatusPending shows object has not handled yet
 	ZFSStatusPending string = "Pending"
 	// ZFSStatusFailed shows object operation has failed
@@ -70,19 +71,45 @@
 )

 func init() {
+	var err error
 	OpenEBSNamespace = os.Getenv(OpenEBSNamespaceKey)
-	if OpenEBSNamespace == "" && os.Getenv("OPENEBS_NODE_DRIVER") != "" {
-		klog.Fatalf("OPENEBS_NAMESPACE environment variable not set")
-	}
-
-	NodeID = os.Getenv("OPENEBS_NODE_ID")
-	if NodeID == "" && os.Getenv("OPENEBS_NODE_DRIVER") != "" {
-		klog.Fatalf("NodeID environment variable not set")
+
+	if os.Getenv("OPENEBS_NODE_DRIVER") != "" {
+		if OpenEBSNamespace == "" {
+			klog.Fatalf("OPENEBS_NAMESPACE environment variable not set for daemonset")
+		}
+		nodename := os.Getenv("OPENEBS_NODE_NAME")
+		if nodename == "" {
+			klog.Fatalf("OPENEBS_NODE_NAME environment variable not set")
+		}
+		if NodeID, err = GetNodeID(nodename); err != nil {
+			klog.Fatalf("GetNodeID failed for node=%s err: %s", nodename, err.Error())
+		}
+		klog.Infof("zfs: node(%s) has node affinity %s=%s", nodename, ZFSTopologyKey, NodeID)
+	} else if os.Getenv("OPENEBS_CONTROLLER_DRIVER") != "" {
+		if OpenEBSNamespace == "" {
+			klog.Fatalf("OPENEBS_NAMESPACE environment variable not set for controller")
+		}
 	}
+
 	GoogleAnalyticsEnabled = os.Getenv(GoogleAnalyticsKey)
 }

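+// GetNodeID returns the nodeid to be used in the volume's node affinity:
+// the value of the node's openebs.io/nodeid label when it is set, otherwise
+// the node name itself.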
+func GetNodeID(nodename string) (string, error) {
+	node, err := k8sapi.GetNode(nodename)
+	if err != nil {
+		return "", fmt.Errorf("failed to get the node %s", nodename)
+	}
+
+	nodeid, ok := node.Labels[ZFSTopologyKey]
+	if !ok {
+		// node is not labelled, use node name as nodeid
+		return nodename, nil
+	}
+	return nodeid, nil
+}
+
 func checkVolCreation(ctx context.Context, volname string) (bool, error) {
 	timeout := time.After(10 * time.Second)
 	for {
@@ -104,7 +131,7 @@ func checkVolCreation(ctx context.Context, volname string) (bool, error) {
 			return false, fmt.Errorf("zfs: volume creation failed")
 		}

-		klog.Infof("zfs: waiting for volume %s/%s to be created on node %s",
+		klog.Infof("zfs: waiting for volume %s/%s to be created on nodeid %s",
 			vol.Spec.PoolName, volname, vol.Spec.OwnerNodeID)

 		time.Sleep(time.Second)
@@ -135,7 +162,7 @@ func ProvisionVolume(
 	}

 	if err != nil {
-		klog.Infof("zfs: volume %s/%s provisioning failed on node %s err: %s",
+		klog.Infof("zfs: volume %s/%s provisioning failed on nodeid %s err: %s",
 			vol.Spec.PoolName, vol.Name, vol.Spec.OwnerNodeID, err.Error())
 	}