feat(e2e-test): Add e2e-tests for zfs-localpv (#298)

Signed-off-by: w3aman <aman.gupta@mayadata.io>
Aman Gupta 2021-06-09 21:21:39 +05:30 committed by GitHub
parent 53f872fcf1
commit 4e73638b5a
137 changed files with 8745 additions and 0 deletions


@@ -0,0 +1,85 @@
## Experiment Metadata
| Type | Description | Storage | K8s Platform |
| ----- | ------------------------------------------------------------ | ------- | ----------------- |
| Chaos | Power off the node where application pod is hosted and observe application behavior | OpenEBS | on-premise-VMware |
## Entry-Criteria
- Application services are accessible & pods are healthy
- Application writes are successful
## Exit-Criteria
- Application pod should be evicted and rescheduled on another node.
- Data written prior to chaos is successfully retrieved/read
- Database consistency is maintained as per db integrity check utils
- Storage target pods are healthy
### Notes
- Typically used as a disruptive test to cause loss of access to the storage target by killing the node where the application pod is scheduled.
- The application pod should be recreated on another node and become healthy again.
## Associated Utils
- `vm_power_operations.yml`,`mysql_data_persistence.yml`,`busybox_data_persistence.yml`
### Procedure
This scenario validates the behaviour of the application and OpenEBS persistent volumes amidst chaos induced on the node where the application pod is scheduled. Chaos is injected by shutting down the node (a virtual machine) running on a VMware hypervisor. After the pod-eviction-timeout elapses (5 minutes by default), the application pod is expected to be scheduled on another available node. Because of the abrupt shutdown, the old application pod remains in an Unknown state, and as a result the volume mount in the newly scheduled pod fails with a multi-attach error. As a workaround, the node CR is deleted, which removes the old pod. The application pod is then expected to run successfully after about 5 minutes.
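For reference, a minimal sketch of that workaround step, written as an Ansible task in the style of the test playbook; the task itself and the `app_node_name` variable are shown for illustration only, and the experiment's shipped utils may implement this step differently:
```
# Illustrative only: deleting the node CR of the powered-off node removes the
# stale pod record so the rescheduled pod can attach the volume.
- name: Delete the node CR of the failed node (multi-attach workaround)
  shell: kubectl delete node {{ app_node_name }}
  args:
    executable: /bin/bash
  register: node_delete_status
  failed_when: node_delete_status.rc != 0
```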
Based on the value of the env `DATA_PERSISTENCE`, the corresponding data consistency util is executed. At present, only busybox and percona-mysql are supported. Along with specifying the env in the litmus experiment, the user needs to provide a name for the configmap and pass the data-consistency-specific parameters through that configmap in the format as follows:
```
parameters.yml: |
blocksize: 4k
blockcount: 1024
testfile: difiletest
```
It is recommended to use the test name as the configmap name and to mount that configmap as a volume in the litmus pod. The snippet above holds the parameters required for validating data consistency in the busybox application.
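For reference, a filled-in ConfigMap for the busybox case could look like the following sketch; the `node-failure` name and `e2e` namespace mirror the run manifest added in this commit, and the parameter values are illustrative only:
```
# Illustrative ConfigMap carrying the busybox data-consistency parameters.
# Name and namespace match the run manifest below; values are examples.
apiVersion: v1
kind: ConfigMap
metadata:
  name: node-failure
  namespace: e2e
data:
  parameters.yml: |
    blocksize: 4k
    blockcount: 1024
    testfile: difiletest
```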
For percona-mysql, the following parameters are to be injected into the configmap:
```
parameters.yml: |
dbuser: root
dbpassword: k8sDem0
dbname: tdb
```
The configmap data is consumed by the litmus experiment as variables while executing the scenario.
Based on the data provided, litmus checks whether the data is consistent after recovering from the induced chaos.
The ESX password has to be provided through a Kubernetes secret. The litmus runner retrieves the password from the secret as an environment variable and uses it to perform admin operations on the server.
Note: To perform admin operations on VMware, the VM display name in the hypervisor must match its hostname.
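For illustration, a filled-in version of the `host-password` secret consumed by the runner could look like the sketch below; the base64 string here encodes only the placeholder `<esx-password>` and must be replaced with the real, base64-encoded ESX password:
```
# Illustrative Secret supplying the ESX password. `password` must hold the
# base64-encoded value, e.g. `echo -n '<esx-password>' | base64`.
apiVersion: v1
kind: Secret
metadata:
  name: host-password
  namespace: e2e
type: Opaque
data:
  password: PGVzeC1wYXNzd29yZD4=   # base64 of the placeholder "<esx-password>"
```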
## Litmus experiment Environment Variables
### Application
| Parameter | Description |
| ---------------- | ------------------------------------------------------------ |
| APP_NAMESPACE | Namespace in which application pods are deployed |
| APP_LABEL | Unique Labels in `key=value` format of application deployment |
| APP_PVC | Name of persistent volume claim used for app's volume mounts |
| ZFS_OPERATOR_NAMESPACE | Namespace where the OpenEBS ZFS-LocalPV driver is installed |
| DATA_PERSISTENCE | Specify the application name against which data consistency has to be ensured. Example: busybox |
### Chaos
| Parameter | Description |
| ------------ | ------------------------------------------------------------ |
| PLATFORM | The platform where k8s cluster is created. Currently, only 'vmware' is supported. |
| ESX_HOST_IP | The IP address of ESX server where the virtual machines are hosted. |
| ESX_PASSWORD | ESX server password; passed through the `host-password` Kubernetes secret and consumed via `secretKeyRef`. |


@@ -0,0 +1,5 @@
{% if data_persistence is defined and data_persistence == 'mysql' %}
consistencyutil: /e2e-tests/utils/applications/mysql/mysql_data_persistence.yml
{% elif data_persistence is defined and data_persistence == 'busybox' %}
consistencyutil: /e2e-tests/utils/applications/busybox/busybox_data_persistence.yml
{% endif %}


@@ -0,0 +1,108 @@
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: node-failure
  namespace: e2e
data:
  parameters.yml: |

---
apiVersion: v1
kind: Secret
metadata:
  name: host-password
  namespace: e2e
type: Opaque
data:
  password:

---
apiVersion: v1
kind: Secret
metadata:
  name: node-password
  namespace: e2e
type: Opaque
data:
  passwordNode:

---
apiVersion: batch/v1
kind: Job
metadata:
  generateName: node-failure-
  namespace: e2e
spec:
  template:
    metadata:
      labels:
        test: node-failure
    spec:
      serviceAccountName: e2e
      restartPolicy: Never
      #nodeSelector:
      #  kubernetes.io/hostname:
      tolerations:
        - key: "infra-aid"
          operator: "Equal"
          value: "observer"
          effect: "NoSchedule"
      containers:
        - name: ansibletest
          image: openebs/zfs-localpv-e2e:ci
          imagePullPolicy: IfNotPresent
          env:
            - name: ANSIBLE_STDOUT_CALLBACK
              value: default

            - name: APP_NAMESPACE
              value: ''

            - name: APP_LABEL
              value: ''

            - name: APP_PVC
              value: ''

            # The IP address of ESX HOST
            - name: ESX_HOST_IP
              value: ""

            - name: ZFS_OPERATOR_NAMESPACE
              value: ''

            - name: USERNAME
              value: ''

            - name: ZPOOL_NAME
              value: ''

            - name: ESX_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: host-password
                  key: password

            - name: NODE_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: node-password
                  key: passwordNode

            - name: DATA_PERSISTENCE
              value: ""

          command: ["/bin/bash"]
          args: ["-c", "ANSIBLE_LOCAL_TEMP=$HOME/.ansible/tmp ANSIBLE_REMOTE_TEMP=$HOME/.ansible/tmp ansible-playbook ./e2e-tests/experiments/infra-chaos/node_failure/test.yml -i /etc/ansible/hosts -vv; exit 0"]

          volumeMounts:
            - name: parameters
              mountPath: /mnt/
      volumes:
        - name: parameters
          configMap:
            name: node-failure


@@ -0,0 +1,236 @@
---
- hosts: localhost
  connection: local
  gather_facts: False

  vars_files:
    - test_vars.yml
    - /mnt/parameters.yml

  tasks:
    - block:

        ## Generating the testname for node failure chaos test
        - include_tasks: /e2e-tests/hack/create_testname.yml

        ## Record SOT (start of test) in e2e result e2e-cr (e2e-custom-resource)
        - include_tasks: /e2e-tests/hack/update_e2e_result_resource.yml
          vars:
            status: 'SOT'

        - name: Identify the data consistency util to be invoked
          template:
            src: data_persistence.j2
            dest: data_persistence.yml

        - include_vars:
            file: data_persistence.yml

        - name: Record the data consistency util path
          set_fact:
            data_consistency_util_path: "{{ consistencyutil }}"
          when: data_persistence != ''

        - name: Get application pod name
          shell: >
            kubectl get pod -n {{ namespace }} -l {{ label }} --no-headers
            -o=custom-columns=NAME:".metadata.name"
          args:
            executable: /bin/bash
          register: app_pod_name

        - name: Record the application pod name
          set_fact:
            application_pod: "{{ app_pod_name.stdout }}"

        - name: Obtain PVC name from the application mount
          shell: >
            kubectl get pods "{{ app_pod_name.stdout }}" -n "{{ namespace }}"
            -o custom-columns=:.spec.volumes[*].persistentVolumeClaim.claimName --no-headers
          args:
            executable: /bin/bash
          register: pvc

        - name: Obtain the Persistent Volume name
          shell: >
            kubectl get pvc "{{ pvc.stdout }}" -n "{{ namespace }}" --no-headers
            -o custom-columns=:.spec.volumeName
          args:
            executable: /bin/bash
          register: pv
          failed_when: 'pv.stdout == ""'

        - name: Record the pv name
          set_fact:
            pv_name: "{{ pv.stdout }}"

        ## Generate dummy test data on the application
        - name: Generate data on the specified application.
          include: "{{ data_consistency_util_path }}"
          vars:
            status: 'LOAD'
            ns: "{{ namespace }}"
            pod_name: "{{ app_pod_name.stdout }}"
          when: data_persistence != ''

        ## Obtain the node name where application pod is running
        - name: Get Application pod Node to perform chaos
          shell: >
            kubectl get pod {{ app_pod_name.stdout }} -n {{ namespace }}
            --no-headers -o custom-columns=:spec.nodeName
          args:
            executable: /bin/bash
          register: app_node

        - name: Record the application pod node name
          set_fact:
            app_node_name: "{{ app_node.stdout }}"

        ## Execute the chaos util to turn off the target node
        - include_tasks: "/e2e-tests/chaoslib/vmware_chaos/vm_power_operations.yml"
          vars:
            esx_ip: "{{ host_ip }}"
            target_node: "{{ app_node.stdout }}"
            operation: "off"

        - name: Check the node status
          shell: kubectl get nodes {{ app_node.stdout }} --no-headers
          args:
            executable: /bin/bash
          register: state
          until: "'NotReady' in state.stdout"
          delay: 15
          retries: 30

        - name: Check if the new application pod is scheduled after node failure
          shell: >
            kubectl get pods -n {{ namespace }} -l {{ label }} --no-headers | wc -l
          args:
            executable: /bin/bash
          register: app_pod_count
          until: "'2' in app_pod_count.stdout"
          delay: 15
          retries: 30

        - name: Get the new application pod name
          shell: >
            kubectl get pod -n {{ namespace }} -l {{ label }} --no-headers | grep -v Terminating | awk '{print $1}'
          args:
            executable: /bin/bash
          register: new_app_pod_name

        - name: Record the new application pod name
          set_fact:
            new_app_pod: "{{ new_app_pod_name.stdout }}"

        - name: Check for the newly created application pod status
          shell: >
            kubectl get pod {{ new_app_pod }} -n {{ namespace }} --no-headers -o custom-columns=:.status.phase
          args:
            executable: /bin/bash
          register: new_app_pod_status
          failed_when: "'Pending' not in new_app_pod_status.stdout"

        - include_tasks: "/e2e-tests/chaoslib/vmware_chaos/vm_power_operations.yml"
          vars:
            esx_ip: "{{ host_ip }}"
            target_node: "{{ app_node_name }}"
            operation: "on"

        - name: Check the node status
          shell: kubectl get node {{ app_node_name }} --no-headers
          args:
            executable: /bin/bash
          register: node_status
          until: "'NotReady' not in node_status.stdout"
          delay: 10
          retries: 30

        - name: Verify that the previous application pod is successfully deleted
          shell: kubectl get pod -n {{ namespace }} -l {{ label }} --no-headers
          args:
            executable: /bin/bash
          register: app_pod_status
          until: "'{{ application_pod }}' not in app_pod_status.stdout"
          delay: 5
          retries: 40

        - name: Get the IP Address of the node on which application pod is scheduled
          shell: >
            kubectl get nodes {{ app_node_name }} --no-headers -o jsonpath='{.status.addresses[0].address}'
          args:
            executable: /bin/bash
          register: node_ip_address

        - name: Record the IP Address of the node on which application pod is scheduled
          set_fact:
            node_ip_add: "{{ node_ip_address.stdout }}"

        - name: Check if zpool is present
          shell: >
            sshpass -p {{ node_pwd }} ssh -o StrictHostKeyChecking=no {{ user }}@{{ node_ip_add }} "zpool list"
          args:
            executable: /bin/bash
          register: zpool_status

        - name: Import the zpool after turning on the VM's
          shell: >
            sshpass -p {{ node_pwd }} ssh -o StrictHostKeyChecking=no {{ user }}@{{ node_ip_add }}
            "echo {{ node_pwd }} | sudo -S su -c 'zpool import -f {{ zpool_name }}'"
          args:
            executable: /bin/bash
          register: status
          failed_when: "status.rc != 0"
          when: "'{{ zpool_name }}' not in zpool_status.stdout"

        - name: Verify that the zfs dataset is available now
          shell: >
            sshpass -p {{ node_pwd }} ssh -o StrictHostKeyChecking=no {{ user }}@{{ node_ip_add }} "zfs list"
          args:
            executable: /bin/bash
          register: zfs_dataset
          until: "'{{ zpool_name }}/{{ pv_name }}' in zfs_dataset.stdout"
          delay: 10
          retries: 30

        - name: Check the newly scheduled application pod status
          shell: kubectl get pod {{ new_app_pod }} -n {{ namespace }} --no-headers -o custom-columns=:.status.phase
          args:
            executable: /bin/bash
          register: new_app_pod_status
          until: "'Running' in new_app_pod_status.stdout"
          delay: 5
          retries: 50

        - block:
            - name: Obtain the rescheduled pod name
              shell: >
                kubectl get pods -n {{ namespace }} -l {{ label }} --no-headers
                -o custom-columns=:metadata.name
              args:
                executable: /bin/bash
              register: rescheduled_app_pod

            - name: Verify application data persistence
              include: "{{ data_consistency_util_path }}"
              vars:
                status: 'VERIFY'
                ns: "{{ namespace }}"
                pod_name: "{{ rescheduled_app_pod.stdout }}"
          when: data_persistence != ''

        - set_fact:
            flag: "Pass"

      rescue:
        - set_fact:
            flag: "Fail"

      always:
        - include_tasks: /e2e-tests/hack/update_e2e_result_resource.yml
          vars:
            status: 'EOT'


@@ -0,0 +1,24 @@
---
# Test specific parameters
test_name: node-failure
namespace: "{{ lookup('env','APP_NAMESPACE') }}"
pvc: "{{ lookup('env','APP_PVC') }}"
label: "{{ lookup('env','APP_LABEL') }}"
host_ip: "{{ lookup('env','ESX_HOST_IP') }}"
esx_pwd: "{{ lookup('env','ESX_PASSWORD') }}"
data_persistence: "{{ lookup('env','DATA_PERSISTENCE') }}"
zfs_operator_ns: "{{ lookup('env','ZFS_OPERATOR_NAMESPACE') }}"
user: "{{ lookup('env','USERNAME') }}"
zpool_name: "{{ lookup('env','ZPOOL_NAME') }}"
node_pwd: "{{ lookup('env','NODE_PASSWORD') }}"