feat(e2e-test): Add e2e-tests for zfs-localpv (#298)

Signed-off-by: w3aman <aman.gupta@mayadata.io>
Aman Gupta 2021-06-09 21:21:39 +05:30 committed by GitHub
parent 53f872fcf1
commit 4e73638b5a
137 changed files with 8745 additions and 0 deletions


@@ -0,0 +1,85 @@
## Experiment Metadata
| Type | Description | Storage | K8s Platform |
| ----- | ------------------------------------------------------------ | ------- | ----------------- |
| Chaos | Power off the node where application pod is hosted and observe application behavior | OpenEBS | on-premise-VMware |
## Entry-Criteria
- Application services are accessible & pods are healthy
- Application writes are successful
## Exit-Criteria
- Application pod should be evicted and rescheduled on another node.
- Data written prior to chaos is successfully retrieved/read
- Database consistency is maintained as per db integrity check utils
- Storage target pods are healthy
### Notes
- Typically used as a disruptive test to cause loss of access to the storage target by killing the node where the application pod is scheduled.
- The application pod should be recreated on another node and become healthy again.
## Associated Utils
- `vm_power_operations.yml`,`mysql_data_persistence.yml`,`busybox_data_persistence.yml`
### Procedure
This scenario validates the behaviour of the application and OpenEBS persistent volumes amidst chaos induced on the node where the application pod is scheduled. Chaos is injected by shutting down the node (a virtual machine) running on a VMware hypervisor. After the pod-eviction-timeout elapses (5 minutes by default), the application pod is expected to be scheduled on another available node. Because of the abrupt shutdown, the old application pod remains in an Unknown state, and as a result the volume mount in the newly scheduled pod fails with a multi-attach error. As a workaround, the node CR is deleted, which removes the old pod. The application pod is then expected to run successfully after about 5 minutes.
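For reference, a minimal sketch of that workaround step, written as an Ansible task in the style of the test playbook; the task itself and the `app_node_name` variable are shown for illustration only, and the experiment's shipped utils may implement this step differently:
```
# Illustrative only: deleting the node CR of the powered-off node removes the
# stale pod record so the rescheduled pod can attach the volume.
- name: Delete the node CR of the failed node (multi-attach workaround)
  shell: kubectl delete node {{ app_node_name }}
  args:
    executable: /bin/bash
  register: node_delete_status
  failed_when: node_delete_status.rc != 0
```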
Based on the value of the env `DATA_PERSISTENCE`, the corresponding data consistency util is executed. At present, only busybox and percona-mysql are supported. Along with specifying the env in the litmus experiment, the user needs to provide a name for the configmap and pass the data-consistency-specific parameters through that configmap in the format as follows:
```
parameters.yml: |
blocksize: 4k
blockcount: 1024
testfile: difiletest
```
It is recommended to use the test name as the configmap name and to mount that configmap as a volume in the litmus pod. The snippet above holds the parameters required for validating data consistency in the busybox application.
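For reference, a filled-in ConfigMap for the busybox case could look like the following sketch; the `node-failure` name and `e2e` namespace mirror the run manifest added in this commit, and the parameter values are illustrative only:
```
# Illustrative ConfigMap carrying the busybox data-consistency parameters.
# Name and namespace match the run manifest below; values are examples.
apiVersion: v1
kind: ConfigMap
metadata:
  name: node-failure
  namespace: e2e
data:
  parameters.yml: |
    blocksize: 4k
    blockcount: 1024
    testfile: difiletest
```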
For percona-mysql, the following parameters are to be injected into the configmap:
```
parameters.yml: |
dbuser: root
dbpassword: k8sDem0
dbname: tdb
```
The configmap data is consumed by the litmus experiment as variables while executing the scenario.
Based on the data provided, litmus checks whether the data is consistent after recovering from the induced chaos.
The ESX password has to be provided through a Kubernetes secret. The litmus runner retrieves the password from the secret as an environment variable and uses it to perform admin operations on the server.
Note: To perform admin operations on VMware, the VM display name in the hypervisor must match its hostname.
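For illustration, a filled-in version of the `host-password` secret consumed by the runner could look like the sketch below; the base64 string here encodes only the placeholder `<esx-password>` and must be replaced with the real, base64-encoded ESX password:
```
# Illustrative Secret supplying the ESX password. `password` must hold the
# base64-encoded value, e.g. `echo -n '<esx-password>' | base64`.
apiVersion: v1
kind: Secret
metadata:
  name: host-password
  namespace: e2e
type: Opaque
data:
  password: PGVzeC1wYXNzd29yZD4=   # base64 of the placeholder "<esx-password>"
```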
## Litmus experiment Environment Variables
### Application
| Parameter | Description |
| ---------------- | ------------------------------------------------------------ |
| APP_NAMESPACE | Namespace in which application pods are deployed |
| APP_LABEL | Unique Labels in `key=value` format of application deployment |
| APP_PVC | Name of persistent volume claim used for app's volume mounts |
| ZFS_OPERATOR_NAMESPACE | Namespace where the OpenEBS ZFS-LocalPV driver is installed |
| DATA_PERSISTENCE | Specify the application name against which data consistency has to be ensured. Example: busybox |
### Chaos
| Parameter | Description |
| ------------ | ------------------------------------------------------------ |
| PLATFORM | The platform where k8s cluster is created. Currently, only 'vmware' is supported. |
| ESX_HOST_IP | The IP address of ESX server where the virtual machines are hosted. |
| ESX_PASSWORD | ESX server password; passed through the `host-password` Kubernetes secret and consumed via `secretKeyRef`. |


@@ -0,0 +1,5 @@
{% if data_persistence is defined and data_persistence == 'mysql' %}
consistencyutil: /e2e-tests/utils/applications/mysql/mysql_data_persistence.yml
{% elif data_persistence is defined and data_persistence == 'busybox' %}
consistencyutil: /e2e-tests/utils/applications/busybox/busybox_data_persistence.yml
{% endif %}


@@ -0,0 +1,108 @@
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: node-failure
  namespace: e2e
data:
  parameters.yml: |

---
apiVersion: v1
kind: Secret
metadata:
  name: host-password
  namespace: e2e
type: Opaque
data:
  password:

---
apiVersion: v1
kind: Secret
metadata:
  name: node-password
  namespace: e2e
type: Opaque
data:
  passwordNode:

---
apiVersion: batch/v1
kind: Job
metadata:
  generateName: node-failure-
  namespace: e2e
spec:
  template:
    metadata:
      labels:
        test: node-failure
    spec:
      serviceAccountName: e2e
      restartPolicy: Never
      #nodeSelector:
      #  kubernetes.io/hostname:
      tolerations:
        - key: "infra-aid"
          operator: "Equal"
          value: "observer"
          effect: "NoSchedule"
      containers:
        - name: ansibletest
          image: openebs/zfs-localpv-e2e:ci
          imagePullPolicy: IfNotPresent
          env:
            - name: ANSIBLE_STDOUT_CALLBACK
              value: default

            - name: APP_NAMESPACE
              value: ''

            - name: APP_LABEL
              value: ''

            - name: APP_PVC
              value: ''

            # The IP address of ESX HOST
            - name: ESX_HOST_IP
              value: ""

            - name: ZFS_OPERATOR_NAMESPACE
              value: ''

            - name: USERNAME
              value: ''

            - name: ZPOOL_NAME
              value: ''

            - name: ESX_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: host-password
                  key: password

            - name: NODE_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: node-password
                  key: passwordNode

            - name: DATA_PERSISTENCE
              value: ""

          command: ["/bin/bash"]
          args: ["-c", "ANSIBLE_LOCAL_TEMP=$HOME/.ansible/tmp ANSIBLE_REMOTE_TEMP=$HOME/.ansible/tmp ansible-playbook ./e2e-tests/experiments/infra-chaos/node_failure/test.yml -i /etc/ansible/hosts -vv; exit 0"]

          volumeMounts:
            - name: parameters
              mountPath: /mnt/
      volumes:
        - name: parameters
          configMap:
            name: node-failure


@@ -0,0 +1,236 @@
---
- hosts: localhost
  connection: local
  gather_facts: False

  vars_files:
    - test_vars.yml
    - /mnt/parameters.yml

  tasks:
    - block:

        ## Generating the testname for node failure chaos test
        - include_tasks: /e2e-tests/hack/create_testname.yml

        ## Record SOT (start of test) in e2e result e2e-cr (e2e-custom-resource)
        - include_tasks: /e2e-tests/hack/update_e2e_result_resource.yml
          vars:
            status: 'SOT'

        - name: Identify the data consistency util to be invoked
          template:
            src: data_persistence.j2
            dest: data_persistence.yml

        - include_vars:
            file: data_persistence.yml

        - name: Record the data consistency util path
          set_fact:
            data_consistency_util_path: "{{ consistencyutil }}"
          when: data_persistence != ''

        - name: Get application pod name
          shell: >
            kubectl get pod -n {{ namespace }} -l {{ label }} --no-headers
            -o=custom-columns=NAME:".metadata.name"
          args:
            executable: /bin/bash
          register: app_pod_name

        - name: Record the application pod name
          set_fact:
            application_pod: "{{ app_pod_name.stdout }}"

        - name: Obtain PVC name from the application mount
          shell: >
            kubectl get pods "{{ app_pod_name.stdout }}" -n "{{ namespace }}"
            -o custom-columns=:.spec.volumes[*].persistentVolumeClaim.claimName --no-headers
          args:
            executable: /bin/bash
          register: pvc

        - name: Obtain the Persistent Volume name
          shell: >
            kubectl get pvc "{{ pvc.stdout }}" -n "{{ namespace }}" --no-headers
            -o custom-columns=:.spec.volumeName
          args:
            executable: /bin/bash
          register: pv
          failed_when: 'pv.stdout == ""'

        - name: Record the pv name
          set_fact:
            pv_name: "{{ pv.stdout }}"

        ## Generate dummy test data on the application
        - name: Generate data on the specified application.
          include: "{{ data_consistency_util_path }}"
          vars:
            status: 'LOAD'
            ns: "{{ namespace }}"
            pod_name: "{{ app_pod_name.stdout }}"
          when: data_persistence != ''

        ## Obtain the node name where application pod is running
        - name: Get Application pod Node to perform chaos
          shell: >
            kubectl get pod {{ app_pod_name.stdout }} -n {{ namespace }}
            --no-headers -o custom-columns=:spec.nodeName
          args:
            executable: /bin/bash
          register: app_node

        - name: Record the application pod node name
          set_fact:
            app_node_name: "{{ app_node.stdout }}"

        ## Execute the chaos util to turn off the target node
        - include_tasks: "/e2e-tests/chaoslib/vmware_chaos/vm_power_operations.yml"
          vars:
            esx_ip: "{{ host_ip }}"
            target_node: "{{ app_node.stdout }}"
            operation: "off"

        - name: Check the node status
          shell: kubectl get nodes {{ app_node.stdout }} --no-headers
          args:
            executable: /bin/bash
          register: state
          until: "'NotReady' in state.stdout"
          delay: 15
          retries: 30

        - name: Check if the new application pod is scheduled after node failure
          shell: >
            kubectl get pods -n {{ namespace }} -l {{ label }} --no-headers | wc -l
          args:
            executable: /bin/bash
          register: app_pod_count
          until: "'2' in app_pod_count.stdout"
          delay: 15
          retries: 30

        - name: Get the new application pod name
          shell: >
            kubectl get pod -n {{ namespace }} -l {{ label }} --no-headers | grep -v Terminating | awk '{print $1}'
          args:
            executable: /bin/bash
          register: new_app_pod_name

        - name: Record the new application pod name
          set_fact:
            new_app_pod: "{{ new_app_pod_name.stdout }}"

        - name: Check for the newly created application pod status
          shell: >
            kubectl get pod {{ new_app_pod }} -n {{ namespace }} --no-headers -o custom-columns=:.status.phase
          args:
            executable: /bin/bash
          register: new_app_pod_status
          failed_when: "'Pending' not in new_app_pod_status.stdout"

        - include_tasks: "/e2e-tests/chaoslib/vmware_chaos/vm_power_operations.yml"
          vars:
            esx_ip: "{{ host_ip }}"
            target_node: "{{ app_node_name }}"
            operation: "on"

        - name: Check the node status
          shell: kubectl get node {{ app_node_name }} --no-headers
          args:
            executable: /bin/bash
          register: node_status
          until: "'NotReady' not in node_status.stdout"
          delay: 10
          retries: 30

        - name: Verify that the previous application pod is successfully deleted
          shell: kubectl get pod -n {{ namespace }} -l {{ label }} --no-headers
          args:
            executable: /bin/bash
          register: app_pod_status
          until: "'{{ application_pod }}' not in app_pod_status.stdout"
          delay: 5
          retries: 40

        - name: Get the IP Address of the node on which application pod is scheduled
          shell: >
            kubectl get nodes {{ app_node_name }} --no-headers -o jsonpath='{.status.addresses[0].address}'
          args:
            executable: /bin/bash
          register: node_ip_address

        - name: Record the IP Address of the node on which application pod is scheduled
          set_fact:
            node_ip_add: "{{ node_ip_address.stdout }}"

        - name: Check if zpool is present
          shell: >
            sshpass -p {{ node_pwd }} ssh -o StrictHostKeyChecking=no {{ user }}@{{ node_ip_add }} "zpool list"
          args:
            executable: /bin/bash
          register: zpool_status

        - name: Import the zpool after turning on the VM's
          shell: >
            sshpass -p {{ node_pwd }} ssh -o StrictHostKeyChecking=no {{ user }}@{{ node_ip_add }}
            "echo {{ node_pwd }} | sudo -S su -c 'zpool import -f {{ zpool_name }}'"
          args:
            executable: /bin/bash
          register: status
          failed_when: "status.rc != 0"
          when: "'{{ zpool_name }}' not in zpool_status.stdout"

        - name: Verify that the zfs dataset is available now
          shell: >
            sshpass -p {{ node_pwd }} ssh -o StrictHostKeyChecking=no {{ user }}@{{ node_ip_add }} "zfs list"
          args:
            executable: /bin/bash
          register: zfs_dataset
          until: "'{{ zpool_name }}/{{ pv_name }}' in zfs_dataset.stdout"
          delay: 10
          retries: 30

        - name: Check the newly scheduled application pod status
          shell: kubectl get pod {{ new_app_pod }} -n {{ namespace }} --no-headers -o custom-columns=:.status.phase
          args:
            executable: /bin/bash
          register: new_app_pod_status
          until: "'Running' in new_app_pod_status.stdout"
          delay: 5
          retries: 50

        - block:
            - name: Obtain the rescheduled pod name
              shell: >
                kubectl get pods -n {{ namespace }} -l {{ label }} --no-headers
                -o custom-columns=:metadata.name
              args:
                executable: /bin/bash
              register: rescheduled_app_pod

            - name: Verify application data persistence
              include: "{{ data_consistency_util_path }}"
              vars:
                status: 'VERIFY'
                ns: "{{ namespace }}"
                pod_name: "{{ rescheduled_app_pod.stdout }}"
          when: data_persistence != ''

        - set_fact:
            flag: "Pass"

      rescue:
        - set_fact:
            flag: "Fail"

      always:
        - include_tasks: /e2e-tests/hack/update_e2e_result_resource.yml
          vars:
            status: 'EOT'


@@ -0,0 +1,24 @@
---
# Test specific parameters
test_name: node-failure
namespace: "{{ lookup('env','APP_NAMESPACE') }}"
pvc: "{{ lookup('env','APP_PVC') }}"
label: "{{ lookup('env','APP_LABEL') }}"
host_ip: "{{ lookup('env','ESX_HOST_IP') }}"
esx_pwd: "{{ lookup('env','ESX_PASSWORD') }}"
data_persistence: "{{ lookup('env','DATA_PERSISTENCE') }}"
zfs_operator_ns: "{{ lookup('env','ZFS_OPERATOR_NAMESPACE') }}"
user: "{{ lookup('env','USERNAME') }}"
zpool_name: "{{ lookup('env','ZPOOL_NAME') }}"
node_pwd: "{{ lookup('env','NODE_PASSWORD') }}"