feat(e2e-test): Add e2e-tests for zfs-localpv (#298)

Signed-off-by: w3aman <aman.gupta@mayadata.io>
Aman Gupta 2021-06-09 21:21:39 +05:30 committed by GitHub
parent 53f872fcf1
commit 4e73638b5a
137 changed files with 8745 additions and 0 deletions

@@ -0,0 +1,63 @@
## About this experiment
This experiment validates the stability and fault tolerance of an application pod consuming zfs-localpv storage. Chaos is induced on the application pod's container using the pumba chaos utils. It is essentially a disruptive test: access to storage is lost by failing the application pod, and the recovery workflow of the application pod is then verified.
## Supported platforms:
- K8s: 1.18+
- OS: Ubuntu, CentOS
- ZFS: 0.7, 0.8
## Entry-Criteria
- One application should be deployed consuming zfs-localpv storage.
- Application services are accessible & pods are healthy
- Application writes are successful
- The zfs-controller and CSI node-agent daemonset pods should be in the Running state.
## Exit-Criteria
- Application services are accessible & pods are healthy
- Data written prior to chaos is successfully retrieved/read
- Data consistency is maintained as per integrity check utils
- Storage target pods are healthy
## Steps performed
- Get the application pod name and check its Running status
- Dump some dummy data into the application mount point so that data consistency can be checked after chaos injection.
- Create a daemonset of pumba utils and get the name of the pumba pod scheduled on the same node as the application pod. The utils used in this test are located in the `e2e-tests/chaoslib/pumba` directory.
- Using the pumba pod, send SIGKILL to the application container to disrupt its access to storage; the container then restarts as part of the recovery process (a rough sketch of the command involved is given after this list).
- Check the container restart count to validate successful chaos injection.
- Validate the data consistency by checking the md5sum of test data.
- Delete the pumba daemonset.
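
The exact chaos commands live in the pumba chaoslib; the following is only a rough sketch, under the assumption that pumba is driven from the co-located pumba pod, with placeholder names throughout:
```
# Hypothetical sketch only (not the chaoslib code): kill the application
# container with SIGKILL from the pumba pod running on the same node.
kubectl exec -n <pumba-namespace> <pumba-pod-on-app-node> -- \
  pumba kill --signal SIGKILL "re2:k8s_.*<application-pod-name>.*"
```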
## How to run
- This experiment accepts its parameters as Kubernetes job environment variables.
- To run this application pod failure experiment, clone the [openebs/zfs-localpv](https://github.com/openebs/zfs-localpv) repo and first apply the RBAC and CRDs for the e2e framework.
```
kubectl apply -f zfs-localpv/e2e-tests/hack/rbac.yaml
kubectl apply -f zfs-localpv/e2e-tests/hack/crds.yaml
```
Then update the required test-specific values in the run_e2e_test.yml file and create the Kubernetes job.
```
kubectl create -f run_e2e_test.yml
```
A description of each environment variable is provided as comments in the same file.
After the job is created and its pod is instantiated, the logs of the pod executing the test case can be followed:
```
kubectl get pods -n e2e
kubectl logs -f <application-pod-failure-xxxxx-xxxxx> -n e2e
```
To get the test-case result, fetch the corresponding e2e custom resource `e2eresult` (short name: `e2er`) and check its phase (Running or Completed) and result (Pass or Fail).
```
kubectl get e2er
kubectl get e2er application-pod-failure -n e2e --no-headers -o custom-columns=:.spec.testStatus.phase
kubectl get e2er application-pod-failure -n e2e --no-headers -o custom-columns=:.spec.testStatus.result
```

@@ -0,0 +1,5 @@
{% if data_persistence is defined and data_persistence == 'mysql' %}
consistencyutil: /e2e-tests/utils/applications/mysql/mysql_data_persistence.yml
{% elif data_persistence is defined and data_persistence == 'busybox' %}
consistencyutil: /e2e-tests/utils/applications/busybox/busybox_data_persistence.yml
{% endif %}
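
For illustration, when `data_persistence` is set to `busybox` in the job parameters, the rendered `data_persistence.yml` reduces to a single line:
```
consistencyutil: /e2e-tests/utils/applications/busybox/busybox_data_persistence.yml
```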

@@ -0,0 +1,57 @@
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: app-pod-failure
  namespace: e2e
data:
  parameters.yml: |
---
apiVersion: batch/v1
kind: Job
metadata:
  generateName: application-pod-failure-
  namespace: e2e
spec:
  template:
    metadata:
      labels:
        name: application-pod-failure
    spec:
      serviceAccountName: e2e
      restartPolicy: Never
      containers:
        - name: ansibletest
          image: openebs/zfs-localpv-e2e:ci
          imagePullPolicy: IfNotPresent
          env:
            - name: ANSIBLE_STDOUT_CALLBACK
              value: default

            # Namespace where the application pod (the target of this
            # pod-failure chaos) is deployed
            - name: APP_NAMESPACE
              value: ''

            # Application pod label
            - name: APP_LABEL
              value: ''

            # Specify the container runtime used, to pick the relevant chaos util
            - name: CONTAINER_RUNTIME
              value: docker

            # Check whether the data stays consistent; currently supported
            # values are 'mysql' and 'busybox'
            - name: DATA_PERSISTENCE
              value: ""

          command: ["/bin/bash"]
          args: ["-c", "ansible-playbook ./e2e-tests/experiments/chaos/app_pod_failure/test.yml -i /etc/ansible/hosts -vv; exit 0"]

          volumeMounts:
            - name: parameters
              mountPath: /mnt/
      volumes:
        - name: parameters
          configMap:
            name: app-pod-failure
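
As a purely illustrative example (the namespace and label values below are made up, assuming a busybox test application), the env section might be filled in like this before creating the job:
```
            # Illustrative values only; adjust to the actual deployment
            - name: APP_NAMESPACE
              value: 'busybox-ns'
            - name: APP_LABEL
              value: 'app=busybox'
            - name: CONTAINER_RUNTIME
              value: docker
            - name: DATA_PERSISTENCE
              value: 'busybox'
```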

@@ -0,0 +1,105 @@
---
- hosts: localhost
  connection: local
  gather_facts: False

  vars_files:
    - test_vars.yml
    - /mnt/parameters.yml

  tasks:
    - block:

        ## Generating the testname for application pod failure chaos test
        - include_tasks: /e2e-tests/hack/create_testname.yml

        ## Record SOT (start of test) in e2e result e2e-cr (e2e-custom-resource)
        - include_tasks: /e2e-tests/hack/update_e2e_result_resource.yml
          vars:
            status: 'SOT'

        - name: Identify the data consistency util to be invoked
          template:
            src: data_persistence.j2
            dest: data_persistence.yml

        - include_vars:
            file: data_persistence.yml

        - name: Record the data consistency util path
          set_fact:
            data_consistency_util_path: "{{ consistencyutil }}"
          when: data_persistence != ''

        - name: Display the app information passed via the test job
          debug:
            msg:
              - "The application info is as follows:"
              - "Namespace : {{ app_ns }}"
              - "Label : {{ app_label }}"

        - block:

            - name: Get application pod name
              shell: >
                kubectl get pods -n {{ app_ns }} -l {{ app_label }} --no-headers
                -o=custom-columns=NAME:".metadata.name" | shuf -n 1
              args:
                executable: /bin/bash
              register: app_pod_name

            - name: Check that application pod is in running state
              shell: >
                kubectl get pod {{ app_pod_name.stdout }} -n {{ app_ns }}
                --no-headers -o custom-columns=:.status.phase
              args:
                executable: /bin/bash
              register: pod_status
              failed_when: "pod_status.stdout != 'Running'"

            - name: Create some test data
              include: "{{ data_consistency_util_path }}"
              vars:
                status: 'LOAD'
                ns: "{{ app_ns }}"
                pod_name: "{{ app_pod_name.stdout }}"
              when: data_persistence != ''

        ## APPLICATION FAULT INJECTION
        - include_tasks: /e2e-tests/chaoslib/pumba/pod_failure_by_sigkill.yaml
          vars:
            action: "killapp"
            app_pod: "{{ app_pod_name.stdout }}"
            namespace: "{{ app_ns }}"
            label: "{{ app_label }}"

        - name: Verify application data persistence
          include: "{{ data_consistency_util_path }}"
          vars:
            status: 'VERIFY'
            ns: "{{ app_ns }}"
            label: "{{ app_label }}"
            pod_name: "{{ app_pod_name.stdout }}"
          when: data_persistence != ''

        - set_fact:
            flag: "Pass"

      rescue:
        - set_fact:
            flag: "Fail"

      always:

        ## RECORD END-OF-TEST IN e2e RESULT CR
        - include_tasks: /e2e-tests/hack/update_e2e_result_resource.yml
          vars:
            status: 'EOT'
            chaostype: ""
            app: ""

        - include_tasks: /e2e-tests/chaoslib/pumba/pod_failure_by_sigkill.yaml
          vars:
            action: "deletepumba"
            namespace: "{{ app_ns }}"
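
The container-restart validation mentioned in the README happens inside `pod_failure_by_sigkill.yaml`; a minimal sketch of such a check, written as a standalone Ansible task with assumed variable names matching this playbook, could look like:
```
# Sketch only, not the actual chaoslib task: verify that the application
# container was restarted by the SIGKILL chaos.
- name: Check container restart count after chaos
  shell: >
    kubectl get pod {{ app_pod_name.stdout }} -n {{ app_ns }}
    -o jsonpath='{.status.containerStatuses[0].restartCount}'
  args:
    executable: /bin/bash
  register: restart_count
  failed_when: restart_count.stdout | int < 1
```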

@@ -0,0 +1,9 @@
test_name: application-pod-failure
app_ns: "{{ lookup('env','APP_NAMESPACE') }}"
app_label: "{{ lookup('env','APP_LABEL') }}"
cri: "{{ lookup('env','CONTAINER_RUNTIME') }}"
data_persistence: "{{ lookup('env','DATA_PERSISTENCE') }}"