---
# Verifies that the Ceph cluster has settled after a node reboot.
#
# Hosts are processed one at a time (serial: 1) and the first failure aborts
# the whole run (any_errors_fatal), so a rolling reboot stops as soon as the
# cluster does not recover. Each polling task retries up to 60 times with a
# 10 second delay between attempts.
- name: Ceph Health Check After Node Reboot
  hosts: yourhostshere
  any_errors_fatal: true
  serial: 1
  tasks:
    # Wait until the number of PGs in the "active+clean" state equals the
    # total number of PGs.
    - name: Check Ceph PG State
      command: ceph pg stat --format=json
      register: ceph_pg_stat
      # Read-only query: never report the host as changed.
      changed_when: false
      # The rc guard short-circuits so that stdout is only parsed when the
      # command succeeded. `sum` yields 0 when no PG is in "active+clean" yet,
      # so the condition is simply false (and retried) instead of failing on
      # an empty sequence.
      until: >-
        ceph_pg_stat.rc == 0 and
        ((ceph_pg_stat.stdout | from_json).pg_summary.num_pg_by_state
        | selectattr("name", "equalto", "active+clean")
        | map(attribute="num") | sum)
        == (ceph_pg_stat.stdout | from_json).pg_summary.num_pgs
      retries: 60
      delay: 10

    # Wait until the overall cluster status is HEALTH_OK.
    - name: Check Ceph Cluster Health
      command: ceph health --format=json
      register: ceph_health
      changed_when: false
      until: >-
        ceph_health.rc == 0 and
        (ceph_health.stdout | from_json).status == 'HEALTH_OK'
      retries: 60
      delay: 10

    # NOTE(review): no host filter is applied to this output, so the assert
    # below checks every node of type "osd" that the command returns, not only
    # those of the host currently being processed - confirm this is intended.
    - name: Get OSD status on current node
      command: ceph osd tree --format=json
      register: ceph_osd_tree
      changed_when: false

    # One assertion per OSD entry; the loop label keeps the output short.
    - name: Ensure all OSDs are 'up' and 'in'
      assert:
        that:
          - "item.status == 'up'"
          - "item.reweight > 0"
        fail_msg: "OSD {{ item.id }} named {{ item.name }} is not in 'up' state or has a reweight value not greater than 0."
        success_msg: "All OSDs are in 'up' state and have reweight value greater than 0."
      # Built-in selectattr replaces json_query, so neither the jmespath
      # library nor the community.general collection is needed.
      loop: "{{ (ceph_osd_tree.stdout | from_json).nodes | selectattr('type', 'equalto', 'osd') | list }}"
      loop_control:
        label: "{{ item.id }}"

    # Wait until the manager dump reports a non-empty active manager name.
    - name: Check Ceph MGR status
      command: ceph mgr dump --format=json
      register: ceph_mgr_dump
      changed_when: false
      until: >-
        ceph_mgr_dump.rc == 0 and
        (ceph_mgr_dump.stdout | from_json).active_name is defined and
        (ceph_mgr_dump.stdout | from_json).active_name != ""
      retries: 60
      delay: 10