---
# Play: verify that the Ceph cluster has recovered before moving on.
- name: Ceph Health Check After Node Reboot
  # NOTE(review): "yourhostshere" looks like a placeholder -- replace it with
  # the real inventory group or host pattern before running.
  hosts: yourhostshere
  # Handle one host at a time so each host's checks must pass before the next
  # host is touched.
  serial: 1
  # A failure on any host aborts the play for all hosts.
  any_errors_fatal: true
  tasks:
    # Wait until every placement group is "active+clean". Polls up to 60
    # times, 10 seconds apart, before the task is reported as failed.
    - name: Check Ceph PG State
      command: ceph pg stat --format=json
      register: ceph_pg_stat
      # Read-only query: never report this task as "changed".
      changed_when: false
      # The active+clean count is taken with `sum` rather than `first`: while
      # no PG is active+clean yet, the filtered list is empty, `first` would
      # yield an undefined value and the comparison would error out instead of
      # simply evaluating to false and retrying. `sum` of an empty list is 0.
      until: >-
        ((ceph_pg_stat.stdout | from_json).pg_summary.num_pg_by_state
        | selectattr("name", "equalto", "active+clean")
        | map(attribute="num") | sum)
        == (ceph_pg_stat.stdout | from_json).pg_summary.num_pgs
      retries: 60
      delay: 10

    # Wait until the overall cluster status is HEALTH_OK. Polls up to 60
    # times, 10 seconds apart, before the task is reported as failed.
    - name: Check Ceph Cluster Health
      command: ceph health --format=json
      register: ceph_health
      # Read-only query: never report this task as "changed".
      changed_when: false
      until: >-
        (ceph_health.stdout | from_json).status == 'HEALTH_OK'
      retries: 60
      delay: 10

    # Capture the OSD tree as JSON in `ceph_osd_tree` for later inspection.
    # NOTE(review): the command is run without any host filter, so despite the
    # task name the output is not restricted to the current node -- confirm
    # whether per-node filtering was intended.
    - name: Get OSD status on current node
      command: ceph osd tree --format=json
      register: ceph_osd_tree
      # Read-only query: never report this task as "changed".
      changed_when: false

    # Assert, once per OSD entry of the registered OSD tree, that the OSD is
    # "up" and has a positive reweight (used here as the "in" indicator).
    - name: Ensure all OSDs are 'up' and 'in'
      assert:
        that:
          - "item.status == 'up'"
          - "item.reweight > 0"
        fail_msg: "OSD {{ item.id }} named {{ item.name }} is not in 'up' state or has a reweight value not greater than 0."
        # Emitted once per loop item, so it describes the single OSD checked.
        success_msg: "OSD {{ item.id }} named {{ item.name }} is in 'up' state and has a reweight value greater than 0."
      # Built-in `selectattr` is used instead of `json_query`, so the task needs
      # neither the jmespath library nor an extra collection. Entries without
      # a `type` key are skipped rather than raising an undefined error.
      loop: >-
        {{ (ceph_osd_tree.stdout | from_json).nodes
           | selectattr('type', 'defined')
           | selectattr('type', 'equalto', 'osd')
           | list }}
      loop_control:
        # Show only the OSD id in the output instead of the whole node dict.
        label: "{{ item.id }}"

    # Wait until the manager dump reports a non-empty `active_name`. Polls up
    # to 60 times, 10 seconds apart, before the task is reported as failed.
    - name: Check Ceph MGR status
      command: ceph mgr dump --format=json
      register: ceph_mgr_dump
      # Read-only query: never report this task as "changed".
      changed_when: false
      # A missing `active_name` is treated like an empty one via `default`.
      until: >-
        ((ceph_mgr_dump.stdout | from_json).active_name | default("")) != ""
      retries: 60
      delay: 10