#!/bin/bash set -euo pipefail KUBE_OVN_NS=kube-system WITHOUT_KUBE_PROXY=${WITHOUT_KUBE_PROXY:-false} OVN_NB_POD= OVN_SB_POD= OVN_IC_NB_POD= OVN_IC_SB_POD= KUBE_OVN_VERSION= REGISTRY="kubeovn" OVN_NORTHD_POD= PERF_TIMES=5 PERF_LABEL="PerfTest" CONN_CHECK_LABEL="conn-check" CONN_CHECK_SERVER="conn-check-server" PERF_GC_COMMAND=() LAST_PERF_FAILED_LOG="" showHelp(){ echo "kubectl ko {subcommand} [option...]" echo "Available Subcommands:" echo " [nb|sb] [status|kick|backup|dbstatus|restore] ovn-db operations show cluster status, kick stale server, backup database, get db consistency status or restore ovn nb db when met 'inconsistent data' error" echo " nbctl [ovn-nbctl options ...] invoke ovn-nbctl" echo " sbctl [ovn-sbctl options ...] invoke ovn-sbctl" echo " vsctl {nodeName} [ovs-vsctl options ...] invoke ovs-vsctl on the specified node" echo " ofctl {nodeName} [ovs-ofctl options ...] invoke ovs-ofctl on the specified node" echo " dpctl {nodeName} [ovs-dpctl options ...] invoke ovs-dpctl on the specified node" echo " appctl {nodeName} [ovs-appctl options ...] invoke ovs-appctl on the specified node" echo " tcpdump {namespace/podname} [tcpdump options ...] capture pod traffic" echo " trace ... trace ovn microflow of specific packet" echo " trace {namespace/podname} {target ip address} [target mac address] {icmp|tcp|udp} [target tcp/udp port] trace ICMP/TCP/UDP" echo " trace {namespace/podname} {target ip address} [target mac address] arp {request|reply} trace ARP request/reply" echo " diagnose {all|node|subnet|IPPorts} [nodename|subnetName|{proto1}-{IP1}-{Port1},{proto2}-{IP2}-{Port2}] diagnose connectivity of all nodes or a specific node or specify subnet's ds pod or IPPorts like 'tcp-172.18.0.2-53,udp-172.18.0.3-53'" echo " env-check check the environment configuration" echo " tuning {install-fastpath|local-install-fastpath|remove-fastpath|install-stt|local-install-stt|remove-stt} {centos7|centos8}} [kernel-devel-version] deploy kernel optimisation components to the system" echo " reload restart all kube-ovn components" echo " perf [image] performance test default image is kubeovn/test:v1.12.0" echo " icnbctl [ovn-nbctl options ...] invoke ovn-ic-nbctl" echo " icsbctl [ovn-sbctl options ...] invoke ovn-ic-sbctl" } # usage: ipv4_to_hex 192.168.0.1 ipv4_to_hex(){ printf "%02x" ${1//./ } } # convert hex to dec (portable version) hex2dec(){ for i in $(echo "$@"); do printf "%d\n" "$(( 0x$i ))" done } # https://github.com/chmduquesne/wg-ip # usage: expand_ipv6 2001::1 expand_ipv6(){ local ip=$1 # prepend 0 if we start with : echo $ip | grep -qs "^:" && ip="0${ip}" # expand :: if echo $ip | grep -qs "::"; then local colons=$(echo $ip | sed 's/[^:]//g') local missing=$(echo ":::::::::" | sed "s/$colons//") local expanded=$(echo $missing | sed 's/:/:0/g') ip=$(echo $ip | sed "s/::/$expanded/") fi local blocks=$(echo $ip | grep -o "[0-9a-f]\+") set $blocks printf "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n" \ $(hex2dec $@) } # convert an IPv6 address to bytes ipv6_bytes(){ for x in $(expand_ipv6 $1 | tr ':' ' '); do printf "%d %d " $((0x$x >> 8 & 0xff)) $((0x$x & 0xff)) done echo } # usage: ipIsInCidr 192.168.0.1 192.168.0.0/24 # return: 0 for true, 1 for false ipIsInCidr(){ local ip=$1 local cidr=$2 if [[ $ip =~ .*:.* ]]; then # IPv6 cidr=${cidr#*,} local network=${cidr%/*} local prefix=${cidr#*/} local ip_bytes=($(ipv6_bytes $ip)) local network_bytes=($(ipv6_bytes $network)) for ((i=0; i<${#ip_bytes[*]}; i++)); do if [ ${ip_bytes[$i]} -eq ${network_bytes[$i]} ]; then continue fi if [ $((($i+1)*8)) -le $prefix ]; then return 1 fi if [ $(($i*8)) -ge $prefix ]; then return 0 fi if [ $((($i+1)*8)) -le $prefix ]; then return 1 fi local bits=$(($prefix-$i*8)) local mask=$((0xff<<$bits & 0xff)) # TODO: check whether the IP is network/broadcast address if [ $((${ip_bytes[$i]} & $mask)) -ne ${network_bytes[$i]} ]; then return 1 fi done return 0 fi # IPv4 cidr=${cidr%,*} local network=${cidr%/*} local prefix=${cidr#*/} local ip_hex=$(ipv4_to_hex $ip) local ip_dec=$((0x$ip_hex)) local network_hex=$(ipv4_to_hex $network) local network_dec=$((0x$network_hex)) local broadcast_dec=$(($network_dec + 2**(32-$prefix) - 1)) # TODO: check whether the IP is network/broadcast address if [ $ip_dec -gt $network_dec -a $ip_dec -lt $broadcast_dec ]; then return 0 fi return 1 } tcpdump(){ namespacedPod="$1"; shift namespace=$(echo "$namespacedPod" | cut -d "/" -f1) podName=$(echo "$namespacedPod" | cut -d "/" -f2) if [ "$podName" = "$namespacedPod" ]; then namespace="default" fi nodeName=$(kubectl get pod "$podName" -n "$namespace" -o jsonpath={.spec.nodeName}) hostNetwork=$(kubectl get pod "$podName" -n "$namespace" -o jsonpath={.spec.hostNetwork}) if [ -z "$nodeName" ]; then echo "Pod $namespacedPod not exists on any node" exit 1 fi ovnCni=$(kubectl get pod -n $KUBE_OVN_NS -l app=kube-ovn-cni -o 'jsonpath={.items[?(@.spec.nodeName=="'$nodeName'")].metadata.name}') if [ -z "$ovnCni" ]; then echo "kube-ovn-cni not exist on node $nodeName" exit 1 fi if [ "$hostNetwork" = "true" ]; then set -x kubectl exec "$ovnCni" -n $KUBE_OVN_NS -- tcpdump -nn "$@" else nicName=$(kubectl exec "$ovnCni" -n $KUBE_OVN_NS -- ovs-vsctl --data=bare --no-heading --columns=name find interface external-ids:iface-id="$podName"."$namespace" | tr -d '\r') if [ -z "$nicName" ]; then echo "nic doesn't exist on node $nodeName" exit 1 fi podNicType=$(kubectl get pod "$podName" -n "$namespace" -o jsonpath={.metadata.annotations.ovn\\.kubernetes\\.io/pod_nic_type}) podNetNs=$(kubectl exec "$ovnCni" -n $KUBE_OVN_NS -- ovs-vsctl --data=bare --no-heading get interface "$nicName" external-ids:pod_netns | tr -d '\r' | sed -e 's/^"//' -e 's/"$//') set -x if [ "$podNicType" = "internal-port" ]; then kubectl exec "$ovnCni" -n $KUBE_OVN_NS -- nsenter --net="$podNetNs" tcpdump -nn -i "$nicName" "$@" else kubectl exec "$ovnCni" -n $KUBE_OVN_NS -- nsenter --net="$podNetNs" tcpdump -nn -i eth0 "$@" fi fi } trace(){ set +u namespacedPod="$1" namespace=$(echo "$namespacedPod" | cut -d "/" -f1) podName=$(echo "$namespacedPod" | cut -d "/" -f2) if [ "$podName" = "$namespacedPod" ]; then namespace="default" fi dst="$2" if [ -z "$dst" ]; then echo "Error: missing target IP address" exit 1 fi hostNetwork=$(kubectl get pod "$podName" -n "$namespace" -o jsonpath={.spec.hostNetwork}) if [ "$hostNetwork" = "true" ]; then echo "Can not trace host network pod" exit 1 fi af="4" nw="nw" proto="" if [[ "$dst" =~ .*:.* ]]; then af="6" nw="ipv6" proto="6" fi dstMac="" if echo "$3" | grep -qE '^([[:xdigit:]]{1,2}:){5}[[:xdigit:]]{1,2}$'; then dstMac=$3 shift fi type="$3" if [ -z "$type" ]; then echo "Error: missing protocol" echo "Usage:" echo " kubectl ko trace {namespace/podname} {target ip address} [target mac address] {icmp|tcp|udp} [target tcp/udp port]" echo " kubectl ko trace {namespace/podname} {target ip address} [target mac address] arp {request|reply}" exit 1 fi if [ "$type" = "arp" ]; then if [ $af -eq 6 ]; then echo "Error: invalid target IP address: $dst" exit 1 fi fi podIPs=($(kubectl get pod "$podName" -n "$namespace" -o jsonpath="{.status.podIPs[*].ip}")) if [ ${#podIPs[@]} -eq 0 ]; then podIPs=($(kubectl get pod "$podName" -n "$namespace" -o jsonpath={.metadata.annotations.ovn\\.kubernetes\\.io/ip_address} | sed 's/,/ /g')) if [ ${#podIPs[@]} -eq 0 ]; then echo "Error: Pod address not ready" exit 1 fi fi podIP="" for ip in ${podIPs[@]}; do if [ "$af" = "4" ]; then if [[ ! "$ip" =~ .*:.* ]]; then podIP=$ip break fi elif [[ "$ip" =~ .*:.* ]]; then podIP=$ip break fi done if [ -z "$podIP" ]; then echo "Error: Pod $namespacedPod has no IPv$af address" exit 1 fi nodeName=$(kubectl get pod "$podName" -n "$namespace" -o jsonpath={.spec.nodeName}) ovnCni=$(kubectl get pod -n $KUBE_OVN_NS -l app=kube-ovn-cni -o 'jsonpath={.items[?(@.spec.nodeName=="'$nodeName'")].metadata.name}') if [ -z "$ovnCni" ]; then echo "Error: no kube-ovn-cni Pod running on node $nodeName" exit 1 fi ls=$(kubectl get pod "$podName" -n "$namespace" -o jsonpath={.metadata.annotations.ovn\\.kubernetes\\.io/logical_switch}) if [ -z "$ls" ]; then echo "Error: Pod address not ready" exit 1 fi local cidr=$(kubectl get pod "$podName" -n "$namespace" -o jsonpath={.metadata.annotations.ovn\\.kubernetes\\.io/cidr}) mac=$(kubectl get pod "$podName" -n "$namespace" -o jsonpath={.metadata.annotations.ovn\\.kubernetes\\.io/mac_address}) if [ "$type" != "arp" -o "$4" = "reply" ]; then # need destination mac if [ -z "$dstMac" ]; then if ipIsInCidr $dst $cidr; then set +o pipefail if [ $af -eq 4 ]; then dstMac=$(kubectl exec $OVN_NB_POD -n $KUBE_OVN_NS -c ovn-central -- ovn-nbctl --data=bare --no-heading --columns=addresses list logical_switch_port | grep -w "$(echo $dst | tr . '\.')" | awk '{print $1}') else dstMac=$(kubectl exec $OVN_NB_POD -n $KUBE_OVN_NS -c ovn-central -- ovn-nbctl --data=bare --no-heading --columns=addresses list logical_switch_port | grep -i " $dst\$" | awk '{print $1}') fi set -o pipefail fi fi if [ -z "$dstMac" ]; then if [ "$type" = "arp" ]; then # destination mac must be set for ARP reply echo "Error: missing destination mac address" exit 1 fi vlan=$(kubectl get subnet "$ls" -o jsonpath={.spec.vlan}) logicalGateway=$(kubectl get subnet "$ls" -o jsonpath={.spec.logicalGateway}) u2oIC=$(kubectl get subnet "$ls" -o jsonpath={.spec.u2oInterconnection}) if [ ! -z "$vlan" -a "$logicalGateway" != "true" -a "$u2oIC" != "true" ]; then gateway=$(kubectl get subnet "$ls" -o jsonpath={.spec.gateway}) if [[ "$gateway" =~ .*,.* ]]; then if [ "$af" = "4" ]; then gateway=${gateway%%,*} else gateway=${gateway##*,} fi fi nicName=$(kubectl exec "$ovnCni" -c cni-server -n $KUBE_OVN_NS -- ovs-vsctl --data=bare --no-heading --columns=name find interface external-ids:iface-id="$podName"."$namespace" | tr -d '\r') if [ -z "$nicName" ]; then echo "Error: failed to find ovs interface for Pod namespacedPod on node $nodeName" exit 1 fi podNicType=$(kubectl get pod "$podName" -n "$namespace" -o jsonpath={.metadata.annotations.ovn\\.kubernetes\\.io/pod_nic_type}) podNetNs=$(kubectl exec "$ovnCni" -c cni-server -n $KUBE_OVN_NS -- ovs-vsctl --data=bare --no-heading get interface "$nicName" external-ids:pod_netns | tr -d '\r' | sed -e 's/^"//' -e 's/"$//') if [ "$podNicType" != "internal-port" ]; then interface=$(kubectl exec "$ovnCni" -c cni-server -n $KUBE_OVN_NS -- ovs-vsctl --format=csv --data=bare --no-heading --columns=name find interface external_id:iface-id="$podName"."$namespace") peer=$(kubectl exec "$ovnCni" -c cni-server -n $KUBE_OVN_NS -- ip link show $interface | grep -oE "^[0-9]+:\\s$interface@if[0-9]+" | awk -F @ '{print $2}') peerIndex=${peer//if/} peer=$(kubectl exec "$ovnCni" -c cni-server -n $KUBE_OVN_NS -- nsenter --net="$podNetNs" ip link show type veth | grep "^$peerIndex:" | awk -F @ '{print $1}') nicName=$(echo $peer | awk '{print $2}') fi set +o pipefail master=$(kubectl exec "$ovnCni" -c cni-server -n $KUBE_OVN_NS -- nsenter --net="$podNetNs" ip link show $nicName | grep -Eo '\smaster\s\w+\s' | awk '{print $2}') set -o pipefail if [ ! -z "$master" ]; then echo "Error: Pod nic $nicName is a slave of $master, please set the destination mac address." exit 1 fi if [[ "$gateway" =~ .*:.* ]]; then cmd="ndisc6 -q $gateway $nicName" output=$(kubectl exec "$ovnCni" -c cni-server -n $KUBE_OVN_NS -- nsenter --net="$podNetNs" ndisc6 -q "$gateway" "$nicName") else cmd="arping -c3 -C1 -i1 -I $nicName $gateway" output=$(kubectl exec "$ovnCni" -c cni-server -n $KUBE_OVN_NS -- nsenter --net="$podNetNs" arping -c3 -C1 -i1 -I "$nicName" "$gateway") fi if [ $? -ne 0 ]; then echo "Error: failed to execute '$cmd' in Pod's netns" exit 1 fi dstMac=$(echo "$output" | grep -oE '([[:xdigit:]]{1,2}:){5}[[:xdigit:]]{1,2}') fi fi if [ -z "$dstMac" ]; then echo "Using the logical gateway mac address as destination" lr=$(kubectl get pod "$podName" -n "$namespace" -o jsonpath={.metadata.annotations.ovn\\.kubernetes\\.io/logical_router}) if [ -z "$lr" ]; then lr=$(kubectl get subnet "$ls" -o jsonpath={.spec.vpc}) fi dstMac=$(kubectl exec $OVN_NB_POD -n $KUBE_OVN_NS -c ovn-central -- ovn-nbctl --data=bare --no-heading --columns=mac find logical_router_port name="$lr-$ls" | tr -d '\r') fi if [ -z "$dstMac" ]; then echo "Error: failed to get destination mac" exit 1 fi fi if [ -z "$dstMac" ]; then # set default destination mac address for ARP request dstMac="ff:ff:ff:ff:ff:ff" fi lsp="$podName.$namespace" lspUUID=$(kubectl exec $OVN_NB_POD -n $KUBE_OVN_NS -c ovn-central -- ovn-nbctl --data=bare --no-heading --columns=_uuid find logical_switch_port name="$lsp") if [ -z "$lspUUID" ]; then echo "Notice: LSP $lsp does not exist" fi vmOwner=$(kubectl get pod "$podName" -n "$namespace" -o jsonpath='{.metadata.ownerReferences[?(@.kind=="VirtualMachineInstance")].name}') if [ ! -z "$vmOwner" ]; then lsp="$vmOwner.$namespace" fi if [ -z "$lsp" ]; then echo "Error: failed to get LSP of Pod $namespace/$podName" exit 1 fi case $type in icmp) icmp_type="" if [ $af -eq 6 ]; then # echo request icmp_type="6.type == 128" fi set -x kubectl exec "$OVN_SB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovn-trace --ct=new --ct=new --ct=new --ct=new "$ls" \ "inport == \"$lsp\" && ip.ttl == 255 && icmp$icmp_type && eth.src == $mac && ip$af.src == $podIP && eth.dst == $dstMac && ip$af.dst == $dst" ;; tcp|udp) tcp_flags="" if [ "$type" = "tcp" ]; then # TCP SYN tcp_flags=" && tcp.flags == 2" fi set -x kubectl exec "$OVN_SB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovn-trace --ct=new --ct=new --ct=new --ct=new "$ls" \ "inport == \"$lsp\" && ip.ttl == 255 && eth.src == $mac && ip$af.src == $podIP && eth.dst == $dstMac && ip$af.dst == $dst && $type.src == 30000 && $type.dst == $4 $tcp_flags" ;; arp) case "$4" in ""|request) set -x kubectl exec "$OVN_SB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovn-trace "$ls" \ "inport == \"$lsp\" && eth.src == $mac && eth.dst == $dstMac && arp.op == 1 && arp.sha == $mac && arp.tha == 00:00:00:00:00:00 && arp.spa == $podIP && arp.tpa == $dst" ;; reply) set -x kubectl exec "$OVN_SB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovn-trace "$ls" \ "inport == \"$lsp\" && eth.src == $mac && eth.dst == $dstMac && arp.op == 2 && arp.sha == $mac && arp.tha == $dstMac && arp.spa == $podIP && arp.tpa == $dst" ;; *) echo "Error: invalid ARP type $4" echo "Usage:" echo " kubectl ko trace {namespace/podname} {target ip address} [target mac address] arp {request|reply}" exit 1 ;; esac ;; *) echo "Error: invalid type $type" echo "Usage:" echo " kubectl ko trace {namespace/podname} {target ip address} [target mac address] {icmp|tcp|udp} [target tcp/udp port]" echo " kubectl ko trace {namespace/podname} {target ip address} [target mac address] arp {request|reply}" exit 1 ;; esac set +x echo "--------" echo "Start OVS Tracing" echo "" echo "" inPort=$(kubectl exec "$ovnCni" -c cni-server -n $KUBE_OVN_NS -- ovs-vsctl --format=csv --data=bare --no-heading --columns=ofport find interface external_id:iface-id="$podName"."$namespace") case $type in icmp) set -x kubectl exec "$ovnCni" -c cni-server -n $KUBE_OVN_NS -- ovs-appctl ofproto/trace br-int "in_port=$inPort,icmp$proto,nw_ttl=64,${nw}_src=$podIP,${nw}_dst=$dst,dl_src=$mac,dl_dst=$dstMac" ;; tcp|udp) set -x kubectl exec "$ovnCni" -c cni-server -n $KUBE_OVN_NS -- ovs-appctl ofproto/trace br-int "in_port=$inPort,$type$proto,nw_ttl=64,${nw}_src=$podIP,${nw}_dst=$dst,dl_src=$mac,dl_dst=$dstMac,${type}_src=1000,${type}_dst=$4" ;; arp) case "$4" in ""|request) set -x kubectl exec "$ovnCni" -c cni-server -n $KUBE_OVN_NS -- ovs-appctl ofproto/trace br-int "in_port=$inPort,arp,arp_op=1,dl_src=$mac,dl_dst=$dstMac,arp_spa=$podIP,arp_tpa=$dst,arp_sha=$mac,arp_tha=00:00:00:00:00:00" ;; reply) set -x kubectl exec "$ovnCni" -c cni-server -n $KUBE_OVN_NS -- ovs-appctl ofproto/trace br-int "in_port=$inPort,arp,arp_op=2,dl_src=$mac,dl_dst=$dstMac,arp_spa=$podIP,arp_tpa=$dst,arp_sha=$mac,arp_tha=$dstMac" ;; esac ;; esac } xxctl(){ subcommand="$1"; shift nodeName="$1"; shift kubectl get no "$nodeName" > /dev/null ovsPod=$(kubectl get pod -n $KUBE_OVN_NS -l app=ovs -o 'jsonpath={.items[?(@.spec.nodeName=="'$nodeName'")].metadata.name}') if [ -z "$ovsPod" ]; then echo "ovs pod doesn't exist on node $nodeName" exit 1 fi kubectl exec "$ovsPod" -n $KUBE_OVN_NS -- ovs-$subcommand "$@" } checkLeader(){ component="$1"; shift set +o pipefail count=$(kubectl get ep ovn-$component -n $KUBE_OVN_NS -o yaml | grep ip | wc -l) set -o pipefail if [ $count -eq 0 ]; then echo "no ovn-$component exists !!" exit 1 fi if [ $count -gt 1 ]; then echo "ovn-$component has more than one leader !!" exit 1 fi echo "ovn-$component leader check ok" } applyConnServerDaemonset(){ subnetName=$1 if [ $(kubectl get subnet $subnetName | wc -l) -eq 0 ]; then echo "no subnet $subnetName exists !!" exit 1 fi imageID=$(kubectl get ds -n $KUBE_OVN_NS kube-ovn-pinger -o jsonpath={.spec.template.spec.containers[0].image}) tmpFileName="conn-server.yaml" cat < $tmpFileName kind: DaemonSet apiVersion: apps/v1 metadata: name: $subnetName-$CONN_CHECK_SERVER namespace: $KUBE_OVN_NS labels: app: $CONN_CHECK_LABEL spec: selector: matchLabels: app: $CONN_CHECK_LABEL template: metadata: annotations: ovn.kubernetes.io/logical_switch: $subnetName labels: app: $CONN_CHECK_LABEL spec: serviceAccountName: ovn containers: - name: $subnetName-$CONN_CHECK_SERVER imagePullPolicy: IfNotPresent image: $imageID command: - /kube-ovn/kube-ovn-pinger args: - --enable-verbose-conn-check=true env: - name: POD_NAME valueFrom: fieldRef: fieldPath: metadata.name EOF kubectl apply -f $tmpFileName rm $tmpFileName isfailed=true for i in {0..59} do if kubectl wait pod --for=condition=Ready -l app=$CONN_CHECK_LABEL -n $KUBE_OVN_NS 2> /dev/null; then isfailed=false break fi sleep 1; \ done if $isfailed; then echo "Error ds $subnetName-$CONN_CHECK_SERVER pod not ready" return fi } applyTestNodePortService() { local svcName="test-node-port" tmpFileName="$svcName.yaml" cat < $tmpFileName apiVersion: v1 kind: Service metadata: labels: app: $CONN_CHECK_LABEL name: $svcName namespace: $KUBE_OVN_NS spec: type: NodePort ports: - port: 60001 protocol: TCP targetPort: 8080 name: kube-ovn-pinger selector: app: kube-ovn-pinger EOF kubectl apply -f $tmpFileName rm $tmpFileName } getTestNodePortServiceIPPorts() { targetIPPorts="" nodeIPs=($(kubectl get node -o wide | grep -v "INTERNAL-IP" | awk '{print $6}')) nodePort=$(kubectl get svc test-node-port -n $KUBE_OVN_NS -o 'jsonpath={.spec.ports[0].nodePort}') for nodeIP in "${nodeIPs[@]}" do if [ -z "$targetIPPorts" ]; then targetIPPorts="tcp-$nodeIP-$nodePort" else targetIPPorts="$targetIPPorts,tcp-$nodeIP-$nodePort" fi done echo "$targetIPPorts" } diagnose(){ gcCommands=() kubectl get crd vpcs.kubeovn.io kubectl get crd vpc-nat-gateways.kubeovn.io kubectl get crd subnets.kubeovn.io kubectl get crd ips.kubeovn.io kubectl get crd vlans.kubeovn.io kubectl get crd provider-networks.kubeovn.io set +eu if ! kubectl get svc kube-dns -n kube-system ; then echo "Warning: kube-dns doesn't exist, maybe there is coredns service." fi set -eu kubectl get svc kubernetes -n default kubectl get sa -n kube-system ovn kubectl get clusterrole system:ovn kubectl get clusterrolebinding ovn kubectl get no -o wide kubectl ko nbctl show kubectl ko nbctl lr-policy-list ovn-cluster kubectl ko nbctl lr-route-list ovn-cluster kubectl ko nbctl ls-lb-list ovn-default kubectl ko nbctl list address_set kubectl ko nbctl list acl kubectl ko sbctl show if [ "${WITHOUT_KUBE_PROXY}" = "false" ]; then checkKubeProxy fi checkDeployment ovn-central checkDeployment kube-ovn-controller checkDaemonSet kube-ovn-cni checkDaemonSet ovs-ovn checkDeployment coredns checkLeader nb checkLeader sb checkLeader northd quitDiagnose() { if [ ! ${#gcCommands[@]} -eq 0 ]; then for item in "${gcCommands[@]}" do echo $item eval "$item" done fi } trap quitDiagnose EXIT type="$1" if [[ $type != "IPPorts" ]]; then gcCommands+=("kubectl delete svc -l app=$CONN_CHECK_LABEL -n $KUBE_OVN_NS") applyTestNodePortService targetIPPorts=$(getTestNodePortServiceIPPorts) fi case $type in all) echo "### kube-ovn-controller recent log" set +e kubectl logs -n $KUBE_OVN_NS -l app=kube-ovn-controller --tail=100 | grep E$(date +%m%d) set -e echo "" pingers=$(kubectl -n $KUBE_OVN_NS get po --no-headers -o custom-columns=NAME:.metadata.name -l app=kube-ovn-pinger) for pinger in $pingers do nodeName=$(kubectl get pod "$pinger" -n "$KUBE_OVN_NS" -o jsonpath={.spec.nodeName}) echo "### start to diagnose node $nodeName" echo "#### ovn-controller log:" kubectl exec -n $KUBE_OVN_NS "$pinger" -- tail /var/log/ovn/ovn-controller.log echo "" echo "#### ovs-vswitchd log:" kubectl exec -n $KUBE_OVN_NS "$pinger" -- tail /var/log/openvswitch/ovs-vswitchd.log echo "" echo "#### ovs-vsctl show results:" kubectl exec -n $KUBE_OVN_NS "$pinger" -- ovs-vsctl show echo "" echo "#### pinger diagnose results:" kubectl exec -n $KUBE_OVN_NS "$pinger" -- /kube-ovn/kube-ovn-pinger --mode=job --external-address=114.114.114.114,2400:3200::1 --target-ip-ports=$targetIPPorts echo "### finish diagnose node $nodeName" echo "" done ;; node) nodeName="$2" kubectl get no "$nodeName" > /dev/null pinger=$(kubectl -n $KUBE_OVN_NS get po -l app=kube-ovn-pinger -o 'jsonpath={.items[?(@.spec.nodeName=="'$nodeName'")].metadata.name}') if [ ! -n "$pinger" ]; then echo "Error: No kube-ovn-pinger running on node $nodeName" exit 1 fi echo "### start to diagnose node $nodeName" echo "#### ovn-controller log:" kubectl exec -n $KUBE_OVN_NS "$pinger" -- tail /var/log/ovn/ovn-controller.log echo "" echo "#### ovs-vswitchd log:" kubectl exec -n $KUBE_OVN_NS "$pinger" -- tail /var/log/openvswitch/ovs-vswitchd.log echo "" kubectl exec -n $KUBE_OVN_NS "$pinger" -- /kube-ovn/kube-ovn-pinger --mode=job --external-address=114.114.114.114,2400:3200::1 --target-ip-ports=$targetIPPorts echo "### finish diagnose node $nodeName" echo "" ;; subnet) subnetName="$2" gcCommands+=("kubectl delete ds -l app=$CONN_CHECK_LABEL -n $KUBE_OVN_NS") applyConnServerDaemonset $subnetName if [ $(kubectl get ds kube-ovn-cni -n $KUBE_OVN_NS -oyaml | grep enable-verbose-conn-check | wc -l) -eq 0 ]; then echo "Warning: kube-ovn-cni not have args enable-verbose-conn-check, it will fail when check node tcp/udp connectivity" fi pingers=$(kubectl -n $KUBE_OVN_NS get po --no-headers -o custom-columns=NAME:.metadata.name -l app=kube-ovn-pinger) for pinger in $pingers do echo "#### pinger $pinger on namespace $KUBE_OVN_NS diagnose results:" kubectl exec -n $KUBE_OVN_NS "$pinger" -- /kube-ovn/kube-ovn-pinger --mode=job --ds-name=$subnetName-$CONN_CHECK_SERVER --ds-namespace=$KUBE_OVN_NS --enable-verbose-conn-check=true --external-address=114.114.114.114,2400:3200::1 --target-ip-ports=$targetIPPorts echo "" done ;; IPPorts) targetIPPorts="$2" pingers=$(kubectl -n $KUBE_OVN_NS get po --no-headers -o custom-columns=NAME:.metadata.name -l app=kube-ovn-pinger) for pinger in $pingers do echo "#### pinger $pinger on namespace $KUBE_OVN_NS diagnose results:" kubectl exec -n $KUBE_OVN_NS "$pinger" -- /kube-ovn/kube-ovn-pinger --mode=job --target-ip-ports=$targetIPPorts echo "" done ;; *) echo "type $type not supported" echo "kubectl ko diagnose {all|node|subnet|IPPorts} [nodename|subnetName|{proto1}-{IP1}-{Port1},{proto2}-{IP2}-{Port2}]" ;; esac } getOvnCentralPod(){ NB_POD=$(kubectl get pod -n $KUBE_OVN_NS -l ovn-nb-leader=true | grep ovn-central | head -n 1 | awk '{print $1}') if [ -z "$NB_POD" ]; then echo "nb leader not exists" exit 1 fi OVN_NB_POD=$NB_POD SB_POD=$(kubectl get pod -n $KUBE_OVN_NS -l ovn-sb-leader=true | grep ovn-central | head -n 1 | awk '{print $1}') if [ -z "$SB_POD" ]; then echo "nb leader not exists" exit 1 fi OVN_SB_POD=$SB_POD NORTHD_POD=$(kubectl get pod -n kube-system -l ovn-northd-leader=true | grep ovn-central | head -n 1 | awk '{print $1}') if [ -z "$NORTHD_POD" ]; then echo "ovn northd not exists" exit 1 fi OVN_NORTHD_POD=$NORTHD_POD image=$(kubectl -n kube-system get pods -l app=kube-ovn-cni -o jsonpath='{.items[0].spec.containers[0].image}') if [ -z "$image" ]; then echo "cannot get kube-ovn image" exit 1 fi REGISTRY=$(dirname $image) KUBE_OVN_VERSION=$(basename $image | awk -F ':' '{print $2}') } getOVNICNBPod(){ OVN_IC_NB_POD=$(kubectl get pod -n $KUBE_OVN_NS -l ovn-ic-nb-leader=true | grep ovn-ic-server | head -n 1 | awk '{print $1}') if [ -z "$OVN_IC_NB_POD" ]; then echo "ic nb leader not exists" exit 1 fi } getOVNICSBPod(){ OVN_IC_SB_POD=$(kubectl get pod -n $KUBE_OVN_NS -l ovn-ic-sb-leader=true | grep ovn-ic-server | head -n 1 | awk '{print $1}') if [ -z "$OVN_IC_SB_POD" ]; then echo "ic sb leader not exists" exit 1 fi } getOvnCentralDbStatus(){ NB_PODS=$(kubectl get pod -n $KUBE_OVN_NS | grep ovn-central | awk '{print $1}') for pod in $NB_PODS do echo "get dbstatus in pod $pod" nbstatus=`kubectl exec "$pod" -n $KUBE_OVN_NS -c ovn-central -- ovn-appctl -t /var/run/ovn/ovnnb_db.ctl ovsdb-server/get-db-storage-status OVN_Northbound` echo "nb db status $nbstatus" sbstatus=`kubectl exec "$pod" -n $KUBE_OVN_NS -c ovn-central -- ovn-appctl -t /var/run/ovn/ovnsb_db.ctl ovsdb-server/get-db-storage-status OVN_Southbound` echo "sb db status $sbstatus" done } checkDaemonSet(){ name="$1" currentScheduled=$(kubectl get ds -n $KUBE_OVN_NS "$name" -o jsonpath={.status.currentNumberScheduled}) desiredScheduled=$(kubectl get ds -n $KUBE_OVN_NS "$name" -o jsonpath={.status.desiredNumberScheduled}) available=$(kubectl get ds -n $KUBE_OVN_NS "$name" -o jsonpath={.status.numberAvailable}) ready=$(kubectl get ds -n $KUBE_OVN_NS "$name" -o jsonpath={.status.numberReady}) if [ "$currentScheduled" = "$desiredScheduled" ] && [ "$desiredScheduled" = "$available" ] && [ "$available" = "$ready" ]; then echo "ds $name ready" else echo "Error ds $name not ready" exit 1 fi } checkDeployment(){ name="$1" isfailed=true for i in {0..29} do ready=$(kubectl get deployment -n $KUBE_OVN_NS "$name" -o jsonpath={.status.readyReplicas}) updated=$(kubectl get deployment -n $KUBE_OVN_NS "$name" -o jsonpath={.status.updatedReplicas}) desire=$(kubectl get deployment -n $KUBE_OVN_NS "$name" -o jsonpath={.status.replicas}) available=$(kubectl get deployment -n $KUBE_OVN_NS "$name" -o jsonpath={.status.availableReplicas}) if [ "$ready" = "$updated" ] && [ "$updated" = "$desire" ] && [ "$desire" = "$available" ]; then echo "deployment $name ready" isfailed=false break fi sleep 1 done if $isfailed; then echo "Error deployment $name not ready" fi } checkKubeProxy(){ if kubectl get ds -n kube-system --no-headers -o custom-columns=NAME:.metadata.name | grep '^kube-proxy$' >/dev/null; then checkDaemonSet kube-proxy else for node in $(kubectl get node --no-headers -o custom-columns=NAME:.metadata.name); do local pod=$(kubectl get pod -n $KUBE_OVN_NS -l app=kube-ovn-cni -o 'jsonpath={.items[?(@.spec.nodeName=="'$node'")].metadata.name}') local ip=$(kubectl get pod -n $KUBE_OVN_NS -l app=kube-ovn-cni -o 'jsonpath={.items[?(@.spec.nodeName=="'$node'")].status.podIP}') local arg="" if [[ $ip =~ .*:.* ]]; then arg="g6" ip="[$ip]" fi healthResult=$(kubectl -n $KUBE_OVN_NS exec $pod -- curl -s${arg} -m 3 -w %{http_code} http://$ip:10256/healthz -o /dev/null | grep -v 200 || true) if [ -n "$healthResult" ]; then echo "$node kube-proxy's health check failed" exit 1 fi done fi echo "kube-proxy ready" } dbtool(){ suffix=$(date +%m%d%H%M%s) component="$1"; shift action="$1"; shift case $component in nb) case $action in status) kubectl exec "$OVN_NB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovs-appctl -t /var/run/ovn/ovnnb_db.ctl cluster/status OVN_Northbound kubectl exec "$OVN_NB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovs-appctl -t /var/run/ovn/ovnnb_db.ctl ovsdb-server/get-db-storage-status OVN_Northbound ;; kick) kubectl exec "$OVN_NB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovs-appctl -t /var/run/ovn/ovnnb_db.ctl cluster/kick OVN_Northbound "$1" ;; backup) kubectl exec "$OVN_NB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovsdb-tool cluster-to-standalone /etc/ovn/ovnnb_db.$suffix.backup /etc/ovn/ovnnb_db.db kubectl cp $KUBE_OVN_NS/$OVN_NB_POD:/etc/ovn/ovnnb_db.$suffix.backup $(pwd)/ovnnb_db.$suffix.backup kubectl exec "$OVN_NB_POD" -n $KUBE_OVN_NS -c ovn-central -- rm -f /etc/ovn/ovnnb_db.$suffix.backup echo "backup ovn-$component db to $(pwd)/ovnnb_db.$suffix.backup" ;; dbstatus) getOvnCentralDbStatus ;; restore) # set ovn-central replicas to 0 replicas=$(kubectl get deployment -n $KUBE_OVN_NS ovn-central -o jsonpath={.spec.replicas}) kubectl scale deployment -n $KUBE_OVN_NS ovn-central --replicas=0 echo "ovn-central original replicas is $replicas" # backup ovn-nb db declare nodeIpArray declare podNameArray declare nodeIps if [[ $(kubectl get deployment -n kube-system ovn-central -o jsonpath='{.spec.template.spec.containers[0].env[1]}') =~ "NODE_IPS" ]]; then nodeIpVals=`kubectl get deployment -n kube-system ovn-central -o jsonpath='{.spec.template.spec.containers[0].env[1].value}'` nodeIps=(${nodeIpVals//,/ }) else nodeIps=`kubectl get node -lkube-ovn/role=master -o wide | grep -v "INTERNAL-IP" | awk '{print $6}'` fi firstIP=${nodeIps[0]} podNames=`kubectl get pod -n $KUBE_OVN_NS | grep ovs-ovn | awk '{print $1}'` echo "first nodeIP is $firstIP" i=0 for nodeIp in ${nodeIps[@]} do for pod in $podNames do hostip=$(kubectl get pod -n $KUBE_OVN_NS $pod -o jsonpath={.status.hostIP}) if [ $nodeIp = $hostip ]; then nodeIpArray[$i]=$nodeIp podNameArray[$i]=$pod i=`expr $i + 1` echo "ovs-ovn pod on node $nodeIp is $pod" break fi done done echo "backup nb db file" kubectl exec -it -n $KUBE_OVN_NS ${podNameArray[0]} -- ovsdb-tool cluster-to-standalone /etc/ovn/ovnnb_db_standalone.db /etc/ovn/ovnnb_db.db # mv all db files for pod in ${podNameArray[@]} do kubectl exec -it -n $KUBE_OVN_NS $pod -- mv -f /etc/ovn/ovnnb_db.db /tmp kubectl exec -it -n $KUBE_OVN_NS $pod -- mv -f /etc/ovn/ovnsb_db.db /tmp done # restore db and replicas echo "restore nb db file, operate in pod ${podNameArray[0]}" kubectl exec -it -n $KUBE_OVN_NS ${podNameArray[0]} -- mv -f /etc/ovn/ovnnb_db_standalone.db /etc/ovn/ovnnb_db.db kubectl scale deployment -n $KUBE_OVN_NS ovn-central --replicas=$replicas # wait ovn-central pods running availabelNum=$(kubectl get deployment -n $KUBE_OVN_NS | grep ovn-central | awk {'print $4'}) while [ $availabelNum != $replicas ] do availabelNum=$(kubectl get deployment -n $KUBE_OVN_NS | grep ovn-central | awk {'print $4'}) echo "wait all ovn-central pods running, availabel $availabelNum" sleep 1 done echo "finish restore nb db file and ovn-central replicas" echo "recreate ovs-ovn pods" kubectl delete pod -n $KUBE_OVN_NS -l app=ovs ;; *) echo "unknown action $action" esac ;; sb) case $action in status) kubectl exec "$OVN_SB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovs-appctl -t /var/run/ovn/ovnsb_db.ctl cluster/status OVN_Southbound kubectl exec "$OVN_SB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovs-appctl -t /var/run/ovn/ovnsb_db.ctl ovsdb-server/get-db-storage-status OVN_Southbound ;; kick) kubectl exec "$OVN_SB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovs-appctl -t /var/run/ovn/ovnsb_db.ctl cluster/kick OVN_Southbound "$1" ;; backup) kubectl exec "$OVN_SB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovsdb-tool cluster-to-standalone /etc/ovn/ovnsb_db.$suffix.backup /etc/ovn/ovnsb_db.db kubectl cp $KUBE_OVN_NS/$OVN_SB_POD:/etc/ovn/ovnsb_db.$suffix.backup $(pwd)/ovnsb_db.$suffix.backup kubectl exec "$OVN_SB_POD" -n $KUBE_OVN_NS -c ovn-central -- rm -f /etc/ovn/ovnsb_db.$suffix.backup echo "backup ovn-$component db to $(pwd)/ovnsb_db.$suffix.backup" ;; dbstatus) getOvnCentralDbStatus ;; restore) echo "restore cmd is only used for nb db" ;; *) echo "unknown action $action" esac ;; *) echo "unknown subcommand $component" esac } tuning(){ action="$1"; shift sys="$1"; shift case $action in install-fastpath) case $sys in centos7) docker run -it --privileged -v /lib/modules:/lib/modules -v /usr/src:/usr/src -v /tmp/:/tmp/ $REGISTRY/centos7-compile:"$KUBE_OVN_VERSION" bash -c "./module.sh centos install" while [ ! -f /tmp/kube_ovn_fastpath.ko ]; do sleep 1 done for i in $(kubectl -n kube-system get pods | grep ovn-cni | awk '{print $1}'); do kubectl cp /tmp/kube_ovn_fastpath.ko kube-system/"$i":/tmp/ done ;; centos8) docker run -it --privileged -v /lib/modules:/lib/modules -v /usr/src:/usr/src -v /tmp/:/tmp/ $REGISTRY/centos8-compile:"$KUBE_OVN_VERSION" bash -c "./module.sh centos install" while [ ! -f /tmp/kube_ovn_fastpath.ko ]; do sleep 1 done for i in $(kubectl -n kube-system get pods | grep ovn-cni | awk '{print $1}'); do kubectl cp /tmp/kube_ovn_fastpath.ko kube-system/"$i":/tmp/ done ;; *) echo "unknown system $sys" esac ;; local-install-fastpath) case $sys in centos7) # shellcheck disable=SC2145 docker run -it --privileged -v /lib/modules:/lib/modules -v /usr/src:/usr/src -v /tmp:/tmp $REGISTRY/centos7-compile:"$KUBE_OVN_VERSION" bash -c "./module.sh centos local-install $@" for i in $(kubectl -n kube-system get pods | grep ovn-cni | awk '{print $1}'); do kubectl cp /tmp/kube_ovn_fastpath.ko kube-system/"$i":/tmp/ done ;; centos8) # shellcheck disable=SC2145 docker run -it --privileged -v /lib/modules:/lib/modules -v /usr/src:/usr/src -v /tmp:/tmp $REGISTRY/centos8-compile:"$KUBE_OVN_VERSION" bash -c "./module.sh centos local-install $@" for i in $(kubectl -n kube-system get pods | grep ovn-cni | awk '{print $1}'); do kubectl cp /tmp/kube_ovn_fastpath.ko kube-system/"$i":/tmp/ done ;; *) echo "unknown system $sys" esac ;; remove-fastpath) case $sys in centos) for i in $(kubectl -n kube-system get pods | grep ovn-cni | awk '{print $1}'); do kubectl -n kube-system exec "$i" -- rm -f /tmp/kube_ovn_fastpath.ko done ;; *) echo "unknown system $sys" esac ;; install-stt) case $sys in centos7) # shellcheck disable=SC2145 docker run -it --privileged -v /lib/modules:/lib/modules -v /usr/src:/usr/src -v /tmp:/tmp $REGISTRY/centos7-compile:"$KUBE_OVN_VERSION" bash -c "./module.sh stt install" for i in $(kubectl -n kube-system get pods | grep ovn-cni | awk '{print $1}'); do for k in /tmp/*.rpm; do kubectl cp "$k" kube-system/"$i":/tmp/ done done ;; centos8) # shellcheck disable=SC2145 docker run -it --privileged -v /lib/modules:/lib/modules -v /usr/src:/usr/src -v /tmp:/tmp $REGISTRY/centos8-compile:"$KUBE_OVN_VERSION" bash -c "./module.sh stt install" for i in $(kubectl -n kube-system get pods | grep ovn-cni | awk '{print $1}'); do for k in /tmp/*.rpm; do kubectl cp "$k" kube-system/"$i":/tmp/ done done ;; *) echo "unknown system $sys" esac ;; local-install-stt) case $sys in centos7) # shellcheck disable=SC2145 docker run -it --privileged -v /lib/modules:/lib/modules -v /usr/src:/usr/src -v /tmp:/tmp $REGISTRY/centos7-compile:"$KUBE_OVN_VERSION" bash -c "./module.sh stt local-install $@" for i in $(kubectl -n kube-system get pods | grep ovn-cni | awk '{print $1}'); do for k in /tmp/*.rpm; do kubectl cp "$k" kube-system/"$i":/tmp/ done done ;; centos8) # shellcheck disable=SC2145 docker run -it --privileged -v /lib/modules:/lib/modules -v /usr/src:/usr/src -v /tmp:/tmp $REGISTRY/centos8-compile:"$KUBE_OVN_VERSION" bash -c "./module.sh stt local-install $@" for i in $(kubectl -n kube-system get pods | grep ovn-cni | awk '{print $1}'); do for k in /tmp/*.rpm; do kubectl cp "$k" kube-system/"$i":/tmp/ done done ;; *) echo "unknown system $sys" esac ;; remove-stt) case $sys in centos) for i in $(kubectl -n kube-system get pods | grep ovn-cni | awk '{print $1}'); do kubectl -n kube-system exec "$i" -- rm -f /tmp/openvswitch-kmod*.rpm done ;; *) echo "unknown system $sys" esac ;; *) echo "unknown action $action" esac } reload(){ kubectl delete pod -n kube-system -l app=ovn-central kubectl rollout status deployment/ovn-central -n kube-system kubectl delete pod -n kube-system -l app=ovs kubectl delete pod -n kube-system -l app=kube-ovn-controller kubectl rollout status deployment/kube-ovn-controller -n kube-system kubectl delete pod -n kube-system -l app=kube-ovn-cni kubectl rollout status daemonset/kube-ovn-cni -n kube-system kubectl delete pod -n kube-system -l app=kube-ovn-pinger kubectl rollout status daemonset/kube-ovn-pinger -n kube-system kubectl delete pod -n kube-system -l app=kube-ovn-monitor kubectl rollout status deployment/kube-ovn-monitor -n kube-system } env-check(){ set +e KUBE_OVN_NS=kube-system podNames=`kubectl get pod -n $KUBE_OVN_NS -l app=kube-ovn-cni -o 'jsonpath={.items[*].metadata.name}'` for pod in $podNames; do nodeName=$(kubectl get pod $pod -n $KUBE_OVN_NS -o jsonpath={.spec.nodeName}) echo "************************************************" echo "Start environment check for node $nodeName" echo "************************************************" kubectl exec -it -n $KUBE_OVN_NS $pod -c cni-server -- bash /kube-ovn/env-check.sh done } applyTestServer() { podName="test-server" nodeID=$1 imageID=$2 tmpFileName="$podName.yaml" cat < $tmpFileName apiVersion: v1 kind: Pod metadata: name: $podName namespace: $KUBE_OVN_NS labels: app: $PERF_LABEL env: server spec: containers: - name: $podName image: $imageID imagePullPolicy: IfNotPresent command: ["sh", "-c"] args: - | qperf & ./test-server.sh nodeSelector: kubernetes.io/hostname: $nodeID EOF kubectl apply -f $tmpFileName rm $tmpFileName } applyTestHostServer() { podName="test-host-server" nodeID=$1 imageID=$2 tmpFileName="$podName.yaml" cat < $tmpFileName apiVersion: v1 kind: Pod metadata: name: $podName namespace: $KUBE_OVN_NS labels: app: $PERF_LABEL spec: hostNetwork: true containers: - name: $podName image: $imageID imagePullPolicy: IfNotPresent command: ["sh", "-c"] args: - | qperf & ./test-server.sh nodeSelector: kubernetes.io/hostname: $nodeID EOF kubectl apply -f $tmpFileName rm $tmpFileName } applyTestClient() { local podName="test-client" local nodeID=$1 local imageID=$2 tmpFileName="$podName.yaml" cat < $tmpFileName apiVersion: v1 kind: Pod metadata: name: $podName namespace: $KUBE_OVN_NS labels: app: $PERF_LABEL spec: containers: - name: $podName image: $imageID imagePullPolicy: IfNotPresent command: ["sh", "-c", "sleep infinity"] nodeSelector: kubernetes.io/hostname: $nodeID EOF kubectl apply -f $tmpFileName rm $tmpFileName } applyTestHostClient() { local podName="test-host-client" local nodeID=$1 local imageID=$2 tmpFileName="$podName.yaml" cat < $tmpFileName apiVersion: v1 kind: Pod metadata: name: $podName namespace: $KUBE_OVN_NS labels: app: $PERF_LABEL spec: hostNetwork: true containers: - name: $podName image: $imageID imagePullPolicy: IfNotPresent command: ["sh", "-c", "sleep infinity"] nodeSelector: kubernetes.io/hostname: $nodeID EOF kubectl apply -f $tmpFileName rm $tmpFileName } applyTestServerService() { local svcName="test-server" tmpFileName="$svcName.yaml" cat < $tmpFileName apiVersion: v1 kind: Service metadata: labels: app: $PERF_LABEL name: $svcName namespace: $KUBE_OVN_NS spec: ports: - port: 19765 protocol: UDP targetPort: 19765 name: qperf-udp - port: 5201 protocol: UDP targetPort: 5201 name: iperf3-udp - port: 19765 protocol: TCP targetPort: 19765 name: qperf-tcp - port: 5201 protocol: TCP targetPort: 5201 name: iperf3-tcp selector: env: server EOF kubectl apply -f $tmpFileName rm $tmpFileName } addHeaderDecoration() { local header="$1" local decorator="=" local totalLength=100 local headerLength=${#header} local decoratorLength=$(( (totalLength - headerLength - 2) / 2 )) local leftDecorators=$(printf "%0.s${decorator}" $(seq 1 $decoratorLength)) local rightDecorators=$(printf "%0.s${decorator}" $(seq 1 $((totalLength - headerLength - decoratorLength - 2)))) printf "%s %s %s\n" "${leftDecorators}" "${header}" "${rightDecorators}" } addEndDecoration() { echo "====================================================================================================" } quitPerfTest() { if [ ! $? -eq 0 ]; then addHeaderDecoration "Performance Test Failed with below: " cat ./$LAST_PERF_FAILED_LOG addEndDecoration fi addHeaderDecoration "Remove Performance Test Resource" if [ ! ${#PERF_GC_COMMAND[@]} -eq 0 ]; then for item in "${PERF_GC_COMMAND[@]}" do echo $item eval "$item" done fi kubectl delete pods -l app=$PERF_LABEL -n $KUBE_OVN_NS kubectl delete svc -l app=$PERF_LABEL -n $KUBE_OVN_NS local pids=($(ps -ef | grep -v "kubectl exec" | grep "iperf -s -B 224.0.0.100 -i 1 -u" | grep -v grep | awk '{print $2}')) if [ ! ${#pids[@]} -eq 0 ]; then for pid in "${pids[@]}"; do kill $pid done fi addEndDecoration exit 0 } perf(){ addHeaderDecoration "Prepareing Performance Test Resources" imageID=${1:-"kubeovn/test:v1.12.0"} nodes=($(kubectl get node --no-headers -o custom-columns=NAME:.metadata.name)) if [[ ${#nodes} -eq 1 ]]; then applyTestClient ${nodes[0]} $imageID applyTestHostClient ${nodes[0]} $imageID applyTestServer ${nodes[0]} $imageID applyTestHostServer ${nodes[0]} $imageID elif [[ ${#nodes} -le 0 ]]; then echo "can't find node in the cluster" return elif [[ ${#nodes} -ge 2 ]]; then applyTestClient ${nodes[1]} $imageID applyTestHostClient ${nodes[1]} $imageID applyTestServer ${nodes[0]} $imageID applyTestHostServer ${nodes[0]} $imageID fi applyTestServerService isfailed=true for i in {0..300} do if kubectl wait pod --for=condition=Ready -l app=$PERF_LABEL -n kube-system 2>/dev/null ; then isfailed=false break fi sleep 1; \ done if $isfailed; then echo "Error test pod not ready" return fi addEndDecoration local serverPodIP=$(kubectl get pod test-server -n $KUBE_OVN_NS -o jsonpath={.status.podIP}) local hostserverPodIP=$(kubectl get pod test-host-server -n $KUBE_OVN_NS -o jsonpath={.status.podIP}) local svcIP=$(kubectl get svc test-server -n $KUBE_OVN_NS -o jsonpath={.spec.clusterIP}) trap quitPerfTest EXIT addHeaderDecoration "Start Pod Network Unicast Performance Test" unicastPerfTest test-client $serverPodIP addEndDecoration addHeaderDecoration "Start Host Network Performance Test" unicastPerfTest test-host-client $hostserverPodIP addEndDecoration addHeaderDecoration "Start Service Network Performance Test" unicastPerfTest test-client $svcIP $serverPodIP addEndDecoration addHeaderDecoration "Start Pod Multicast Network Performance Test" multicastPerfTest addEndDecoration addHeaderDecoration "Start Host Multicast Network Performance" multicastHostPerfTest addEndDecoration addHeaderDecoration "Start Leader Recover Time Test" checkLeaderRecover addEndDecoration } unicastPerfTest() { clientPodName=$1 serverIP=$2 backendIP=${3:-""} tmpFileName="unicast-$clientPodName.log" PERF_GC_COMMAND+=("rm -f $tmpFileName") LAST_PERF_FAILED_LOG=$tmpFileName if [[ $backendIP != "" ]]; then # qperf use other random tcp/udp port not only 19765 PERF_GC_COMMAND+=("kubectl ko nbctl lb-del test-server") kubectl ko nbctl lb-add test-server $serverIP $backendIP kubectl ko nbctl ls-lb-add ovn-default test-server fi printf "%-15s %-15s %-15s %-15s %-15s %-15s\n" "Size" "TCP Latency" "TCP Bandwidth" "UDP Latency" "UDP Lost Rate" "UDP Bandwidth" for size in "64" "128" "512" "1k" "4k" do kubectl exec $clientPodName -n $KUBE_OVN_NS -- qperf -t $PERF_TIMES $serverIP -ub -oo msg_size:$size -vu tcp_lat udp_lat > $tmpFileName 2> /dev/null formattedInput=$(cat $tmpFileName | tr -d '\n' | tr -s ' ') tcpLat=$(echo $formattedInput | grep -oP 'tcp_lat: latency = \K[\d.]+ (us|ms|sec)') udpLat=$(echo $formattedInput | grep -oP 'udp_lat: latency = \K[\d.]+ (us|ms|sec)') kubectl exec $clientPodName -n $KUBE_OVN_NS -- iperf3 -c $serverIP -u -t $PERF_TIMES -i 1 -P 10 -b 1000G -l $size > $tmpFileName 2> /dev/null udpBw=$(cat $tmpFileName | grep -oP '\d+\.?\d* [KMG]bits/sec' | tail -n 1) udpLostRate=$(cat $tmpFileName | grep -oP '\(\d+(\.\d+)?%\)' | tail -n 1) kubectl exec $clientPodName -n $KUBE_OVN_NS -- iperf3 -c $serverIP -t $PERF_TIMES -i 1 -P 10 -l $size > $tmpFileName 2> /dev/null tcpBw=$(cat $tmpFileName | grep -oP '\d+\.?\d* [KMG]bits/sec' | tail -n 1) printf "%-15s %-15s %-15s %-15s %-15s %-15s\n" "$size" "$tcpLat" "$tcpBw" "$udpLat" "$udpLostRate" "$udpBw" done } getAddressNic() { podName=$1 ipAddress=$2 interface=$(kubectl exec $podName -n $KUBE_OVN_NS -- ip -o addr show | awk '{split($4, a, "/"); print $2, a[1]}' | awk -v ip="$ipAddress" '$0 ~ ip {print $1}') echo "$interface" } multicastHostPerfTest() { clientNode=$(kubectl get pod test-host-client -n $KUBE_OVN_NS -o jsonpath={.spec.nodeName}) serverNode=$(kubectl get pod test-host-server -n $KUBE_OVN_NS -o jsonpath={.spec.nodeName}) clientHostIP=$(kubectl get pod test-host-client -n $KUBE_OVN_NS -o jsonpath={.status.hostIP}) serverHostIP=$(kubectl get pod test-host-server -n $KUBE_OVN_NS -o jsonpath={.status.hostIP}) clientNic=$(getAddressNic test-host-client $clientHostIP) serverNic=$(getAddressNic test-host-server $serverHostIP) clientovsPod=$(kubectl get pod -owide -A |grep ovs-ovn | grep $clientNode | awk '{print $2}') PERF_GC_COMMAND+=("kubectl exec $clientovsPod -n kube-system -- ip maddr del 01:00:5e:00:00:64 dev $clientNic") kubectl exec $clientovsPod -n kube-system -- ip maddr add 01:00:5e:00:00:64 dev $clientNic serverovsPod=$(kubectl get pod -owide -A |grep ovs-ovn | grep $serverNode | awk '{print $2}') PERF_GC_COMMAND+=("kubectl exec $serverovsPod -n kube-system -- ip maddr del 01:00:5e:00:00:64 dev $serverNic") kubectl exec $serverovsPod -n kube-system -- ip maddr add 01:00:5e:00:00:64 dev $serverNic genMulticastPerfResult test-host-server test-host-client } multicastPerfTest() { clientNode=$(kubectl get pod test-client -n $KUBE_OVN_NS -o jsonpath={.spec.nodeName}) serverNode=$(kubectl get pod test-server -n $KUBE_OVN_NS -o jsonpath={.spec.nodeName}) clientNs=$(kubectl ko vsctl $clientNode --column=external_ids find interface external_ids:iface-id=test-client.$KUBE_OVN_NS | awk -F 'pod_netns=' '{print $2}' | grep -o 'cni-[0-9a-f]\{8\}-[0-9a-f]\{4\}-[0-9a-f]\{4\}-[0-9a-f]\{4\}-[0-9a-f]\{12\}') serverNs=$(kubectl ko vsctl $serverNode --column=external_ids find interface external_ids:iface-id=test-server.$KUBE_OVN_NS | awk -F 'pod_netns=' '{print $2}' | grep -o 'cni-[0-9a-f]\{8\}-[0-9a-f]\{4\}-[0-9a-f]\{4\}-[0-9a-f]\{4\}-[0-9a-f]\{12\}') clientovsPod=$(kubectl get pod -owide -A |grep ovs-ovn | grep $clientNode | awk '{print $2}') kubectl exec $clientovsPod -n kube-system -- ip netns exec $clientNs ip maddr add 01:00:5e:00:00:64 dev eth0 serverovsPod=$(kubectl get pod -owide -A |grep ovs-ovn | grep $serverNode | awk '{print $2}') kubectl exec $serverovsPod -n kube-system -- ip netns exec $serverNs ip maddr add 01:00:5e:00:00:64 dev eth0 genMulticastPerfResult test-server test-client } genMulticastPerfResult() { serverName=$1 clientName=$2 tmpFileName="multicast-$serverName.log" PERF_GC_COMMAND+=("rm -f $tmpFileName") LAST_PERF_FAILED_LOG=$tmpFileName start_server_cmd="iperf -s -B 224.0.0.100 -i 1 -u" kubectl exec $serverName -n $KUBE_OVN_NS -- $start_server_cmd > $tmpFileName & sleep 1 printf "%-15s %-15s %-15s %-15s\n" "Size" "UDP Latency" "UDP Lost Rate" "UDP Bandwidth" for size in "64" "128" "512" "1k" "4k" do kubectl exec $clientName -n $KUBE_OVN_NS -- iperf -c 224.0.0.100 -u -T 32 -t $PERF_TIMES -i 1 -b 1000G -l $size > /dev/null udpBw=$(cat $tmpFileName | grep -oP '\d+\.?\d* [KMG]bits/sec' | tail -n 1) udpLostRate=$(cat $tmpFileName |grep -oP '\(\d+(\.\d+)?%\)' | tail -n 1) kubectl exec $clientName -n $KUBE_OVN_NS -- iperf -c 224.0.0.100 -u -T 32 -t $PERF_TIMES -i 1 -l $size > /dev/null udpLat=$(cat $tmpFileName | grep -oP '\d+\.?\d* ms' | tail -n 1) printf "%-15s %-15s %-15s %-15s\n" "$size" "$udpLat" "$udpLostRate" "$udpBw" done } checkLeaderRecover() { getOvnCentralPod getPodRecoverTime "nb" sleep 5 getOvnCentralPod getPodRecoverTime "sb" sleep 5 getOvnCentralPod getPodRecoverTime "northd" } getPodRecoverTime(){ component_name=$1 start_time=$(date +%s.%N) echo "Delete ovn central $component_name pod" if [[ $component_name == "nb" ]]; then kubectl delete pod $OVN_NB_POD -n kube-system elif [[ $component_name == "sb" ]]; then kubectl delete pod $OVN_SB_POD -n kube-system elif [[ $component_name == "northd" ]]; then kubectl delete pod $OVN_NORTHD_POD -n kube-system fi echo "Waiting for ovn central $component_name pod running" replicas=$(kubectl get deployment -n kube-system ovn-central -o jsonpath={.spec.replicas}) availableNum=$(kubectl get deployment -n kube-system | grep ovn-central | awk {'print $4'}) while [ $availableNum != $replicas ] do availableNum=$(kubectl get deployment -n kube-system | grep ovn-central | awk {'print $4'}) sleep 0.001 done end_time=$(date +%s.%N) elapsed_time=$(echo "$end_time - $start_time" | bc) addHeaderDecoration "OVN $component_name Recovery takes $elapsed_time s" } if [ $# -lt 1 ]; then showHelp exit 0 fi subcommand="$1"; shift getOvnCentralPod case $subcommand in nbctl) kubectl exec "$OVN_NB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovn-nbctl "$@" ;; sbctl) kubectl exec "$OVN_SB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovn-sbctl "$@" ;; icnbctl) getOVNICNBPod kubectl exec "$OVN_IC_NB_POD" -n $KUBE_OVN_NS -- ovn-ic-nbctl "$@" ;; icsbctl) getOVNICSBPod kubectl exec "$OVN_IC_SB_POD" -n $KUBE_OVN_NS -- ovn-ic-sbctl "$@" ;; vsctl|ofctl|dpctl|appctl) xxctl "$subcommand" "$@" ;; nb|sb) dbtool "$subcommand" "$@" ;; tcpdump) tcpdump "$@" ;; trace) trace "$@" ;; diagnose) diagnose "$@" ;; reload) reload ;; tuning) tuning "$@" ;; env-check) env-check ;; perf) perf "$@" ;; *) showHelp exit 1 ;; esac