#!/bin/bash

# install-sensor-proxy.sh - Installs pulse-sensor-proxy on Proxmox host for secure temperature monitoring
# Supports --uninstall [--purge] to remove the proxy and cleanup resources.
# This script is idempotent and can be safely re-run

set -euo pipefail

CONFIG_FILE="/etc/pulse-sensor-proxy/config.yaml"
ALLOWED_NODES_FILE="/etc/pulse-sensor-proxy/allowed_nodes.yaml"
MIN_ALLOWED_NODES_FILE_VERSION="v4.31.1"
ALLOWLIST_MODE="file"
INSTALLED_PROXY_VERSION=""
PENDING_CONTROL_PLANE_FILE="/etc/pulse-sensor-proxy/pending-control-plane.env"

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Print an informational message to stdout unless QUIET is "true".
# QUIET is assigned with the option defaults near the end of this section;
# it is treated as false when unset so an early call cannot trip `set -u`.
#   $1 - message text
print_info() {
    if [ "${QUIET:-false}" != true ]; then
        echo -e "${GREEN}[INFO]${NC} $1"
    fi
}

# Print a warning message to stdout (always shown).
#   $1 - message text
print_warn() {
    echo -e "${YELLOW}[WARN]${NC} $1"
}

# Print an error message to stdout (always shown).
#   $1 - message text
print_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}

# Print a success message to stdout (always shown).
#   $1 - message text
print_success() {
    echo -e "${GREEN}✓${NC} $1"
}

# Strip a leading "v", any "+build" suffix and any "-prerelease" suffix from
# a version string and print what remains (no trailing newline).
#   $1 - version string, e.g. "v4.31.1-rc.1"
normalize_semver() {
    local ver="${1#v}"
    ver="${ver%%+*}"
    ver="${ver%%-*}"
    printf '%s' "$ver"
}

# Print "MAJOR MINOR PATCH" for a version string.
# Missing or non-numeric components become 0, components beyond the third are
# ignored, and leading zeros are dropped so the values are always safe to use
# in (( )) arithmetic (a bare word such as "dev" would otherwise be read as a
# variable name, and "08" as invalid octal).
#   $1 - version string
semver_to_tuple() {
    local ver major="" minor="" patch="" _rest=""
    ver="$(normalize_semver "${1:-}")"
    IFS='.' read -r major minor patch _rest <<< "$ver" || true

    local part
    local parts=()
    for part in "$major" "$minor" "$patch"; do
        if [[ "$part" =~ ^[0-9]+$ ]]; then
            parts+=("$((10#$part))")
        else
            parts+=(0)
        fi
    done
    printf '%s %s %s' "${parts[0]}" "${parts[1]}" "${parts[2]}"
}

# Compare two versions.
#   $1 - current version
#   $2 - target version
# Returns 0 when current >= target, 1 otherwise.
version_at_least() {
    local c_major c_minor c_patch t_major t_minor t_patch
    read -r c_major c_minor c_patch <<< "$(semver_to_tuple "$1")"
    read -r t_major t_minor t_patch <<< "$(semver_to_tuple "$2")"

    if (( c_major > t_major )); then
        return 0
    elif (( c_major < t_major )); then
        return 1
    fi

    if (( c_minor > t_minor )); then
        return 0
    elif (( c_minor < t_minor )); then
        return 1
    fi

    if (( c_patch >= t_patch )); then
        return 0
    fi
    return 1
}

# Print the version reported by an executable proxy binary, or nothing.
# The pipeline is allowed to fail: with `pipefail` a non-zero `version` call
# (or SIGPIPE caused by awk exiting early) would otherwise propagate through
# the caller's command substitution and abort the script under `set -e`.
#   $1 - path to the binary
detect_proxy_version() {
    local binary="$1"
    if [[ -x "$binary" ]]; then
        "$binary" version 2>/dev/null | awk '/pulse-sensor-proxy/{print $2; exit}' || true
    fi
}

# Check whether the installed binary's `config` command lists a subcommand
# and, optionally, whether that subcommand's help text mentions given flags.
#   $1  - subcommand name
#   $2+ - flags that must appear in `config <subcommand> --help`
# Returns 0 when supported, 1 otherwise (including when the binary is absent).
config_command_supported() {
    local subcommand="$1"
    shift || true

    if [[ ! -x "$BINARY_PATH" ]]; then
        return 1
    fi

    local help_output
    help_output="$("$BINARY_PATH" config --help 2>/dev/null || true)"
    if ! grep -Eq "^[[:space:]]*${subcommand}([[:space:]]|$)" <<<"$help_output"; then
        return 1
    fi

    if [[ "$#" -eq 0 ]]; then
        return 0
    fi

    local sub_help flag
    sub_help="$("$BINARY_PATH" config "$subcommand" --help 2>/dev/null || true)"
    for flag in "$@"; do
        if ! grep -q -- "$flag" <<<"$sub_help"; then
            return 1
        fi
    done
    return 0
}

# Set ALLOWLIST_MODE based on the installed binary's version.
# Writes INSTALLED_PROXY_VERSION and ALLOWLIST_MODE. Exits the script with
# status 1 when an installed binary is older than
# MIN_ALLOWED_NODES_FILE_VERSION.
determine_allowlist_mode() {
    INSTALLED_PROXY_VERSION="$(detect_proxy_version "$BINARY_PATH")"

    if [[ -z "$INSTALLED_PROXY_VERSION" ]]; then
        # During initial install, version detection fails - that's expected
        ALLOWLIST_MODE="file"
        return
    fi

    if version_at_least "$INSTALLED_PROXY_VERSION" "$MIN_ALLOWED_NODES_FILE_VERSION"; then
        if [[ "$QUIET" != true ]]; then
            print_info "Detected pulse-sensor-proxy ${INSTALLED_PROXY_VERSION} (allowed_nodes_file supported)"
        fi
        ALLOWLIST_MODE="file"
        return
    fi

    # Refuse to install/upgrade on unsupported versions
    print_error "pulse-sensor-proxy ${INSTALLED_PROXY_VERSION} is too old (< ${MIN_ALLOWED_NODES_FILE_VERSION})"
    print_error "File-based allowlist is now required. Please upgrade the proxy binary first."
    print_error "Download latest from: https://github.com/rcourtman/Pulse/releases/latest"
    exit 1
}

# Create the pending-control-plane marker file when a Pulse server is set.
#   $1 - mode
record_pending_control_plane() {
    local mode="$1"
    if [[ -z "$PULSE_SERVER" ]]; then
        return
    fi
    # NOTE(review): as written this only creates/truncates the marker file and
    # `mode` is never used. The statement looks like the remains of a
    # here-document whose body was lost - confirm against the canonical
    # script before relying on the file's contents.
    cat > "$PENDING_CONTROL_PLANE_FILE" </dev/null || true
}

# Remove the pending-control-plane marker file (best effort).
clear_pending_control_plane() {
    rm -f "$PENDING_CONTROL_PLANE_FILE" 2>/dev/null || true
}

# Print an address in CIDR form: values that already contain "/" are printed
# unchanged, values containing ":" get "/128", everything else gets "/32".
# Prints nothing for an empty argument.
#   $1 - IP address or CIDR
format_ip_to_cidr() {
    local ip="$1"
    if [[ -z "$ip" ]]; then
        return
    fi
    if [[ "$ip" == */* ]]; then
        printf '%s' "$ip"
        return
    fi
    if [[ "$ip" == *:* ]]; then
        printf '%s/128' "$ip"
    else
        printf '%s/32' "$ip"
    fi
}

# Ensure CONFIG_FILE lists a subnet under allowed_source_subnets.
# Prefers `config add-subnet` from the binary when available, otherwise edits
# the YAML text directly. No-op when the subnet is empty, the config file is
# missing, or the entry is already present.
#   $1 - subnet in CIDR form
ensure_allowed_source_subnet() {
    local subnet="$1"
    if [[ -z "$subnet" || ! -f "$CONFIG_FILE" ]]; then
        return
    fi

    # Use robust binary config management if available
    if config_command_supported "add-subnet"; then
        if "$BINARY_PATH" config add-subnet "$subnet" --config "$CONFIG_FILE"; then
            print_info "Added allowed_source_subnets entry ${subnet}"
            return
        else
            print_warn "Failed to add subnet using binary; falling back to legacy method"
        fi
    fi

    # Escape ERE metacharacters so the dots in an address only match dots.
    local subnet_re
    subnet_re="$(printf '%s' "$subnet" | sed 's/[][\.*^$(){}?+|]/\\&/g')"
    if grep -Eq "^[[:space:]]+-[[:space:]]*${subnet_re}([[:space:]]|$)" "$CONFIG_FILE"; then
        return
    fi

    local tmp
    tmp=$(mktemp)

    if grep -Eq "^[[:space:]]*allowed_source_subnets:" "$CONFIG_FILE"; then
        awk -v subnet="$subnet" '
            /^allowed_source_subnets:/ {print; in_block=1; indent=" "; next}
            in_block && /^[[:space:]]+-/ {
                # Capture indentation from existing items
                if (!captured) {
                    match($0, /^[[:space:]]+/)
                    indent = substr($0, RSTART, RLENGTH)
                    captured = 1
                }
            }
            in_block && /^[^[:space:]]/ {
                if (!added) {
                    printf("%s- %s\n", indent, subnet)
                    added=1
                }
                in_block=0
            }
            {print}
            END {
                if (in_block && !added) {
                    printf("%s- %s\n", indent, subnet)
                }
            }
        ' "$CONFIG_FILE" > "$tmp"
    else
        cat "$CONFIG_FILE" > "$tmp"
        {
            echo ""
            echo "allowed_source_subnets:"
            echo " - $subnet"
        } >> "$tmp"
    fi

    # mktemp creates a 0600 file owned by the caller; carry over the config
    # file's mode and owner so the replacement stays readable by whoever
    # could read the original.
    chmod --reference="$CONFIG_FILE" "$tmp" 2>/dev/null || true
    chown --reference="$CONFIG_FILE" "$tmp" 2>/dev/null || true

    if mv "$tmp" "$CONFIG_FILE"; then
        print_info "Added allowed_source_subnets entry ${subnet}"
    else
        rm -f "$tmp"
        print_warn "Failed to update allowed_source_subnets with ${subnet}"
    fi
}

# Replace any "# pulse-managed-key" entries in root's authorized_keys with
# the supplied line, keeping the existing file's mode and owner.
#   $1 - complete authorized_keys line to install
configure_local_authorized_key() {
    local auth_line=$1
    local auth_keys_file="/root/.ssh/authorized_keys"
    local tmp_auth
    tmp_auth=$(mktemp)

    mkdir -p /root/.ssh
    touch "$tmp_auth"

    if [[ -f "$auth_keys_file" ]]; then
        # grep exits non-zero when every line is filtered out; that is fine.
        grep -vF '# pulse-managed-key' "$auth_keys_file" >"$tmp_auth" 2>/dev/null || true
        chmod --reference="$auth_keys_file" "$tmp_auth" 2>/dev/null || chmod 600 "$tmp_auth"
        chown --reference="$auth_keys_file" "$tmp_auth" 2>/dev/null || true
    else
        chmod 600 "$tmp_auth"
    fi

    echo "${auth_line}" >>"$tmp_auth"

    if mv "$tmp_auth" "$auth_keys_file"; then
        if [ "$QUIET" != true ]; then
            print_success "SSH key configured on localhost"
        fi
    else
        rm -f "$tmp_auth"
        print_warn "Failed to configure SSH key on localhost"
        print_info "Add this line manually to /root/.ssh/authorized_keys:"
        print_info " ${auth_line}"
    fi
}

# Set PULSE_SENSOR_PROXY_SOCKET in /etc/pulse/.env inside container CTID by
# running a small script through `pct exec`. Warns (does not fail) on error.
configure_container_proxy_env() {
    local socket_line="PULSE_SENSOR_PROXY_SOCKET=/mnt/pulse-proxy/pulse-sensor-proxy.sock"
    if ! SOCKET_LINE="$socket_line" pct exec "$CTID" -- bash <<'EOF'
set -e
ENV_FILE="/etc/pulse/.env"
mkdir -p /etc/pulse
if [[ -f "$ENV_FILE" ]] && grep -q "^PULSE_SENSOR_PROXY_SOCKET=" "$ENV_FILE" 2>/dev/null; then
    sed -i "s|^PULSE_SENSOR_PROXY_SOCKET=.*|$SOCKET_LINE|" "$ENV_FILE"
else
    echo "$SOCKET_LINE" >> "$ENV_FILE"
fi
chmod 600 "$ENV_FILE" 2>/dev/null || true
chown pulse:pulse "$ENV_FILE" 2>/dev/null || true
EOF
    then
        print_warn "Unable to update /etc/pulse/.env inside container $CTID"
    fi
}

# In file mode, normalize the config so it references the allowed-nodes file;
# in any other mode, drop the allowed_nodes_file key from the config.
ensure_allowed_nodes_file_reference() {
    if [[ "$ALLOWLIST_MODE" != "file" ]]; then
        if [[ -f "$CONFIG_FILE" ]]; then
            sed -i '/^[[:space:]]*allowed_nodes_file:/d' "$CONFIG_FILE" 2>/dev/null || true
        fi
        return
    fi
    normalize_allowed_nodes_section
}

# In file mode, remove inline allowed_nodes content from the config
# (delegates to normalize_allowed_nodes_section). No-op otherwise.
remove_allowed_nodes_block() {
    if [[ "$ALLOWLIST_MODE" != "file" ]]; then
        return
    fi
    normalize_allowed_nodes_section
}

# Fallback used when python3 is unavailable: rewrite a YAML file without any
# allowed_nodes_file line and without any allowed_nodes block. A block is the
# key line plus the following blank, indented, "#" or "-" lines - the same
# rule the python3 path applies - so the next top-level key is preserved.
#   $1 - file to rewrite in place
# Returns 0 on success, 1 when the file could not be filtered or rewritten.
_strip_allowed_nodes_fallback() {
    local file="$1"
    local tmp status=0
    tmp=$(mktemp) || return 1
    if awk '
        in_block {
            if ($0 ~ /^$/ || $0 ~ /^[ \t#-]/) next
            in_block = 0
        }
        /^[ \t]*allowed_nodes_file:/ { next }
        /^[ \t]*allowed_nodes:/ { in_block = 1; next }
        { print }
    ' "$file" > "$tmp"; then
        # Write through the existing file so its mode and owner are kept.
        cat "$tmp" > "$file" || status=1
    else
        status=1
    fi
    rm -f "$tmp"
    return "$status"
}

# Rewrite CONFIG_FILE so that inline allowed_nodes blocks and existing
# allowed_nodes_file lines are removed and a single allowed_nodes_file
# reference (preceded by a comment) is appended at the end.
normalize_allowed_nodes_section() {
    if [[ ! -f "$CONFIG_FILE" ]]; then
        return
    fi

    if command -v python3 >/dev/null 2>&1; then
        python3 - "$CONFIG_FILE" <<'PY'
import sys
from pathlib import Path

path = Path(sys.argv[1])
if not path.exists():
    sys.exit(0)

lines = path.read_text().splitlines()
to_skip = set()
saved_comment = None


def capture_comment_block(idx: int):
    global saved_comment
    blanks = []
    comments = []
    j = idx - 1
    while j >= 0 and lines[j].strip() == "":
        blanks.append((j, lines[j]))
        j -= 1
    while j >= 0 and lines[j].lstrip().startswith("#"):
        comments.append((j, lines[j]))
        j -= 1
    if not comments:
        return []
    blanks.reverse()
    comments.reverse()
    block = blanks + comments
    for index, _ in block:
        to_skip.add(index)
    return [text for _, text in block]


i = 0
while i < len(lines):
    line = lines[i]
    stripped = line.lstrip()

    if stripped.startswith("allowed_nodes_file:"):
        comment_block = capture_comment_block(i)
        if comment_block:
            saved_comment = comment_block
        to_skip.add(i)
        i += 1
        continue

    if stripped.startswith("allowed_nodes:"):
        comment_block = capture_comment_block(i)
        if comment_block:
            saved_comment = comment_block
        to_skip.add(i)
        i += 1
        while i < len(lines):
            next_line = lines[i]
            next_stripped = next_line.lstrip()
            if (
                next_stripped == ""
                or next_stripped.startswith("#")
                or next_stripped.startswith("-")
                or next_line.startswith((" ", "\t"))
            ):
                to_skip.add(i)
                i += 1
                continue
            break
        continue

    i += 1

result = [text for idx, text in enumerate(lines) if idx not in to_skip]

default_comment = [
    "# Cluster nodes (auto-discovered during installation)",
    "# These nodes are allowed to request temperature data when cluster IPC validation is unavailable",
]

if saved_comment is None:
    saved_comment = [""] + default_comment
else:
    while saved_comment and saved_comment[-1].strip() == "":
        saved_comment.pop()
    if saved_comment and saved_comment[0].strip() != "":
        saved_comment.insert(0, "")

if result and result[-1].strip() != "":
    result.append("")

result.extend(saved_comment)
result.append('allowed_nodes_file: "/etc/pulse-sensor-proxy/allowed_nodes.yaml"')

path.write_text("\n".join(result).rstrip() + "\n")
PY
        return
    fi

    # Fallback when python3 is unavailable
    _strip_allowed_nodes_fallback "$CONFIG_FILE" 2>/dev/null || true
    if ! grep -q "allowed_nodes_file" "$CONFIG_FILE" 2>/dev/null; then
        {
            echo ""
            echo "# Cluster nodes (auto-discovered during installation)"
            echo "# These nodes are allowed to request temperature data when cluster IPC validation is unavailable"
            echo 'allowed_nodes_file: "/etc/pulse-sensor-proxy/allowed_nodes.yaml"'
        } >>"$CONFIG_FILE"
    fi
}

# Move inline allowed_nodes entries from CONFIG_FILE into ALLOWED_NODES_FILE
# using the binary's `config migrate-to-file` command. Skipped (with a
# warning) when the binary is not installed yet; a failed migration is
# reported but not fatal.
migrate_inline_allowed_nodes_to_file() {
    # Phase 2: Use config CLI for migration - no Python manipulation
    if [[ ! -f "$CONFIG_FILE" ]]; then
        return
    fi

    if [[ ! -x "$BINARY_PATH" ]]; then
        print_warn "Binary not available yet; skipping migration"
        return
    fi

    # Use CLI to atomically migrate inline nodes to file
    if "$BINARY_PATH" config migrate-to-file --config "$CONFIG_FILE" --allowed-nodes "$ALLOWED_NODES_FILE"; then
        print_success "Migration complete: inline allowed_nodes moved to file"
    else
        print_warn "Migration of inline allowed_nodes to file did not complete"
    fi
}

# Inline mode only: merge the given nodes into the allowed_nodes list kept
# directly in CONFIG_FILE. Requires python3; skipped with a warning without it.
#   $1  - comment text to place above the list
#   $2+ - node names/addresses to merge
write_inline_allowed_nodes() {
    local comment_line="${1:-}"
    shift || true
    local nodes=("$@")

    if [[ "$ALLOWLIST_MODE" != "inline" ]]; then
        return
    fi

    if ! command -v python3 >/dev/null 2>&1; then
        print_warn "python3 is required to manage inline allowed_nodes; skipping update"
        return
    fi

    # NOTE(review): this first pass deletes every allowed_nodes block before
    # the second pass looks for existing entries to merge, so the second pass
    # will normally find none - confirm this is intended.
    python3 - "$CONFIG_FILE" <<'PY'
import sys
from pathlib import Path

path = Path(sys.argv[1])
if not path.exists():
    sys.exit(0)

lines = path.read_text().splitlines()
to_skip = set()


def capture_leading_comment(idx: int):
    """Remove contiguous blank/comment lines immediately above idx."""
    j = idx - 1
    while j >= 0 and lines[j].strip() == "":
        to_skip.add(j)
        j -= 1
    while j >= 0 and lines[j].lstrip().startswith("#"):
        to_skip.add(j)
        j -= 1


i = 0
while i < len(lines):
    line = lines[i]
    stripped = line.lstrip()

    if stripped.startswith("allowed_nodes_file:"):
        to_skip.add(i)
        i += 1
        continue

    if stripped.startswith("allowed_nodes:"):
        capture_leading_comment(i)
        to_skip.add(i)
        i += 1
        while i < len(lines):
            next_line = lines[i]
            next_stripped = next_line.lstrip()
            if (
                next_stripped == ""
                or next_stripped.startswith("#")
                or next_stripped.startswith("-")
                or next_line.startswith((" ", "\t"))
            ):
                to_skip.add(i)
                i += 1
                continue
            break
        continue

    i += 1

result = [text for idx, text in enumerate(lines) if idx not in to_skip]
path.write_text("\n".join(result).rstrip() + "\n")
PY

    # The ${nodes[@]+...} form keeps an empty list from tripping `set -u` on
    # older bash releases.
    python3 - "$CONFIG_FILE" "$comment_line" ${nodes[@]+"${nodes[@]}"} <<'PY'
import sys
from pathlib import Path

path = Path(sys.argv[1])
comment = (sys.argv[2] or "").strip()
new_nodes = [n.strip() for n in sys.argv[3:] if n.strip()]

lines = []
if path.exists():
    lines = path.read_text().splitlines()

skip = set()
existing = []
i = 0
while i < len(lines):
    line = lines[i]
    stripped = line.lstrip()

    if stripped.startswith("allowed_nodes_file:"):
        skip.add(i)
        i += 1
        continue

    if stripped.startswith("allowed_nodes:"):
        skip.add(i)
        i += 1
        while i < len(lines):
            current = lines[i]
            current_stripped = current.lstrip()
            if current_stripped.startswith("-"):
                value = current_stripped[1:].strip()
                if value:
                    existing.append(value)
                skip.add(i)
                i += 1
                continue
            if (
                current_stripped == ""
                or current_stripped.startswith("#")
                or current.startswith((" ", "\t"))
            ):
                skip.add(i)
                i += 1
                continue
            break
        continue

    i += 1

result = [line for idx, line in enumerate(lines) if idx not in skip]

seen = set()
merged = []
for entry in existing + new_nodes:
    normalized = entry.strip()
    if not normalized:
        continue
    key = normalized.lower()
    if key in seen:
        continue
    seen.add(key)
    merged.append(normalized)

if merged:
    if result and result[-1].strip() != "":
        result.append("")
    if comment:
        result.append(f"# {comment}")
    else:
        result.append("# Cluster nodes (auto-discovered during installation)")
    result.append("allowed_nodes:")
    for entry in merged:
        result.append(f" - {entry}")

content = "\n".join(result).rstrip()
if content:
    content += "\n"

path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(content)
PY
}

# Inline mode only: when CONFIG_FILE contains more than one allowed_nodes
# block, merge them (case-insensitive de-duplication, first spelling wins)
# into a single block at the position of the first one. Requires python3.
cleanup_inline_allowed_nodes() {
    if [[ "$ALLOWLIST_MODE" != "inline" ]]; then
        return
    fi

    if ! command -v python3 >/dev/null 2>&1; then
        return
    fi

    if [[ ! -f "$CONFIG_FILE" ]]; then
        return
    fi

    python3 - "$CONFIG_FILE" <<'PY'
import sys
from pathlib import Path

path = Path(sys.argv[1])
if not path.exists():
    sys.exit(0)

lines = path.read_text().splitlines()
blocks = []
i = 0
while i < len(lines):
    line = lines[i]
    stripped = line.lstrip()
    if stripped.startswith("allowed_nodes:"):
        start = i
        entries = []
        j = i + 1
        while j < len(lines):
            nxt = lines[j]
            nxt_stripped = nxt.lstrip()
            if nxt_stripped.startswith("-"):
                entries.append(nxt_stripped[1:].strip())
                j += 1
                continue
            if (
                nxt_stripped.startswith("#")
                or nxt_stripped == ""
                or nxt.startswith((" ", "\t"))
            ):
                j += 1
                continue
            break

        comment_indices = set()
        comment_text = []
        k = start - 1
        while k >= 0 and lines[k].strip() == "":
            comment_indices.add(k)
            k -= 1
        while k >= 0 and lines[k].lstrip().startswith("#"):
            comment_indices.add(k)
            comment_text.append(lines[k])
            k -= 1
        comment_text.reverse()

        blocks.append(
            {
                "start": start,
                "end": j,
                "comment_indices": comment_indices,
                "comment_text": comment_text,
                "entries": entries,
            }
        )
        i = j
        continue
    i += 1

if len(blocks) <= 1:
    sys.exit(0)

seen = set()
merged = []
for block in blocks:
    for entry in block["entries"]:
        key = entry.lower()
        if not key or key in seen:
            continue
        seen.add(key)
        merged.append(entry)

if not merged:
    sys.exit(0)

first_block = blocks[0]
insert_at = min(
    [first_block["start"]] + list(first_block["comment_indices"])
) if first_block["comment_indices"] else first_block["start"]


def build_comment():
    if first_block["comment_text"]:
        return first_block["comment_text"]
    return ["# Cluster nodes (auto-discovered during installation)"]


comment_block = build_comment()
replacement = []
replacement.extend(comment_block)
if replacement and replacement[-1].strip() != "":
    replacement.append("")
replacement.append("allowed_nodes:")
for entry in merged:
    replacement.append(f" - {entry}")
replacement.append("")

indices_to_remove = set()
for block in blocks:
    indices_to_remove.update(range(block["start"], block["end"]))
    indices_to_remove.update(block["comment_indices"])

result = []
inserted = False
for idx, line in enumerate(lines):
    if not inserted and idx == insert_at:
        result.extend(replacement)
        inserted = True
    if idx in indices_to_remove:
        continue
    result.append(line)

if not inserted:
    if result and result[-1].strip() != "":
        result.append("")
    result.extend(replacement)

content = "\n".join(result).rstrip() + "\n"
path.write_text(content)
PY
}

# Merge nodes into ALLOWED_NODES_FILE through the binary's config CLI.
#   $1  - comment text (accepted for interface compatibility; not used here)
#   $2+ - node names/addresses to merge
# Returns 1 when the migration or the update fails.
update_allowed_nodes() {
    local comment_line="${1:-}"
    # Tolerate a call with no arguments; a bare `shift` would abort under set -e.
    shift || true
    local nodes=("$@")

    # Phase 2: Use config CLI exclusively - no shell/Python manipulation
    # First, migrate any inline allowed_nodes to file mode (failures are fatal)
    if ! "$BINARY_PATH" config migrate-to-file --config "$CONFIG_FILE" --allowed-nodes "$ALLOWED_NODES_FILE"; then
        print_error "Failed to migrate config to file mode"
        return 1
    fi

    # Build --merge flags for the CLI
    local node
    local merge_args=()
    for node in ${nodes[@]+"${nodes[@]}"}; do
        if [[ -n "$node" ]]; then
            merge_args+=(--merge "$node")
        fi
    done

    if [[ ${#merge_args[@]} -eq 0 ]]; then
        return
    fi

    # Use the config CLI for atomic, locked updates
    if "$BINARY_PATH" config set-allowed-nodes --allowed-nodes "$ALLOWED_NODES_FILE" "${merge_args[@]}"; then
        chmod 0644 "$ALLOWED_NODES_FILE" 2>/dev/null || true
        chown pulse-sensor-proxy:pulse-sensor-proxy "$ALLOWED_NODES_FILE" 2>/dev/null || true
    else
        print_error "Failed to update allowed_nodes using config CLI"
        return 1
    fi
}

# Installation root - writable location that works on read-only /usr systems
INSTALL_ROOT="/opt/pulse/sensor-proxy"

# Binaries and scripts (in writable location)
BINARY_PATH="${INSTALL_ROOT}/bin/pulse-sensor-proxy"
WRAPPER_SCRIPT="${INSTALL_ROOT}/bin/pulse-sensor-wrapper.sh"
CLEANUP_SCRIPT_PATH="${INSTALL_ROOT}/bin/pulse-sensor-cleanup.sh"
SELFHEAL_SCRIPT="${INSTALL_ROOT}/bin/pulse-sensor-proxy-selfheal.sh"
STORED_INSTALLER="${INSTALL_ROOT}/install-sensor-proxy.sh"

# System configuration (standard locations)
SERVICE_PATH="/etc/systemd/system/pulse-sensor-proxy.service"
RUNTIME_DIR="/run/pulse-sensor-proxy"
SOCKET_PATH="${RUNTIME_DIR}/pulse-sensor-proxy.sock"
WORK_DIR="/var/lib/pulse-sensor-proxy"
SSH_DIR="${WORK_DIR}/ssh"
CONFIG_DIR="/etc/pulse-sensor-proxy"
CTID_FILE="${CONFIG_DIR}/ctid"
CLEANUP_PATH_UNIT="/etc/systemd/system/pulse-sensor-cleanup.path"
CLEANUP_SERVICE_UNIT="/etc/systemd/system/pulse-sensor-cleanup.service"
CLEANUP_REQUEST_PATH="${WORK_DIR}/cleanup-request.json"
SERVICE_USER="pulse-sensor-proxy"
LOG_DIR="/var/log/pulse/sensor-proxy"
SELFHEAL_SERVICE_UNIT="/etc/systemd/system/pulse-sensor-proxy-selfheal.service"
SELFHEAL_TIMER_UNIT="/etc/systemd/system/pulse-sensor-proxy-selfheal.timer"
SCRIPT_SOURCE="$(readlink -f "${BASH_SOURCE[0]:-$0}" 2>/dev/null || printf '%s' "${BASH_SOURCE[0]:-$0}")"
SKIP_SELF_HEAL_SETUP="${PULSE_SENSOR_PROXY_SELFHEAL:-false}"
GITHUB_REPO="rcourtman/Pulse"
LATEST_RELEASE_TAG=""
REQUESTED_VERSION=""
INSTALLER_CACHE_REASON=""
DEFER_SOCKET_VERIFICATION=false

# Delete lines ending in "# pulse-managed-key" or "# pulse-proxy-key" from
# root's authorized_keys on this host, if any are present.
cleanup_local_authorized_keys() {
    local auth_keys_file="/root/.ssh/authorized_keys"
    if [[ ! -f "$auth_keys_file" ]]; then
        return
    fi

    if grep -q '# pulse-\(managed\|proxy\)-key$' "$auth_keys_file"; then
        if sed -i -e '/# pulse-managed-key$/d' -e '/# pulse-proxy-key$/d' "$auth_keys_file"; then
            print_info "Removed Pulse SSH keys from ${auth_keys_file}"
        else
            print_warn "Failed to clean Pulse SSH keys from ${auth_keys_file}"
        fi
    fi
}

# Remove Pulse SSH keys from every IPv4 address listed by `pvecm status`
# (remote nodes over ssh, this host directly). Falls back to a local-only
# cleanup when pvecm is missing or lists no addresses.
cleanup_cluster_authorized_keys_manual() {
    local nodes=()
    local node_ip local_ip
    if command -v pvecm >/dev/null 2>&1; then
        while IFS= read -r node_ip; do
            [[ -n "$node_ip" ]] && nodes+=("$node_ip")
        done < <(pvecm status 2>/dev/null | grep -vEi "qdevice" | awk '/0x[0-9a-f]+.*[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/ {for(i=1;i<=NF;i++) if($i ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/) print $i}' || true)
    fi

    if [[ ${#nodes[@]} -eq 0 ]]; then
        cleanup_local_authorized_keys
        return
    fi

    local local_ips
    local_ips="$(hostname -I 2>/dev/null || echo "")"
    local local_hostnames
    local_hostnames="$(hostname 2>/dev/null || echo "") $(hostname -f 2>/dev/null || echo "")"

    for node_ip in "${nodes[@]}"; do
        local is_local=false

        # Word splitting of $local_ips is intentional: `hostname -I` prints a
        # space-separated list.
        for local_ip in $local_ips; do
            if [[ "$node_ip" == "$local_ip" ]]; then
                is_local=true
                break
            fi
        done

        if [[ " $local_hostnames " == *" $node_ip "* ]]; then
            is_local=true
        fi

        if [[ "$node_ip" == "127.0.0.1" || "$node_ip" == "localhost" ]]; then
            is_local=true
        fi

        if [[ "$is_local" == true ]]; then
            cleanup_local_authorized_keys
            continue
        fi

        print_info "Removing Pulse SSH keys from node ${node_ip}"
        if ssh -o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=5 root@"$node_ip" \
            "sed -i -e '/# pulse-managed-key\$/d' -e '/# pulse-proxy-key\$/d' /root/.ssh/authorized_keys" 2>/dev/null; then
            print_info " SSH keys cleaned on ${node_ip}"
        else
            print_warn " Unable to clean Pulse SSH keys on ${node_ip}"
        fi
    done

    cleanup_local_authorized_keys
}

# Print the git ref/tag to fetch the installer from: an explicitly requested
# version (anything other than "latest"/"main"), else the resolved latest
# release tag, else "main".
determine_installer_ref() {
    if [[ -n "$REQUESTED_VERSION" && "$REQUESTED_VERSION" != "latest" && "$REQUESTED_VERSION" != "main" ]]; then
        printf '%s' "$REQUESTED_VERSION"
        return 0
    fi

    if [[ -n "$LATEST_RELEASE_TAG" ]]; then
        printf '%s' "$LATEST_RELEASE_TAG"
        return 0
    fi

    printf 'main'
}

# Store a copy of this installer at STORED_INSTALLER for later self-heal
# runs: copy the running script when it is a readable file, otherwise
# download it from the GitHub release assets.
# Returns 0 on success; on failure returns 1 and sets INSTALLER_CACHE_REASON.
cache_installer_for_self_heal() {
    INSTALLER_CACHE_REASON=""
    install -d "${INSTALL_ROOT}"

    local source_issue=""
    if [[ -n "$SCRIPT_SOURCE" && -f "$SCRIPT_SOURCE" ]]; then
        # When running from the cached copy itself there is nothing to copy,
        # and `install` would fail because source and target are the same file.
        if [[ "$SCRIPT_SOURCE" -ef "$STORED_INSTALLER" ]]; then
            return 0
        fi
        if install -m 0755 "$SCRIPT_SOURCE" "$STORED_INSTALLER"; then
            return 0
        fi
        source_issue="failed to copy ${SCRIPT_SOURCE}"
    else
        source_issue="no readable source"
    fi

    local repo="${GITHUB_REPO:-rcourtman/Pulse}"
    local ref
    ref="$(determine_installer_ref)"
    [[ -z "$ref" ]] && ref="main"

    local candidate_urls=()
    if [[ "$ref" != "main" ]]; then
        # Try specific version from releases first
        candidate_urls+=("https://github.com/${repo}/releases/download/${ref}/install-sensor-proxy.sh")
    fi
    # Fall back to latest release
    candidate_urls+=("https://github.com/${repo}/releases/latest/download/install-sensor-proxy.sh")

    local tmp_file
    tmp_file=$(mktemp)
    local tmp_err
    tmp_err=$(mktemp)
    local last_error=""
    local url

    for url in "${candidate_urls[@]}"; do
        if curl --fail --silent --location --connect-timeout 10 --max-time 60 "$url" -o "$tmp_file" 2>"$tmp_err"; then
            if install -m 0755 "$tmp_file" "$STORED_INSTALLER"; then
                rm -f "$tmp_file" "$tmp_err"
                if [[ "$QUIET" != true ]]; then
                    print_info "Cached installer script for self-heal from ${url}"
                fi
                return 0
            fi
            last_error="failed to write cached installer to ${STORED_INSTALLER}"
            break
        fi

        if [[ -s "$tmp_err" ]]; then
            last_error="$(cat "$tmp_err")"
        else
            last_error="HTTP error"
        fi
        : >"$tmp_err"
    done

    rm -f "$tmp_file" "$tmp_err"

    if [[ -n "$source_issue" && -n "$last_error" ]]; then
        INSTALLER_CACHE_REASON="${source_issue}; download failed (${last_error})"
    elif [[ -n "$source_issue" ]]; then
        INSTALLER_CACHE_REASON="$source_issue"
    elif [[ -n "$last_error" ]]; then
        INSTALLER_CACHE_REASON="download failed (${last_error})"
    else
        INSTALLER_CACHE_REASON="unknown failure"
    fi
    return 1
}

# Remove the proxy: stop and disable its systemd units, remove Pulse SSH
# keys, delete installed files, secrets and LXC config lines mentioning
# pulse-sensor-proxy, and - when PURGE is "true" - also delete state, config
# and log directories plus the service user and group.
perform_uninstall() {
    print_info "Starting pulse-sensor-proxy uninstall..."

    if command -v systemctl >/dev/null 2>&1; then
        print_info "Stopping pulse-sensor-proxy service"
        systemctl stop pulse-sensor-proxy 2>/dev/null || true
        print_info "Disabling pulse-sensor-proxy service"
        systemctl disable pulse-sensor-proxy 2>/dev/null || true

        print_info "Stopping cleanup path watcher"
        systemctl stop pulse-sensor-cleanup.path 2>/dev/null || true
        systemctl disable pulse-sensor-cleanup.path 2>/dev/null || true
        systemctl stop pulse-sensor-cleanup.service 2>/dev/null || true
        systemctl disable pulse-sensor-cleanup.service 2>/dev/null || true
    else
        print_warn "systemctl not available; skipping service disable"
    fi

    if [[ -x "$CLEANUP_SCRIPT_PATH" ]]; then
        print_info "Invoking cleanup script to remove Pulse SSH keys"
        mkdir -p "$WORK_DIR"
        cat > "$CLEANUP_REQUEST_PATH" <<'EOF'
{"host":""}
EOF
        if "$CLEANUP_SCRIPT_PATH"; then
            print_success "Cleanup script removed Pulse SSH keys"
        else
            print_warn "Cleanup script reported errors; attempting manual cleanup"
            cleanup_cluster_authorized_keys_manual
        fi
        rm -f "$CLEANUP_REQUEST_PATH"
    else
        cleanup_cluster_authorized_keys_manual
    fi

    if [[ -f "$BINARY_PATH" ]]; then
        rm -f "$BINARY_PATH"
        print_success "Removed binary ${BINARY_PATH}"
    else
        print_info "Binary already absent at ${BINARY_PATH}"
    fi

    if [[ -f "$SERVICE_PATH" ]]; then
        rm -f "$SERVICE_PATH"
        print_success "Removed service unit ${SERVICE_PATH}"
    fi

    if [[ -f "$CLEANUP_PATH_UNIT" ]]; then
        rm -f "$CLEANUP_PATH_UNIT"
        print_success "Removed cleanup path unit ${CLEANUP_PATH_UNIT}"
    fi

    if [[ -f "$CLEANUP_SERVICE_UNIT" ]]; then
        rm -f "$CLEANUP_SERVICE_UNIT"
        print_success "Removed cleanup service unit ${CLEANUP_SERVICE_UNIT}"
    fi

    if [[ -f "$SELFHEAL_TIMER_UNIT" ]]; then
        systemctl stop pulse-sensor-proxy-selfheal.timer 2>/dev/null || true
        systemctl disable pulse-sensor-proxy-selfheal.timer 2>/dev/null || true
        rm -f "$SELFHEAL_TIMER_UNIT"
        print_success "Removed self-heal timer ${SELFHEAL_TIMER_UNIT}"
    fi

    if [[ -f "$SELFHEAL_SERVICE_UNIT" ]]; then
        systemctl stop pulse-sensor-proxy-selfheal.service 2>/dev/null || true
        systemctl disable pulse-sensor-proxy-selfheal.service 2>/dev/null || true
        rm -f "$SELFHEAL_SERVICE_UNIT"
        print_success "Removed self-heal service ${SELFHEAL_SERVICE_UNIT}"
    fi

    if [[ -f "$SELFHEAL_SCRIPT" ]]; then
        rm -f "$SELFHEAL_SCRIPT"
        print_success "Removed self-heal helper ${SELFHEAL_SCRIPT}"
    fi

    if [[ -f "$STORED_INSTALLER" ]]; then
        rm -f "$STORED_INSTALLER"
        print_success "Removed cached installer ${STORED_INSTALLER}"
    fi

    if [[ -f "$CTID_FILE" ]]; then
        rm -f "$CTID_FILE"
    fi

    rm -f "$PENDING_CONTROL_PLANE_FILE" 2>/dev/null || true

    if command -v systemctl >/dev/null 2>&1; then
        systemctl daemon-reload 2>/dev/null || true
    fi

    rm -f "$CLEANUP_SCRIPT_PATH" "$CLEANUP_REQUEST_PATH" 2>/dev/null || true
    rm -f "$SOCKET_PATH" 2>/dev/null || true
    # ${VAR:?} makes the rm a no-op error instead of touching "/" should a
    # path variable ever be empty.
    rm -rf "${RUNTIME_DIR:?}" 2>/dev/null || true

    # Always remove HTTP secrets and TLS material (security best practice)
    if [[ -f "/etc/pulse-sensor-proxy/.http-auth-token" ]]; then
        rm -f "/etc/pulse-sensor-proxy/.http-auth-token"
        print_success "Removed HTTP auth token"
    fi
    if [[ -d "/etc/pulse-sensor-proxy/tls" ]]; then
        rm -rf "/etc/pulse-sensor-proxy/tls"
        print_success "Removed TLS certificates"
    fi

    # Check for and remove LXC bind mounts on any containers
    if command -v pct >/dev/null 2>&1; then
        print_info "Checking for LXC bind mounts..."
        # Find all containers with pulse-sensor-proxy bind mounts.
        # `pct list` is captured once and reused for the name lookup.
        local pct_listing ctid lxc_conf
        pct_listing="$(pct list || true)"
        while IFS= read -r ctid; do
            [[ -n "$ctid" ]] || continue
            lxc_conf="/etc/pve/lxc/${ctid}.conf"
            if grep -q "pulse-sensor-proxy" "$lxc_conf" 2>/dev/null; then
                CONTAINER_NAME=$(awk -v id="$ctid" '$1==id {print $3}' <<<"$pct_listing")
                print_info "Found bind mount in container $ctid ($CONTAINER_NAME)"
                if sed -i '/pulse-sensor-proxy/d' "$lxc_conf" 2>/dev/null; then
                    print_success "Removed bind mount from container $ctid ($CONTAINER_NAME)"
                    print_warn "Container restart required for change to take effect"
                else
                    print_warn "Failed to remove bind mount from container $ctid"
                fi
            fi
        done < <(awk 'NR>1 {print $1}' <<<"$pct_listing")
    fi

    if [[ "$PURGE" == true ]]; then
        print_info "Purging Pulse sensor proxy state"
        rm -rf "${WORK_DIR:?}" "${CONFIG_DIR:?}" 2>/dev/null || true
        if [[ -d "$LOG_DIR" ]]; then
            print_info "Removing log directory ${LOG_DIR}"
        fi
        rm -rf "${LOG_DIR:?}" 2>/dev/null || true

        if id -u "$SERVICE_USER" >/dev/null 2>&1; then
            if userdel --remove "$SERVICE_USER" 2>/dev/null; then
                print_success "Removed service user ${SERVICE_USER}"
            elif userdel "$SERVICE_USER" 2>/dev/null; then
                print_success "Removed service user ${SERVICE_USER}"
            else
                print_warn "Failed to remove service user ${SERVICE_USER}"
            fi
        fi

        if getent group "$SERVICE_USER" >/dev/null 2>&1; then
            if groupdel "$SERVICE_USER" 2>/dev/null; then
                print_success "Removed service group ${SERVICE_USER}"
            else
                print_warn "Failed to remove service group ${SERVICE_USER}"
            fi
        fi
    else
        if [[ -d "$WORK_DIR" ]]; then
            print_info "Preserving data directory ${WORK_DIR} (use --purge to remove)"
        fi
        if [[ -d "$CONFIG_DIR" ]]; then
            print_info "Preserving config directory ${CONFIG_DIR} (use --purge to remove)"
        fi
        if [[ -d "$LOG_DIR" ]]; then
            print_info "Preserving log directory ${LOG_DIR} (use --purge to remove)"
        fi
    fi

    print_success "pulse-sensor-proxy uninstall complete"
}

# Parse arguments first to check for standalone mode
CTID=""
VERSION="v4.32.0"
LOCAL_BINARY=""
QUIET=false
PULSE_SERVER=""
STANDALONE=false
HTTP_MODE=false
HTTP_ADDR=":8443"
FALLBACK_BASE="${PULSE_SENSOR_PROXY_FALLBACK_URL:-}" SKIP_RESTART=false RESTART_PULSE=false UNINSTALL=false PURGE=false CONTROL_PLANE_TOKEN="" CONTROL_PLANE_REFRESH="" SHORT_HOSTNAME=$(hostname -s 2>/dev/null || hostname | cut -d'.' -f1) while [[ $# -gt 0 ]]; do case $1 in --ctid) CTID="$2" shift 2 ;; --version) VERSION="$2" shift 2 ;; --local-binary) LOCAL_BINARY="$2" shift 2 ;; --pulse-server) PULSE_SERVER="$2" shift 2 ;; --quiet) QUIET=true shift ;; --standalone) STANDALONE=true shift ;; --http-mode) HTTP_MODE=true shift ;; --http-addr) HTTP_ADDR="$2" shift 2 ;; --skip-restart) SKIP_RESTART=true shift ;; --restart-pulse) RESTART_PULSE=true shift ;; --uninstall) UNINSTALL=true shift ;; --purge) PURGE=true shift ;; *) print_error "Unknown option: $1" exit 1 ;; esac done if [[ "$PURGE" == true && "$UNINSTALL" != true ]]; then print_warn "--purge is only valid together with --uninstall; ignoring" PURGE=false fi if [[ "$UNINSTALL" == true ]]; then perform_uninstall exit 0 fi REQUESTED_VERSION="${VERSION:-latest}" # If --pulse-server was provided, use it as the fallback base if [[ -n "$PULSE_SERVER" ]]; then FALLBACK_BASE="${PULSE_SERVER}/api/install/pulse-sensor-proxy" fi # Preflight checks if [[ $EUID -ne 0 ]]; then print_error "This script must be run as root" print_error "Use: sudo $0 $*" exit 1 fi # Check required commands REQUIRED_CMDS="curl openssl systemctl useradd groupadd install chmod chown mkdir jq" if [[ "$HTTP_MODE" == true ]]; then REQUIRED_CMDS="$REQUIRED_CMDS hostname awk" fi if [[ "$STANDALONE" == false ]]; then REQUIRED_CMDS="$REQUIRED_CMDS pvecm" fi for cmd in $REQUIRED_CMDS; do if ! command -v "$cmd" >/dev/null 2>&1; then print_error "Required command not found: $cmd" print_error "Please install it and try again" exit 1 fi done # Check if running on Proxmox host (only required for LXC mode) if [[ "$STANDALONE" == false ]]; then if ! 
command -v pvecm >/dev/null 2>&1; then
        print_error "This script must be run on a Proxmox VE host"
        exit 1
    fi
fi

# ---------------------------------------------------------------------------
# Argument validation
# ---------------------------------------------------------------------------
# CONTAINER_ON_THIS_NODE stays true unless --ctid names a container that
# `pct status` cannot find locally (e.g. it lives on another cluster node).
CONTAINER_ON_THIS_NODE=true
if [[ "$STANDALONE" == false ]]; then
    if [[ -z "$CTID" ]]; then
        print_error "Missing required argument: --ctid <container-id>"
        echo "Usage: $0 --ctid <container-id> [--pulse-server <url>] [--version <version>] [--local-binary <path>]"
        echo " Or: $0 --standalone [--pulse-server <url>] [--version <version>] [--local-binary <path>]"
        echo " Or: $0 --uninstall [--purge]"
        exit 1
    fi

    # Verify container exists on this node
    if ! pct status "$CTID" >/dev/null 2>&1; then
        # Container doesn't exist locally - might be on another cluster node.
        # Continue installation for host temperature monitoring, skip
        # container-specific config.
        print_warn "Container $CTID does not exist on this node"
        print_warn "Will install sensor-proxy for host temperature monitoring only"
        print_warn "Container-specific socket mount configuration will be skipped"
        CONTAINER_ON_THIS_NODE=false
    fi
fi

if [[ "$STANDALONE" == true ]]; then
    print_info "Installing pulse-sensor-proxy for standalone/Docker deployment"
elif [[ "$CONTAINER_ON_THIS_NODE" == true ]]; then
    print_info "Installing pulse-sensor-proxy for container $CTID"
else
    print_info "Installing pulse-sensor-proxy for host monitoring (container $CTID on another node)"
fi

# ---------------------------------------------------------------------------
# Service account
# ---------------------------------------------------------------------------
# Create dedicated service account if it doesn't exist
if ! id -u pulse-sensor-proxy >/dev/null 2>&1; then
    print_info "Creating pulse-sensor-proxy service account..."
    useradd --system --user-group --no-create-home --shell /usr/sbin/nologin pulse-sensor-proxy
    print_info "Service account created"
fi

# Ensure group exists (in case user was created without it)
if ! getent group pulse-sensor-proxy >/dev/null 2>&1; then
    print_info "Creating pulse-sensor-proxy group..."
    groupadd --system pulse-sensor-proxy
    usermod -aG pulse-sensor-proxy pulse-sensor-proxy
fi

# Add pulse-sensor-proxy user to www-data group for Proxmox IPC access (pvecm commands)
if ! groups pulse-sensor-proxy | grep -q '\bwww-data\b'; then
    print_info "Adding pulse-sensor-proxy to www-data group for Proxmox IPC access..."
    usermod -aG www-data pulse-sensor-proxy
fi

# Create installation directories before binary installation (handles fresh installs and upgrades)
print_info "Setting up installation directories..."
install -d -o root -g root -m 0755 "${INSTALL_ROOT}"
install -d -o root -g root -m 0755 "${INSTALL_ROOT}/bin"

# ---------------------------------------------------------------------------
# Binary installation - either from a local file or downloaded
# ---------------------------------------------------------------------------
if [[ -n "$LOCAL_BINARY" ]]; then
    # Use local binary for testing
    print_info "Using local binary: $LOCAL_BINARY"
    if [[ ! -f "$LOCAL_BINARY" ]]; then
        print_error "Local binary not found: $LOCAL_BINARY"
        exit 1
    fi
    cp "$LOCAL_BINARY" "$BINARY_PATH"
    chmod +x "$BINARY_PATH"
    print_info "Binary installed to $BINARY_PATH"
else
    # Detect architecture
    ARCH=$(uname -m)
    case $ARCH in
        x86_64)
            BINARY_NAME="pulse-sensor-proxy-linux-amd64"
            ARCH_LABEL="linux-amd64"
            ;;
        aarch64|arm64)
            BINARY_NAME="pulse-sensor-proxy-linux-arm64"
            ARCH_LABEL="linux-arm64"
            ;;
        armv7l|armhf)
            BINARY_NAME="pulse-sensor-proxy-linux-armv7"
            ARCH_LABEL="linux-armv7"
            ;;
        armv6l)
            BINARY_NAME="pulse-sensor-proxy-linux-armv6"
            ARCH_LABEL="linux-armv6"
            ;;
        i386|i686)
            BINARY_NAME="pulse-sensor-proxy-linux-386"
            ARCH_LABEL="linux-386"
            ;;
        *)
            print_error "Unsupported architecture: $ARCH"
            exit 1
            ;;
    esac

    DOWNLOAD_SUCCESS=false
    ATTEMPTED_SOURCES=()

    # Resolves the newest GitHub release that ships sensor-proxy assets.
    # Globals:  GITHUB_REPO, BINARY_NAME, ARCH_LABEL (read);
    #           LATEST_RELEASE_TAG (written on success)
    # Returns:  0 when a tag was found, 1 otherwise (a warning is printed).
    fetch_latest_release_tag() {
        local api_url="https://api.github.com/repos/$GITHUB_REPO/releases?per_page=25"
        local tmp_err
        tmp_err=$(mktemp)
        local response
        local status=0
        # --show-error is required: with --silent alone curl prints nothing to
        # stderr, so the captured error file would always be empty.
        response=$(curl --fail --silent --show-error --location --connect-timeout 10 --max-time 30 "$api_url" 2>"$tmp_err") || status=$?
        if [[ $status -ne 0 ]]; then
            if [[ -s "$tmp_err" ]]; then
                print_warn "Failed to resolve latest GitHub release: $(cat "$tmp_err")"
            else
                print_warn "Failed to resolve latest GitHub release (curl exit code $status)"
            fi
            rm -f "$tmp_err"
            return 1
        fi
        rm -f "$tmp_err"

        local tag=""
        if command -v python3 >/dev/null 2>&1; then
            # Preferred path: pick the first non-helm release that actually
            # carries a matching binary or tarball asset.
            if ! tag=$(printf '%s' "$response" | python3 -c '
import json
import sys

binary_name = sys.argv[1]
arch_label = sys.argv[2]
tar_suffix = arch_label or ""

def has_sensor_assets(tag, assets):
    names = {asset.get("name") for asset in assets if isinstance(asset, dict) and asset.get("name")}
    if binary_name and binary_name in names:
        return True
    if tar_suffix:
        tarball = f"pulse-{tag}-{tar_suffix}.tar.gz"
        if tarball in names or f"{tarball}.sha256" in names:
            return True
    universal = f"pulse-{tag}.tar.gz"
    if universal in names or f"{universal}.sha256" in names:
        return True
    return False

try:
    releases = json.load(sys.stdin)
except json.JSONDecodeError:
    sys.exit(1)

for release in releases:
    tag_name = (release.get("tag_name") or "").strip()
    if not tag_name or tag_name.startswith("helm-chart"):
        continue
    assets = release.get("assets") or []
    if has_sensor_assets(tag_name, assets):
        sys.stdout.write(tag_name)
        sys.exit(0)

sys.exit(0)
' "$BINARY_NAME" "$ARCH_LABEL"); then
                print_warn "Failed to parse GitHub releases via python3; falling back to heuristic tag detection"
                tag=""
            fi
        fi

        if [[ -z "$tag" ]]; then
            # Heuristic fallback: first tag_name that is not a helm chart.
            tag=$(printf '%s\n' "$response" \
                | grep -o '"tag_name"[[:space:]]*:[[:space:]]*"[^"]*"' \
                | cut -d'"' -f4 \
                | grep -Ev '^helm-chart-' \
                | head -n 1 || true)
        fi

        if [[ -n "$tag" ]]; then
            tag="${tag%%$'\n'*}"
        fi

        if [[ -z "$tag" ]]; then
            print_warn "Could not determine latest GitHub release for pulse-sensor-proxy"
            return 1
        fi

        LATEST_RELEASE_TAG="$tag"
        return 0
    }

    # Downloads the proxy binary for a release tag into "$BINARY_PATH.tmp".
    # Tries the standalone release asset first, then the release tarball.
    # Arguments: $1 - release tag
    # Globals:   GITHUB_REPO, BINARY_NAME, ARCH_LABEL, BINARY_PATH (read);
    #            ATTEMPTED_SOURCES, DOWNLOAD_SUCCESS (written)
    # Returns:   0 on success, 1 otherwise.
    attempt_github_asset_or_tarball() {
        local tag="$1"
        [[ -z "$tag" ]] && return 1

        local asset_url="https://github.com/$GITHUB_REPO/releases/download/${tag}/${BINARY_NAME}"
        ATTEMPTED_SOURCES+=("GitHub release asset ${tag}")
        print_info "Downloading $BINARY_NAME from GitHub release ${tag}..."

        local tmp_err
        tmp_err=$(mktemp)
        if curl --fail --silent --show-error --location --connect-timeout 10 --max-time 120 "$asset_url" -o "$BINARY_PATH.tmp" 2>"$tmp_err"; then
            rm -f "$tmp_err"
            DOWNLOAD_SUCCESS=true
            return 0
        fi

        local asset_error=""
        if [[ -s "$tmp_err" ]]; then
            asset_error="$(cat "$tmp_err")"
        fi
        rm -f "$tmp_err"
        rm -f "$BINARY_PATH.tmp" 2>/dev/null || true

        local tarball_name="pulse-${tag}-linux-${ARCH_LABEL#linux-}.tar.gz"
        local tarball_url="https://github.com/$GITHUB_REPO/releases/download/${tag}/${tarball_name}"
        ATTEMPTED_SOURCES+=("GitHub release tarball ${tarball_name}")
        print_info "Downloading ${tarball_name} to extract pulse-sensor-proxy..."

        tmp_err=$(mktemp)
        local tarball_tmp
        tarball_tmp=$(mktemp)
        # Extract into a private directory instead of the shared temp dir so
        # the extracted path is not predictable and nothing is left behind.
        local extract_dir
        extract_dir=$(mktemp -d)

        if curl --fail --silent --show-error --location --connect-timeout 10 --max-time 240 "$tarball_url" -o "$tarball_tmp" 2>"$tmp_err"; then
            if tar -tzf "$tarball_tmp" >/dev/null 2>&1 \
                && tar -xzf "$tarball_tmp" -C "$extract_dir" ./bin/pulse-sensor-proxy >/dev/null 2>&1; then
                mv "$extract_dir/bin/pulse-sensor-proxy" "$BINARY_PATH.tmp"
                rm -f "$tarball_tmp" "$tmp_err"
                rm -rf -- "$extract_dir"
                DOWNLOAD_SUCCESS=true
                return 0
            else
                print_warn "Release tarball did not contain expected ./bin/pulse-sensor-proxy"
            fi
        else
            if [[ -s "$tmp_err" ]]; then
                print_warn "Tarball download failed: $(cat "$tmp_err")"
            else
                print_warn "Tarball download failed (HTTP error)"
            fi
        fi

        rm -f "$tarball_tmp" "$tmp_err"
        rm -rf -- "$extract_dir"
        if [[ -n "$asset_error" ]]; then
            print_warn "GitHub release asset error: $asset_error"
        fi
        return 1
    }

    # Source 1: GitHub release (latest, or the explicitly requested version)
    if [[ "$REQUESTED_VERSION" == "latest" || "$REQUESTED_VERSION" == "main" || -z "$REQUESTED_VERSION" ]]; then
        if fetch_latest_release_tag; then
            attempt_github_asset_or_tarball "$LATEST_RELEASE_TAG" || true
        fi
    else
        attempt_github_asset_or_tarball "$REQUESTED_VERSION" || true
    fi

    # Source 2: fallback URL (typically the Pulse server itself)
    if [[ "$DOWNLOAD_SUCCESS" != true ]] && [[ -n "$FALLBACK_BASE" ]]; then
        fallback_url="$FALLBACK_BASE"
        # Only append the arch query when the URL neither already carries a
        # query string nor points directly at a named binary.
        if [[ "$fallback_url" != *"?"* && "$fallback_url" != *"pulse-sensor-proxy-"* ]]; then
            fallback_url="${fallback_url%/}?arch=${ARCH_LABEL}"
        fi

        ATTEMPTED_SOURCES+=("Fallback ${fallback_url}")
        print_info "Downloading $BINARY_NAME from fallback source..."
        fallback_err=$(mktemp)
        if curl --fail --silent --show-error --location --connect-timeout 10 --max-time 120 "$fallback_url" -o "$BINARY_PATH.tmp" 2>"$fallback_err"; then
            rm -f "$fallback_err"
            DOWNLOAD_SUCCESS=true
        else
            if [[ -s "$fallback_err" ]]; then
                print_error "Fallback download failed: $(cat "$fallback_err")"
            else
                print_error "Fallback download failed (HTTP error)"
            fi
            rm -f "$fallback_err"
            rm -f "$BINARY_PATH.tmp" 2>/dev/null || true
        fi
    fi

    # Source 3: copy the binary out of the Pulse container
    if [[ "$DOWNLOAD_SUCCESS" != true ]] && [[ -n "$CTID" ]] && command -v pct >/dev/null 2>&1; then
        pull_targets=(
            "/opt/pulse/bin/${BINARY_NAME}"
            "/opt/pulse/bin/pulse-sensor-proxy"
        )
        for src in "${pull_targets[@]}"; do
            tmp_pull=$(mktemp)
            if pct pull "$CTID" "$src" "$tmp_pull" >/dev/null 2>&1; then
                mv "$tmp_pull" "$BINARY_PATH.tmp"
                print_info "Copied pulse-sensor-proxy binary from container $CTID ($src)"
                DOWNLOAD_SUCCESS=true
                break
            fi
            rm -f "$tmp_pull"
        done
    fi

    if [[ "$DOWNLOAD_SUCCESS" != true ]]; then
        print_error "Unable to download pulse-sensor-proxy binary."
        if [[ ${#ATTEMPTED_SOURCES[@]} -gt 0 ]]; then
            print_error "Sources attempted:"
            for src in "${ATTEMPTED_SOURCES[@]}"; do
                print_error " - $src"
            done
        fi
        print_error "Publish a GitHub release with binary assets or ensure a Pulse server is reachable."
        exit 1
    fi

    chmod +x "$BINARY_PATH.tmp"
    mv "$BINARY_PATH.tmp" "$BINARY_PATH"
    print_info "Binary installed to $BINARY_PATH"
fi

# ---------------------------------------------------------------------------
# Service directories (handles fresh installs and upgrades)
# ---------------------------------------------------------------------------
print_info "Setting up service directories with proper ownership..."
if ! install -d -o pulse-sensor-proxy -g pulse-sensor-proxy -m 0750 /var/lib/pulse-sensor-proxy; then
    print_error "Failed to create /var/lib/pulse-sensor-proxy"
    exit 1
fi
if ! install -d -o pulse-sensor-proxy -g pulse-sensor-proxy -m 0700 "$SSH_DIR"; then
    print_error "Failed to create $SSH_DIR"
    exit 1
fi
# NOTE(review): installing /dev/null over known_hosts empties the file on
# every run, including re-runs - confirm that this reset is intended.
if ! install -m 0600 -o pulse-sensor-proxy -g pulse-sensor-proxy /dev/null "$SSH_DIR/known_hosts"; then
    print_error "Failed to create $SSH_DIR/known_hosts"
    exit 1
fi
if ! install -d -o pulse-sensor-proxy -g pulse-sensor-proxy -m 0755 /etc/pulse-sensor-proxy; then
    print_error "Failed to create /etc/pulse-sensor-proxy"
    exit 1
fi

if [[ -n "$CTID" ]]; then
    echo "$CTID" > "$CTID_FILE"
    chmod 0644 "$CTID_FILE"
fi

# ---------------------------------------------------------------------------
# HTTP Mode Setup Functions
# ---------------------------------------------------------------------------

# Installs TLS material under /etc/pulse-sensor-proxy/tls.
# Arguments: $1 - certificate path, $2 - key path. When either is empty a
#            self-signed RSA-4096 certificate valid for 3650 days is generated.
# Globals:   HOSTNAME, IP_ADDRESSES, SAN, CERT_FINGERPRINT (written when
#            generating; kept global as in the original implementation).
# Exits the script when certificate generation fails.
setup_tls_certificates() {
    local cert_path="$1"
    local key_path="$2"

    # Create TLS directory
    install -d -o root -g pulse-sensor-proxy -m 0750 /etc/pulse-sensor-proxy/tls

    if [[ -n "$cert_path" && -n "$key_path" ]]; then
        # Use provided certificates
        print_info "Using provided TLS certificates..."
        cp "$cert_path" /etc/pulse-sensor-proxy/tls/server.crt
        cp "$key_path" /etc/pulse-sensor-proxy/tls/server.key
        chmod 640 /etc/pulse-sensor-proxy/tls/server.crt
        chmod 640 /etc/pulse-sensor-proxy/tls/server.key
        chown root:pulse-sensor-proxy /etc/pulse-sensor-proxy/tls/server.crt
        chown root:pulse-sensor-proxy /etc/pulse-sensor-proxy/tls/server.key
    else
        # Generate self-signed certificate
        print_info "Generating self-signed TLS certificate..."

        # Get hostname and IPs for SAN
        HOSTNAME=$(hostname -f 2>/dev/null || hostname)
        # "|| true": with pipefail an empty or failing `hostname -I` would
        # otherwise abort the installer instead of yielding an empty IP list.
        IP_ADDRESSES=$(hostname -I 2>/dev/null | tr ' ' '\n' | grep -v '^$' | head -5 || true)

        # Build SAN list
        SAN="DNS:${HOSTNAME},DNS:localhost"
        for ip in $IP_ADDRESSES; do
            SAN="${SAN},IP:${ip}"
        done

        # Generate 4096-bit RSA key and self-signed cert valid for 10 years
        openssl req -newkey rsa:4096 -nodes -x509 -days 3650 \
            -subj "/CN=${HOSTNAME}/O=Pulse Sensor Proxy" \
            -addext "subjectAltName=${SAN}" \
            -keyout /etc/pulse-sensor-proxy/tls/server.key \
            -out /etc/pulse-sensor-proxy/tls/server.crt \
            2>/dev/null || {
            print_error "Failed to generate TLS certificate"
            exit 1
        }

        chmod 640 /etc/pulse-sensor-proxy/tls/server.key
        chmod 640 /etc/pulse-sensor-proxy/tls/server.crt
        chown root:pulse-sensor-proxy /etc/pulse-sensor-proxy/tls/server.key
        chown root:pulse-sensor-proxy /etc/pulse-sensor-proxy/tls/server.crt

        # Log certificate fingerprint for audit
        CERT_FINGERPRINT=$(openssl x509 -in /etc/pulse-sensor-proxy/tls/server.crt -noout -fingerprint -sha256 2>/dev/null | cut -d= -f2)
        print_success "TLS certificate generated (SHA256: ${CERT_FINGERPRINT})"
    fi
}

# Registers this proxy with the Pulse API (up to 3 attempts).
# Arguments: $1 - Pulse base URL
#            $2 - hostname to register
#            $3 - proxy URL (may be empty in socket mode)
#            $4 - mode, defaults to "socket"
# Outputs:   response body on stdout when Pulse answers 20x; all progress and
#            diagnostics go to stderr so callers can capture stdout.
# Returns:   0 on success AND on the "node not known to Pulse yet" soft
#            failures (with empty stdout); 1 on hard failure.
register_with_pulse() {
    local pulse_url="$1"
    local hostname="$2"
    local proxy_url="$3"
    local mode="${4:-}"

    if [[ -z "$mode" ]]; then
        mode="socket"
    fi

    # Output to stderr so it doesn't interfere with command substitution
    print_info "Registering temperature proxy with Pulse at $pulse_url..." >&2

    # Build registration request with retry logic
    local response body
    local http_code
    local attempt
    local curl_exit
    local max_attempts=3
    local register_url="${pulse_url}/api/temperature-proxy/register"

    for ((attempt = 1; attempt <= max_attempts; attempt++)); do
        if [[ $attempt -gt 1 ]]; then
            print_info "Retry attempt $attempt/$max_attempts..." >&2
            sleep 2
        fi

        # The HTTP status is appended on its own last line via --write-out.
        curl_exit=0
        response=$(curl -w "\n%{http_code}" -sS -X POST \
            -H "Content-Type: application/json" \
            -d "{\"hostname\":\"${hostname}\",\"proxy_url\":\"${proxy_url}\",\"mode\":\"${mode}\"}" \
            "$register_url") || curl_exit=$?

        http_code=$(printf '%s\n' "$response" | tail -1)
        body=$(printf '%s\n' "$response" | head -n -1)

        # Retry network errors
        if [[ $curl_exit -ne 0 && -z "$http_code" ]]; then
            continue
        fi

        if [[ "$http_code" =~ ^20 ]]; then
            print_success "Registered successfully" >&2
            echo "$body"
            return 0
        fi

        if [[ "$http_code" == "404" && "$body" == *'"pve_instance_not_found"'* ]]; then
            print_warn "Pulse has not been configured with a Proxmox instance named '$hostname' yet." >&2
            print_warn "Add the node in Pulse (Settings → Nodes) and re-run the sensor proxy installer to enable control-plane sync." >&2
            return 0
        fi

        if [[ "$http_code" == "400" && "$body" == *'"missing_proxy_url"'* && "$mode" != "http" ]]; then
            if [[ $attempt -lt $max_attempts ]]; then
                print_warn "Pulse reported node '$hostname' is not ready yet; retrying..." >&2
                sleep 2
                continue
            fi
            print_warn "Pulse refused proxy registration because the node '$hostname' hasn't been added yet." >&2
            print_warn "Control-plane sync will be deferred until the node exists in Pulse; temperature proxy will run with a local allow list." >&2
            return 0
        fi

        if [[ $attempt -eq $max_attempts ]]; then
            print_error "Failed to register with Pulse API after $max_attempts attempts" >&2
            print_error "" >&2
            print_error "═══════════════════════════════════════════════════════" >&2
            print_error "Registration Details:" >&2
            print_error "═══════════════════════════════════════════════════════" >&2
            print_error "URL: $register_url" >&2
            print_error "HTTP Code: $http_code" >&2
            print_error "Hostname: $hostname" >&2
            print_error "Proxy URL: $proxy_url" >&2
            print_error "Response: $body" >&2
            print_error "" >&2
            print_error "Troubleshooting:" >&2
            print_error " 1. Ensure this PVE instance is added to Pulse first" >&2
            print_error " 2. Verify hostname matches instance name: $hostname" >&2
            print_error " 3. Check Pulse logs: docker logs pulse | tail -50" >&2
            print_error " 4. Test registration manually:" >&2
            print_error " curl -X POST -H 'Content-Type: application/json' \\" >&2
            print_error " -d '{\"hostname\":\"${hostname}\",\"proxy_url\":\"${proxy_url}\"}' \\" >&2
            print_error " $register_url" >&2
            return 1
        fi
    done

    return 1
}

# Persists the control-plane token for the proxy service account.
# Arguments: $1 - token; nothing is written when it is empty.
write_control_plane_token() {
    local token="$1"
    if [[ -z "$token" ]]; then
        return
    fi
    print_info "Writing control plane token..."
    # Create the file under a restrictive umask so the secret is never
    # world-readable, not even briefly before the chmod below.
    (
        umask 077
        printf '%s\n' "$token" > /etc/pulse-sensor-proxy/.pulse-control-token
    )
    chmod 600 /etc/pulse-sensor-proxy/.pulse-control-token
    chown pulse-sensor-proxy:pulse-sensor-proxy /etc/pulse-sensor-proxy/.pulse-control-token
}

# Ensures config.yaml carries a pulse_control_plane block for the given URL.
# Arguments: $1 - Pulse URL (no-op when empty)
#            $2 - refresh interval in seconds (defaults to 60 when empty)
# Prefers the binary's `config set-control-plane`; otherwise rewrites an
# existing block in place or appends a new one.
ensure_control_plane_config() {
    local pulse_url="$1"
    local refresh="$2"
    local config_file="/etc/pulse-sensor-proxy/config.yaml"

    if [[ -z "$pulse_url" ]]; then
        return
    fi
    if [[ -z "$refresh" ]]; then
        refresh=60
    fi

    # Use robust binary config management if available
    if config_command_supported "set-control-plane" "--url" "--token-file" "--refresh"; then
        if "$BINARY_PATH" config set-control-plane \
            --url "$pulse_url" \
            --token-file "/etc/pulse-sensor-proxy/.pulse-control-token" \
            --refresh "$refresh" \
            --config "$config_file"; then
            chown pulse-sensor-proxy:pulse-sensor-proxy "$config_file"
            chmod 0644 "$config_file"
            return
        else
            print_warn "Failed to set control plane using binary; falling back to legacy method"
        fi
    fi

    if grep -q "^pulse_control_plane:" "$config_file" 2>/dev/null; then
        # Re-write the existing control-plane block with the latest URL/token
        # path. Values are quoted to match the block appended below.
        local tmp
        tmp=$(mktemp)
        awk -v url="$pulse_url" -v refresh="$refresh" '
            BEGIN { in_block = 0 }
            /^pulse_control_plane:/ {
                print "pulse_control_plane:"
                print "  url: \"" url "\""
                print "  token_file: \"/etc/pulse-sensor-proxy/.pulse-control-token\""
                print "  refresh_interval: " refresh
                print ""
                in_block = 1
                next
            }
            # Exit the replacement block when we hit a non-indented line
            in_block && /^[^[:space:]]/ { in_block = 0 }
            in_block { next }
            { print }
        ' "$config_file" > "$tmp"
        mv "$tmp" "$config_file"
        chown pulse-sensor-proxy:pulse-sensor-proxy "$config_file"
        chmod 0644 "$config_file"
        return
    fi

    cat >> "$config_file" << EOF

# Pulse control plane configuration (auto-generated)
pulse_control_plane:
  url: "$pulse_url"
  token_file: "/etc/pulse-sensor-proxy/.pulse-control-token"
  refresh_interval: $refresh
EOF
}

declare -a CONTROL_PLANE_ALLOWED_NODE_LIST=()

# Fills CONTROL_PLANE_ALLOWED_NODE_LIST from the "allowed_nodes" array of a
# registration response (each entry's "ip", falling back to "name").
# Arguments: $1 - JSON response body
# Silently does nothing when the response is empty, python3 is missing, or
# the payload cannot be parsed.
apply_allowed_nodes_from_response() {
    local response="$1"
    if [[ -z "$response" ]]; then
        return
    fi
    if ! command -v python3 >/dev/null 2>&1; then
        return
    fi

    local parsed_nodes
    # A payload of unexpected shape makes python exit non-zero; treat that as
    # "no nodes" rather than letting set -e abort the installer.
    parsed_nodes=$(printf '%s' "$response" | python3 -c '
import json, sys

try:
    payload = json.load(sys.stdin)
except Exception:
    payload = {}

nodes = payload.get("allowed_nodes") or []
for entry in nodes:
    ip = entry.get("ip") or ""
    name = entry.get("name") or ""
    value = (ip or name).strip()
    if value:
        print(value)
') || parsed_nodes=""

    if [[ -z "$parsed_nodes" ]]; then
        return
    fi

    mapfile -t __allowed_nodes <<<"$parsed_nodes"
    if [[ ${#__allowed_nodes[@]} -gt 0 ]]; then
        CONTROL_PLANE_ALLOWED_NODE_LIST=("${__allowed_nodes[@]}")
    fi
}

# Writes the HTTP-mode settings into $CONFIG_FILE without the binary's config
# subcommand: updates only the token when HTTP mode is already configured,
# otherwise appends a fresh HTTP section plus the allowed source subnets.
# Globals: CONFIG_FILE, HTTP_ADDR, HTTP_AUTH_TOKEN, HTTP_ALLOWED_SUBNETS (read)
write_legacy_http_config() {
    local subnet
    if grep -q "^http_enabled:" "$CONFIG_FILE" 2>/dev/null; then
        # HTTP mode already configured - only update the token (avoid duplicates)
        sed -i "s|^http_auth_token:.*|http_auth_token: $HTTP_AUTH_TOKEN|" "$CONFIG_FILE"
        for subnet in "${HTTP_ALLOWED_SUBNETS[@]}"; do
            ensure_allowed_source_subnet "$subnet"
        done
        print_info "Updated HTTP auth token (existing HTTP mode configuration kept)"
    else
        # Fresh HTTP mode configuration - append to file
        cat >> "$CONFIG_FILE" << EOF

# HTTP Mode Configuration (External PVE Host)
http_enabled: true
http_listen_addr: "$HTTP_ADDR"
http_tls_cert: /etc/pulse-sensor-proxy/tls/server.crt
http_tls_key: /etc/pulse-sensor-proxy/tls/server.key
http_auth_token: "$HTTP_AUTH_TOKEN"

# Allow HTTP connections from Pulse server, localhost, and this host
allowed_source_subnets:
EOF
        for subnet in "${HTTP_ALLOWED_SUBNETS[@]}"; do
            echo "  - $subnet" >> "$CONFIG_FILE"
        done
    fi
}

determine_allowlist_mode

# Migrate any existing inline allowed_nodes to file (Phase 1 hotfix for config corruption)
migrate_inline_allowed_nodes_to_file
cleanup_inline_allowed_nodes

# Create base config file if it doesn't exist
if [[ ! -f /etc/pulse-sensor-proxy/config.yaml ]]; then
    print_info "Creating base configuration file..."
    cat > /etc/pulse-sensor-proxy/config.yaml << 'EOF'
# Pulse Temperature Proxy Configuration
allowed_peer_uids: [1000]

# Allow ID-mapped root (LXC containers with sub-UID mapping)
allow_idmapped_root: true
allowed_idmap_users:
  - root

metrics_address: "127.0.0.1:9127"

rate_limit:
  per_peer_interval_ms: 333
  per_peer_burst: 10
EOF
    chown pulse-sensor-proxy:pulse-sensor-proxy /etc/pulse-sensor-proxy/config.yaml
    chmod 0644 /etc/pulse-sensor-proxy/config.yaml
fi

# Phase 2: Migration handled by update_allowed_nodes() -> config migrate-to-file
# No need to call ensure_allowed_nodes_file_reference anymore

# ---------------------------------------------------------------------------
# Socket mode: register with Pulse if a server was provided
# ---------------------------------------------------------------------------
if [[ "$HTTP_MODE" != true ]]; then
    if [[ -z "$PULSE_SERVER" ]]; then
        print_warn "PULSE_SERVER not provided; control plane sync disabled. Temperatures will only work on this host."
    else
        print_info "Registering socket proxy with Pulse server ${PULSE_SERVER}..."
        # The call must sit inside the `if` condition: as a plain assignment a
        # failing registration would trip `set -e` and abort before the
        # fallback branch below could run.
        if registration_response=$(register_with_pulse "$PULSE_SERVER" "$SHORT_HOSTNAME" "" "socket") \
            && [[ -n "$registration_response" ]]; then
            # "|| true": a missing field makes grep fail, which under pipefail
            # would otherwise abort instead of leaving the value empty.
            CONTROL_PLANE_TOKEN=$(echo "$registration_response" | grep -o '"control_token":"[^"]*"' | head -1 | cut -d'"' -f4 || true)
            CONTROL_PLANE_REFRESH=$(echo "$registration_response" | grep -o '"refresh_interval":[0-9]*' | head -1 | awk -F: '{print $2}' || true)
            if [[ -z "$CONTROL_PLANE_REFRESH" ]]; then
                CONTROL_PLANE_REFRESH="60"
            fi
            apply_allowed_nodes_from_response "$registration_response"
            clear_pending_control_plane
        else
            print_warn "Failed to register socket proxy with Pulse; continuing without control plane sync"
            record_pending_control_plane "socket"
        fi
    fi
fi

# ---------------------------------------------------------------------------
# HTTP Mode Configuration
# ---------------------------------------------------------------------------
if [[ "$HTTP_MODE" == true ]]; then
    echo ""
    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    echo " HTTP Mode Setup (External PVE Host)"
    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    echo ""

    # Validate required parameters
    if [[ -z "$PULSE_SERVER" ]]; then
        print_error "HTTP mode requires --pulse-server parameter"
        print_error "Example: --pulse-server https://pulse.example.com:7655"
        exit 1
    fi

    # Test Pulse server reachability before proceeding
    print_info "Testing connection to Pulse server..."
    if ! curl -f -s -m 5 "${PULSE_SERVER}/api/health" >/dev/null 2>&1; then
        print_error "Cannot reach Pulse server at: $PULSE_SERVER"
        print_error ""
        print_error "Troubleshooting:"
        print_error " 1. Verify Pulse is running: docker ps | grep pulse"
        print_error " 2. Check URL is correct (include protocol and port)"
        print_error " 3. Test connectivity: curl -v ${PULSE_SERVER}/api/health"
        print_error " 4. Check firewall allows access from this host"
        print_error ""
        print_error "Installation aborted to prevent incomplete setup"
        exit 1
    fi
    print_success "Pulse server is reachable"

    # Check if port is already in use. The listener table is captured first
    # because `ss | grep -q` can misreport under pipefail when grep exits
    # early and ss receives SIGPIPE.
    PORT_NUMBER="${HTTP_ADDR#:}"
    listening_sockets="$(ss -ltn 2>/dev/null || true)"
    if grep -q ":${PORT_NUMBER} " <<<"$listening_sockets"; then
        # Port is in use - check if it's our own service (refresh scenario)
        if systemctl is-active --quiet pulse-sensor-proxy 2>/dev/null; then
            # Check if the process using the port is pulse-sensor-proxy
            PORT_OWNER=$(ss -ltnp | grep ":${PORT_NUMBER} " | grep -o 'pulse-sensor-pr' || true)
            if [[ -n "$PORT_OWNER" ]]; then
                # Our service is using the port - this is a refresh, continue
                print_info "Existing pulse-sensor-proxy detected on port ${PORT_NUMBER} - will refresh configuration"
            else
                # Service is active but something else is using the port
                print_error "Port ${PORT_NUMBER} is already in use by another process"
                print_error ""
                print_error "Currently using port ${PORT_NUMBER}:"
                ss -ltnp | grep ":${PORT_NUMBER} " || true
                exit 1
            fi
        else
            # Service not active, port conflict with something else
            print_error "Port ${PORT_NUMBER} is already in use"
            print_error ""
            print_error "Currently using port ${PORT_NUMBER}:"
            ss -ltnp | grep ":${PORT_NUMBER} " || true
            print_error ""
            print_error "Options:"
            print_error " 1. Stop the conflicting service"
            print_error " 2. Use a different port: --http-addr :PORT"
            print_error " 3. If this is a previous sensor-proxy, uninstall first:"
            print_error " $0 --uninstall"
            exit 1
        fi
    fi

    # Setup TLS certificates
    setup_tls_certificates "" "" # Empty params = auto-generate

    # Determine proxy URL - use IP address for reliable network access.
    # "|| true" keeps a failing `hostname -I` from aborting under pipefail
    # before the explicit error message below can be shown.
    PRIMARY_IP=$(hostname -I 2>/dev/null | awk '{print $1}' || true)
    if [[ -z "$PRIMARY_IP" ]]; then
        print_error "Failed to determine primary IP address"
        print_error "Use --proxy-url to specify manually"
        exit 1
    fi

    # Validate it's an IPv4 address (not IPv6)
    if [[ ! "$PRIMARY_IP" =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
        print_warn "Primary IP appears to be IPv6 or invalid: $PRIMARY_IP"
        print_warn "Attempting to find first IPv4 address..."
        PRIMARY_IP=$(hostname -I 2>/dev/null | tr ' ' '\n' | grep -E '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' | head -1 || true)
        if [[ -z "$PRIMARY_IP" ]]; then
            print_error "No IPv4 address found"
            print_error "Use --proxy-url https://YOUR_IP${HTTP_ADDR} to specify manually"
            exit 1
        fi
        print_info "Using IPv4 address: $PRIMARY_IP"
    fi

    # Refuse loopback: Pulse could never reach the proxy on it
    if [[ "$PRIMARY_IP" == "127.0.0.1" ]]; then
        print_warn "Primary IP is loopback (127.0.0.1)"
        print_warn "Pulse will not be able to reach this proxy!"
        print_error "Use --proxy-url https://YOUR_REAL_IP${HTTP_ADDR} to specify a reachable address"
        exit 1
    fi

    PROXY_URL="https://${PRIMARY_IP}${HTTP_ADDR}"
    print_info "Proxy will be accessible at: $PROXY_URL"

    # Register with Pulse and get auth/control tokens. The call lives inside
    # the condition so a failure reaches the error handling below instead of
    # aborting via `set -e` on the assignment.
    if ! registration_response=$(register_with_pulse "$PULSE_SERVER" "$SHORT_HOSTNAME" "$PROXY_URL" "http") \
        || [[ -z "$registration_response" ]]; then
        print_error "Failed to register with Pulse - aborting installation"
        print_error "Fix the issue and re-run the installer"
        record_pending_control_plane "http"
        exit 1
    fi

    HTTP_AUTH_TOKEN=$(echo "$registration_response" | grep -o '"token":"[^"]*"' | head -1 | cut -d'"' -f4 || true)
    CONTROL_PLANE_TOKEN=$(echo "$registration_response" | grep -o '"control_token":"[^"]*"' | head -1 | cut -d'"' -f4 || true)
    CONTROL_PLANE_REFRESH=$(echo "$registration_response" | grep -o '"refresh_interval":[0-9]*' | head -1 | awk -F: '{print $2}' || true)
    if [[ -z "$CONTROL_PLANE_REFRESH" ]]; then
        CONTROL_PLANE_REFRESH="60"
    fi
    apply_allowed_nodes_from_response "$registration_response"
    clear_pending_control_plane

    if [[ -z "$HTTP_AUTH_TOKEN" ]]; then
        print_error "Registration succeeded but Pulse did not return an auth token"
        print_error "Response: $registration_response"
        exit 1
    fi

    # Write the auth token under a restrictive umask so it is never
    # world-readable, not even briefly before the chmod below.
    (
        umask 077
        printf '%s\n' "$HTTP_AUTH_TOKEN" > /etc/pulse-sensor-proxy/.http-auth-token
    )
    chmod 600 /etc/pulse-sensor-proxy/.http-auth-token
    chown pulse-sensor-proxy:pulse-sensor-proxy /etc/pulse-sensor-proxy/.http-auth-token

    # Backup config and token files before modifying
    if [[ -f "$CONFIG_FILE" ]]; then
        BACKUP_TIMESTAMP="$(date +%s)"
        BACKUP_CONFIG="${CONFIG_FILE}.backup.$BACKUP_TIMESTAMP"
        cp "$CONFIG_FILE" "$BACKUP_CONFIG"
        print_info "Config backed up to: $BACKUP_CONFIG"

        # Also backup token files so rollback restores matching secrets.
        # -p preserves the 0600 mode and owner; a plain cp would create a
        # world-readable copy of the secret.
        if [[ -f /etc/pulse-sensor-proxy/.pulse-control-token ]]; then
            BACKUP_CONTROL_TOKEN="/etc/pulse-sensor-proxy/.pulse-control-token.backup.$BACKUP_TIMESTAMP"
            cp -p /etc/pulse-sensor-proxy/.pulse-control-token "$BACKUP_CONTROL_TOKEN"
        fi

        # Remove any existing HTTP configuration to prevent duplicates
        if grep -q "^# HTTP Mode Configuration" "$CONFIG_FILE"; then
            print_info "Removing existing HTTP configuration..."
            # Remove from "# HTTP Mode Configuration" to end of file
            sed -i '/^# HTTP Mode Configuration/,$ d' "$CONFIG_FILE"
        fi
    fi

    # Extract Pulse server IP/hostname for allowed_source_subnets:
    # strip the scheme, any path (including a bare trailing slash), then the port.
    PULSE_HOST=$(echo "$PULSE_SERVER" | sed -E -e 's#^https?://##' -e 's#/.*$##' -e 's#:[0-9]+$##')

    # Try to resolve to IP if it's a hostname. "|| true": getent exits
    # non-zero for unknown names, which must fall through to the fallback.
    PULSE_IP=$(getent hosts "$PULSE_HOST" 2>/dev/null | awk '{print $1; exit}' || true)
    if [[ -z "$PULSE_IP" ]]; then
        # Fallback: assume PULSE_HOST is already an IP or use it as-is
        PULSE_IP="$PULSE_HOST"
    fi
    print_info "Pulse server detected at: $PULSE_IP"

    HTTP_ALLOWED_SUBNETS=()
    PULSE_HTTP_SUBNET="$(format_ip_to_cidr "$PULSE_IP")"
    LOCAL_HTTP_SUBNET="$(format_ip_to_cidr "$PRIMARY_IP")"
    LOOPBACK_HTTP_SUBNET="127.0.0.1/32"
    [[ -n "$PULSE_HTTP_SUBNET" ]] && HTTP_ALLOWED_SUBNETS+=("$PULSE_HTTP_SUBNET")
    HTTP_ALLOWED_SUBNETS+=("$LOOPBACK_HTTP_SUBNET")
    [[ -n "$LOCAL_HTTP_SUBNET" ]] && HTTP_ALLOWED_SUBNETS+=("$LOCAL_HTTP_SUBNET")

    # De-duplicate while preserving order
    declare -A HTTP_SUBNET_SEEN=()
    deduped_http_subnets=()
    for subnet in "${HTTP_ALLOWED_SUBNETS[@]}"; do
        [[ -z "$subnet" ]] && continue
        if [[ -z "${HTTP_SUBNET_SEEN[$subnet]+x}" ]]; then
            HTTP_SUBNET_SEEN[$subnet]=1
            deduped_http_subnets+=("$subnet")
        fi
    done
    HTTP_ALLOWED_SUBNETS=("${deduped_http_subnets[@]}")

    # Configure HTTP mode - check if already configured to avoid duplicates
    print_info "Configuring HTTP mode..."
    if config_command_supported "set-http" "--enabled" "--listen-addr" "--auth-token" "--tls-cert" "--tls-key"; then
        if "$BINARY_PATH" config set-http \
            --enabled=true \
            --listen-addr="$HTTP_ADDR" \
            --auth-token="$HTTP_AUTH_TOKEN" \
            --tls-cert="/etc/pulse-sensor-proxy/tls/server.crt" \
            --tls-key="/etc/pulse-sensor-proxy/tls/server.key" \
            --config="$CONFIG_FILE"; then
            for subnet in "${HTTP_ALLOWED_SUBNETS[@]}"; do
                ensure_allowed_source_subnet "$subnet"
            done
            print_info "HTTP mode configured successfully (using binary)"
        else
            print_warn "Failed to set HTTP config using binary; falling back to legacy method"
            write_legacy_http_config
        fi
    else
        write_legacy_http_config
    fi

    chown pulse-sensor-proxy:pulse-sensor-proxy "$CONFIG_FILE"
    chmod 0644 "$CONFIG_FILE"

    print_success "HTTP mode configured successfully"
    echo ""
    print_info "Firewall configuration required:"
    print_info " Allow inbound TCP connections on port ${HTTP_ADDR#:} from Pulse server"
    print_info " Command: ufw allow from <pulse-server-ip> to any port ${HTTP_ADDR#:}"
    echo ""
fi

# Defaults guard against `set -u` when no registration populated the values.
if [[ -n "${CONTROL_PLANE_TOKEN:-}" && -n "$PULSE_SERVER" ]]; then
    write_control_plane_token "$CONTROL_PLANE_TOKEN"
    ensure_control_plane_config "$PULSE_SERVER" "${CONTROL_PLANE_REFRESH:-60}"
fi

# Stop existing service if running (for upgrades)
if systemctl is-active --quiet pulse-sensor-proxy 2>/dev/null; then
    print_info "Stopping existing service for upgrade..."
    # Tolerate timeout from slow HTTPS shutdown (can take 30s)
    systemctl stop pulse-sensor-proxy || true
    # Clear any failed state from the stop
    systemctl reset-failed pulse-sensor-proxy 2>/dev/null || true
fi

# Install hardened systemd service
print_info "Installing hardened systemd service..."
# Generate service file based on mode (Proxmox vs standalone) if [[ "$STANDALONE" == true ]]; then # Standalone/Docker mode - no Proxmox-specific paths cat > "$SERVICE_PATH" < "$SERVICE_PATH" </dev/null || true if systemctl start pulse-sensor-proxy.service; then print_success "Service restarted with previous configuration" print_error "" print_error "HTTP mode installation failed but previous config restored" print_error "Temperature monitoring should still work via Unix socket" print_error "Review the error above and fix before retrying" exit 1 else print_error "Rollback failed - service won't start even with old config" fi else print_error "Failed to restore config from backup" fi fi print_error "═══════════════════════════════════════════════════════" print_error "Service Status:" print_error "═══════════════════════════════════════════════════════" systemctl status pulse-sensor-proxy --no-pager --lines=0 2>&1 || true print_error "" print_error "═══════════════════════════════════════════════════════" print_error "Recent Logs (last 40 lines):" print_error "═══════════════════════════════════════════════════════" journalctl -u pulse-sensor-proxy -n 40 --no-pager 2>&1 || true print_error "" print_error "═══════════════════════════════════════════════════════" print_error "Common Issues:" print_error "═══════════════════════════════════════════════════════" print_error "1. Missing user: Run 'useradd --system --no-create-home --group pulse-sensor-proxy'" print_error "2. Permission errors: Check ownership of /var/lib/pulse-sensor-proxy" print_error "3. lm-sensors not installed: Run 'apt-get install lm-sensors && sensors-detect --auto'" print_error "4. Standalone node detection: If you see 'pvecm' errors, this is expected for non-clustered hosts" print_error "5. 
Port already in use: Check 'ss -tlnp | grep ${HTTP_ADDR#:}'"
    print_error ""
    print_error "For more help: https://github.com/rcourtman/Pulse/blob/main/docs/TROUBLESHOOTING.md"
    exit 1
fi

# Give the freshly started service up to ten one-second polls to create its
# Unix socket before declaring failure.
print_info "Waiting for socket..."
for i in 1 2 3 4 5 6 7 8 9 10; do
    [[ -S "$SOCKET_PATH" ]] && break
    sleep 1
done

if [[ ! -S "$SOCKET_PATH" ]]; then
    # Still no socket: dump everything useful for diagnosis, then abort.
    print_error "Socket did not appear at $SOCKET_PATH after 10 seconds"
    print_error ""
    print_error "═══════════════════════════════════════════════════════"
    print_error "Diagnostics:"
    print_error "═══════════════════════════════════════════════════════"
    print_error "Service Status:"
    systemctl status pulse-sensor-proxy --no-pager 2>&1 || true
    print_error ""
    print_error "Socket Directory Permissions:"
    ls -la /run/pulse-sensor-proxy/ 2>&1 || echo "Directory does not exist"
    print_error ""
    print_error "Recent Logs:"
    journalctl -u pulse-sensor-proxy -n 20 --no-pager 2>&1 || true
    print_error ""
    print_error "Common Causes:"
    print_error " • Service failed to start (check logs above)"
    print_error " • RuntimeDirectory permissions issue"
    print_error " • Systemd socket creation failed"
    print_error ""
    print_error "Try: systemctl restart pulse-sensor-proxy && watch -n 0.5 'ls -la /run/pulse-sensor-proxy/'"
    exit 1
fi

print_info "Socket ready at $SOCKET_PATH"

# A container-side socket check that was postponed (the runtime directory did
# not exist yet) is carried out here, now that the host proxy is up.
if [[ "$STANDALONE" == false && "$DEFER_SOCKET_VERIFICATION" = true ]]; then
    print_info "Validating container socket visibility now that host proxy is running..."
    if ! pct exec "$CTID" -- test -S "${MOUNT_TARGET}/pulse-sensor-proxy.sock"; then
        print_error "Socket not visible at ${MOUNT_TARGET}/pulse-sensor-proxy.sock"
        print_error "Bind mount exists but container still cannot access the proxy socket"
        print_error "This usually indicates the container needs a restart or the mount failed to attach"
        # Undo the LXC config edit when a backup of the old config is available.
        if [ -n "$LXC_CONFIG_BACKUP" ] && [ -f "$LXC_CONFIG_BACKUP" ]; then
            print_warn "Rolling back container configuration changes..."
            cp "$LXC_CONFIG_BACKUP" "$LXC_CONFIG"
            rm -f "$LXC_CONFIG_BACKUP"
            print_info "Container configuration restored to previous state"
        fi
        exit 1
    fi

    print_info "✓ Secure socket communication ready"
    DEFER_SOCKET_VERIFICATION=false
    # The backup is no longer needed once the mount is confirmed working.
    if [ -n "$LXC_CONFIG_BACKUP" ]; then
        rm -f "$LXC_CONFIG_BACKUP"
    fi
fi

# In HTTP mode, probe the proxy's /health endpoint with the issued token.
# A failed probe only produces warnings; the installation carries on.
if [[ "$HTTP_MODE" == true ]]; then
    print_info "Validating HTTP endpoint..."

    # Brief pause so the HTTP listener has time to come up
    sleep 2

    HTTP_CHECK_URL="https://${PRIMARY_IP}${HTTP_ADDR}/health"
    if ! curl -f -s -k -m 5 \
        -H "Authorization: Bearer ${HTTP_AUTH_TOKEN}" \
        "$HTTP_CHECK_URL" >/dev/null 2>&1; then
        print_error "HTTP endpoint validation failed"
        print_error "URL: $HTTP_CHECK_URL"
        print_error ""
        print_error "Troubleshooting:"
        print_error " 1. Check if port ${HTTP_ADDR#:} is listening: ss -tlnp | grep ${HTTP_ADDR#:}"
        print_error " 2. Check sensor-proxy logs: journalctl -u pulse-sensor-proxy -n 50"
        print_error " 3. Test manually: curl -k -H 'Authorization: Bearer \$TOKEN' $HTTP_CHECK_URL"
        print_error ""
        print_warn "Service is running but HTTP endpoint may not be accessible"
        print_warn "Temperature monitoring may not work properly"
    else
        print_success "HTTP endpoint validated successfully"
    fi
fi

# Install sensor wrapper script for combined sensor and SMART data collection
print_info "Installing sensor wrapper script..."
# Write the SSH forced-command wrapper. The heredoc delimiter is quoted, so
# the text below is emitted verbatim (no expansion happens at install time).
cat > "$WRAPPER_SCRIPT" << 'WRAPPER_EOF'
#!/bin/bash
#
# pulse-sensor-wrapper.sh
# Combined sensor and SMART temperature collection for Pulse monitoring
#
# This script is deployed as the SSH forced command for the sensor proxy.
# It collects CPU/GPU temps via sensors and disk temps via smartctl,
# returning a unified JSON payload.

set -euo pipefail

# Configuration
CACHE_DIR="/var/cache/pulse-sensor-proxy"
SMART_CACHE_TTL=1800 # 30 minutes
MAX_SMARTCTL_TIME=5 # seconds per disk

# Ensure cache directory exists
mkdir -p "$CACHE_DIR" 2>/dev/null || true

# Function to get cached SMART data
# Prints the cached JSON array when the cache file is younger than
# SMART_CACHE_TTL; otherwise prints "[]" and starts a background refresh
# unless a refresh lock file already exists. Always returns 0.
get_cached_smart() {
    local cache_file="$CACHE_DIR/smart-temps.json"
    local now=$(date +%s)

    # Check if cache exists and is fresh
    if [[ -f "$cache_file" ]]; then
        local mtime=$(stat -c %Y "$cache_file" 2>/dev/null || echo 0)
        local age=$((now - mtime))
        if [[ $age -lt $SMART_CACHE_TTL ]]; then
            cat "$cache_file"
            return 0
        fi
    fi

    # Cache miss or stale - return empty array and trigger background refresh
    echo "[]"

    # Trigger async refresh if not already running (use lock file)
    local lock_file="$CACHE_DIR/smart-refresh.lock"
    if ! [ -f "$lock_file" ]; then
        # Detach stdin/stdout/stderr: this function runs inside $(...), and a
        # background child that keeps the pipe open would make the caller
        # (and the SSH session) wait until the whole SMART scan has finished.
        (refresh_smart_cache </dev/null >/dev/null 2>&1 &)
    fi
    return 0
}

# Function to refresh SMART cache in background
# Queries every disk reported by lsblk with smartctl and atomically replaces
# the cache file with a JSON array of per-disk entries. Returns 1 when the
# lock file cannot be created.
refresh_smart_cache() {
    local lock_file="$CACHE_DIR/smart-refresh.lock"
    local cache_file="$CACHE_DIR/smart-temps.json"
    local temp_file="${cache_file}.tmp.$$"

    # Create lock file and ensure cleanup on exit
    touch "$lock_file" 2>/dev/null || return 1
    trap "rm -f '$lock_file' '$temp_file'" EXIT

    local disks=()
    # Find all physical disks (skip partitions, loop devices, etc.)
    while IFS= read -r dev; do
        [[ -b "$dev" ]] && disks+=("$dev")
    done < <(lsblk -nd -o NAME,TYPE | awk '$2=="disk" {print "/dev/"$1}')

    local results=()
    for dev in "${disks[@]}"; do
        # Use smartctl with standby check to avoid waking sleeping drives
        # -n standby: skip if drive is in standby/sleep mode
        # -i: include identity data (serial/WWN/model)
        # --json=o: output original smartctl JSON format
        # timeout: prevent hanging on problematic drives
        local output rc=0
        output=$(timeout "${MAX_SMARTCTL_TIME}s" smartctl -n standby -i -A --json=o "$dev" 2>/dev/null) || rc=$?

        # smartctl's exit status is a bitmask: only bits 0-1 mean the command
        # line or device open failed (which includes the standby case). Higher
        # bits merely flag SMART health conditions while valid output is still
        # produced, so such disks must not be dropped. 124 is timeout(1)'s
        # "timed out" status.
        if (( rc != 124 && (rc & 3) == 0 )); then
            # Parse the JSON output
            local temp=$(echo "$output" | jq -r '
                .temperature.current //
                (.ata_smart_attributes.table[] | select(.id == 194) | .raw.value) //
                (.nvme_smart_health_information_log.temperature // empty)
            ' 2>/dev/null)
            local serial=$(echo "$output" | jq -r '.serial_number // empty' 2>/dev/null)
            local wwn=$(echo "$output" | jq -r '.wwn.naa // .wwn.oui // empty' 2>/dev/null)
            local model=$(echo "$output" | jq -r '.model_name // .model_family // empty' 2>/dev/null)
            local transport=$(echo "$output" | jq -r '.device.type // empty' 2>/dev/null)

            # Only include if we got a valid temperature
            if [[ -n "$temp" && "$temp" != "null" && "$temp" =~ ^[0-9]+$ ]]; then
                local entry=$(jq -n \
                    --arg dev "$dev" \
                    --arg serial "$serial" \
                    --arg wwn "$wwn" \
                    --arg model "$model" \
                    --arg transport "$transport" \
                    --argjson temp "$temp" \
                    --arg updated "$(date -Iseconds)" \
                    '{
                        device: $dev,
                        serial: $serial,
                        wwn: $wwn,
                        model: $model,
                        type: $transport,
                        temperature: $temp,
                        lastUpdated: $updated,
                        standbySkipped: false
                    }')
                results+=("$entry")
            fi
        elif echo "$output" | grep -q "standby"; then
            # Drive is in standby - record it but don't wake it
            local entry=$(jq -n \
                --arg dev "$dev" \
                --arg updated "$(date -Iseconds)" \
                '{
                    device: $dev,
                    temperature: null,
                    lastUpdated: $updated,
                    standbySkipped: true
                }')
            results+=("$entry")
        fi

        # Small delay between disks to avoid saturating SATA controller
        sleep 0.1
    done

    # Build final JSON array
    if [[ ${#results[@]} -gt 0 ]]; then
        local json=$(printf '%s\n' "${results[@]}" | jq -s '.')
    else
        local json="[]"
    fi

    # Atomic write to cache
    echo "$json" > "$temp_file"
    mv "$temp_file" "$cache_file"
    chmod 644 "$cache_file" 2>/dev/null || true
}

# Main execution
# Collect sensor data (CPU, GPU temps)
sensors_data=$(sensors -j 2>/dev/null || echo '{}')

# Get SMART data from cache
smart_data=$(get_cached_smart)

# Combine into unified payload
jq -n \
    --argjson sensors "$sensors_data" \
    --argjson smart "$smart_data" \
    '{
        sensors: $sensors,
        smart: $smart
    }'
WRAPPER_EOF
chmod +x "$WRAPPER_SCRIPT"
print_success "Sensor wrapper installed at $WRAPPER_SCRIPT"

# Install cleanup system for full Pulse removal when nodes are deleted
print_info "Installing cleanup system..."

# Install cleanup script
cat > "$CLEANUP_SCRIPT_PATH" <<'CLEANUP_EOF'
#!/bin/bash
# pulse-sensor-cleanup.sh - Complete Pulse footprint removal when nodes are removed
# Removes: SSH keys, proxy service, binaries, API tokens, and LXC bind mounts
# This script is triggered by systemd path unit when cleanup-request.json is created

set -euo pipefail

# Configuration
WORK_DIR="/var/lib/pulse-sensor-proxy"
CLEANUP_REQUEST="${WORK_DIR}/cleanup-request.json"
LOCKFILE="${WORK_DIR}/cleanup.lock"
LOG_TAG="pulse-sensor-cleanup"
INSTALLER_PATH="/opt/pulse/sensor-proxy/install-sensor-proxy.sh"

# Logging functions
# Each one writes the message to syslog via logger and echoes it as well
# (log_error echoes to stderr).
log_info() {
    logger -t "$LOG_TAG" -p user.info "$1"
    echo "[INFO] $1"
}

log_warn() {
    logger -t "$LOG_TAG" -p user.warning "$1"
    echo "[WARN] $1"
}

log_error() {
    logger -t "$LOG_TAG" -p user.err "$1"
    echo "[ERROR] $1" >&2
}

# Acquire exclusive lock to prevent concurrent cleanup runs
exec 200>"$LOCKFILE"
if ! flock -n 200; then
    log_info "Another cleanup instance is running, exiting"
    exit 0
fi

# Check if cleanup request file exists
if [[ ! -f "$CLEANUP_REQUEST" ]]; then
    log_info "No cleanup request found at $CLEANUP_REQUEST"
    exit 0
fi

log_info "Processing cleanup request from $CLEANUP_REQUEST"

# Read and parse the cleanup request
CLEANUP_DATA=$(cat "$CLEANUP_REQUEST")
HOST=$(echo "$CLEANUP_DATA" | grep -o '"host":"[^"]*"' | cut -d'"' -f4 || echo "")
REQUESTED_AT=$(echo "$CLEANUP_DATA" | grep -o '"requestedAt":"[^"]*"' | cut -d'"' -f4 || echo "")

log_info "Cleanup requested at: ${REQUESTED_AT:-unknown}"

# Rename request file to .processing to prevent re-triggering while allowing retry on failure
PROCESSING_FILE="${CLEANUP_REQUEST}.processing"
mv "$CLEANUP_REQUEST" "$PROCESSING_FILE" 2>/dev/null || {
    log_warn "Failed to rename cleanup request file, may have been processed by another instance"
    exit 0
}

# If no specific host was provided, clean up all known nodes
if [[ -z "$HOST" ]]; then
    log_info "No specific host provided - cleaning up all cluster nodes"

    # Discover cluster nodes
    if command -v pvecm >/dev/null 2>&1; then
        CLUSTER_NODES=$(pvecm status 2>/dev/null | grep -vEi "qdevice" | awk '/0x[0-9a-f]+.*[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/ {for(i=1;i<=NF;i++) if($i ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/) print $i}' || true)

        if [[ -n "$CLUSTER_NODES" ]]; then
            for node_ip in $CLUSTER_NODES; do
                log_info "Cleaning up SSH keys on node $node_ip"

                # Remove both pulse-managed-key and pulse-proxy-key entries
                ssh -o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=5 root@"$node_ip" \
                    "sed -i -e '/# pulse-managed-key\$/d' -e '/# pulse-proxy-key\$/d' /root/.ssh/authorized_keys" 2>&1 | \
                    logger -t "$LOG_TAG" -p user.info || \
                    log_warn "Failed to clean up SSH keys on $node_ip"
            done
            log_info "Cluster cleanup completed"
        else
            # Standalone node - clean up localhost
            log_info "Standalone node detected - cleaning up localhost"
            sed -i -e '/# pulse-managed-key$/d' -e '/# pulse-proxy-key$/d' /root/.ssh/authorized_keys 2>&1 | \
                logger -t "$LOG_TAG" -p user.info || \
                log_warn "Failed to clean up SSH keys on localhost"
        fi
    else
log_warn "pvecm command not available - cleaning up localhost only" sed -i -e '/# pulse-managed-key$/d' -e '/# pulse-proxy-key$/d' /root/.ssh/authorized_keys 2>&1 | \ logger -t "$LOG_TAG" -p user.info || \ log_warn "Failed to clean up SSH keys on localhost" fi else log_info "Cleaning up specific host: $HOST" # Extract hostname/IP from host URL HOST_CLEAN=$(echo "$HOST" | sed -e 's|^https\?://||' -e 's|:.*$||') # Check if this is localhost (by IP, hostname, or FQDN) LOCAL_IPS=$(hostname -I 2>/dev/null || echo "") LOCAL_HOSTNAME=$(hostname 2>/dev/null || echo "") LOCAL_FQDN=$(hostname -f 2>/dev/null || echo "") IS_LOCAL=false # Check against all local IPs for local_ip in $LOCAL_IPS; do if [[ "$HOST_CLEAN" == "$local_ip" ]]; then IS_LOCAL=true break fi done # Check against hostname and FQDN if [[ "$HOST_CLEAN" == "127.0.0.1" || "$HOST_CLEAN" == "localhost" || \ "$HOST_CLEAN" == "$LOCAL_HOSTNAME" || "$HOST_CLEAN" == "$LOCAL_FQDN" ]]; then IS_LOCAL=true fi if [[ "$IS_LOCAL" == true ]]; then log_info "Performing full cleanup on localhost" # 1. Remove SSH keys log_info "Removing SSH keys from authorized_keys" sed -i -e '/# pulse-managed-key$/d' -e '/# pulse-proxy-key$/d' /root/.ssh/authorized_keys 2>&1 | \ logger -t "$LOG_TAG" -p user.info || \ log_warn "Failed to clean up SSH keys" # 2. 
Delete API tokens and user log_info "Removing Proxmox API tokens and pulse-monitor user" if command -v pveum >/dev/null 2>&1; then # Try JSON output first (pveum with --output-format json) TOKEN_IDS="" if command -v python3 >/dev/null 2>&1; then # Try pveum with JSON output if TOKEN_JSON=$(pveum user token list pulse-monitor@pam --output-format json 2>/dev/null); then TOKEN_IDS=$(echo "$TOKEN_JSON" | python3 -c ' import sys, json try: data = json.load(sys.stdin) if isinstance(data, list): for item in data: if "tokenid" in item: print(item["tokenid"]) except: pass ' || true) fi fi # Fall back to pvesh JSON API if pveum JSON didn't work if [[ -z "$TOKEN_IDS" ]] && command -v pvesh >/dev/null 2>&1; then if TOKEN_JSON=$(pvesh get /access/users/pulse-monitor@pam/token 2>/dev/null); then TOKEN_IDS=$(echo "$TOKEN_JSON" | python3 -c ' import sys, json try: data = json.load(sys.stdin) if isinstance(data, dict) and "data" in data: for item in data["data"]: if "tokenid" in item: print(item["tokenid"]) except: pass ' 2>/dev/null || true) fi fi # Last resort: parse table output with better filtering if [[ -z "$TOKEN_IDS" ]]; then TOKEN_IDS=$(pveum user token list pulse-monitor@pam 2>/dev/null | \ awk 'NR>1 && /^[[:space:]]*pulse/ {print $1}' | grep -v '^[│┌└╞─]' | grep -v '^$' || true) fi if [[ -n "$TOKEN_IDS" ]]; then for token_id in $TOKEN_IDS; do log_info "Deleting API token: $token_id" pveum user token remove pulse-monitor@pam "${token_id}" 2>&1 | \ logger -t "$LOG_TAG" -p user.info || \ log_warn "Failed to delete token $token_id" done else log_info "No API tokens found for pulse-monitor@pam" fi # Remove the pulse-monitor user log_info "Removing pulse-monitor@pam user" pveum user delete pulse-monitor@pam 2>&1 | \ logger -t "$LOG_TAG" -p user.info || \ log_warn "pulse-monitor@pam user not found or already removed" else log_warn "pveum command not available, skipping API token cleanup" fi # 3. 
Remove LXC bind mounts log_info "Removing LXC bind mounts from container configs" if command -v pct >/dev/null 2>&1; then for ctid in $(pct list 2>/dev/null | awk 'NR>1 {print $1}' || true); do CONF_FILE="/etc/pve/lxc/${ctid}.conf" if [[ -f "$CONF_FILE" ]]; then # Find pulse-sensor-proxy mount points and remove them using pct for mp_key in $(grep -o "^mp[0-9]\+:" "$CONF_FILE" | grep -f <(grep "pulse-sensor-proxy" "$CONF_FILE" | grep -o "^mp[0-9]\+:") || true); do mp_num="${mp_key%:}" log_info "Removing ${mp_num} (pulse-sensor-proxy) from container $ctid" if pct set "$ctid" -delete "${mp_num}" 2>&1 | logger -t "$LOG_TAG" -p user.info; then log_info "Successfully removed ${mp_num} from container $ctid" else log_warn "Failed to remove ${mp_num} from container $ctid" fi done fi done fi # 4. Uninstall proxy service and remove binaries via isolated transient unit log_info "Starting full uninstallation (service, binaries, configs)" if [[ -x "$INSTALLER_PATH" ]]; then # Use systemd-run to create isolated transient unit that won't be killed # when we stop pulse-sensor-proxy.service if command -v systemd-run >/dev/null 2>&1; then # Use UUID for unique unit name (prevents same-second collisions) UNINSTALL_UUID=$(cat /proc/sys/kernel/random/uuid 2>/dev/null || date +%s%N) UNINSTALL_UNIT="pulse-uninstall-${UNINSTALL_UUID}" log_info "Spawning isolated uninstaller unit: $UNINSTALL_UNIT" systemd-run \ --unit="${UNINSTALL_UNIT}" \ --property="Type=oneshot" \ --property="Conflicts=pulse-sensor-proxy.service" \ --collect \ --wait \ --quiet \ -- bash -c "$INSTALLER_PATH --uninstall --purge --quiet >> /var/log/pulse/sensor-proxy/uninstall.log 2>&1" \ 2>&1 | logger -t "$LOG_TAG" -p user.info UNINSTALL_EXIT=$? 
if [[ $UNINSTALL_EXIT -eq 0 ]]; then log_info "Uninstaller completed successfully" else log_error "Uninstaller failed with exit code $UNINSTALL_EXIT" exit 1 fi else log_warn "systemd-run not available, attempting direct uninstall (may fail)" bash "$INSTALLER_PATH" --uninstall --quiet >> /var/log/pulse/sensor-proxy/uninstall.log 2>&1 || \ log_error "Uninstaller failed - manual cleanup may be required" fi else log_warn "Installer not found at $INSTALLER_PATH, cannot run uninstaller" log_info "Manual cleanup required: systemctl stop pulse-sensor-proxy && systemctl disable pulse-sensor-proxy" fi log_info "Localhost cleanup initiated (uninstaller running in background)" else log_info "Cleaning up remote host: $HOST_CLEAN" # Try to use proxy's SSH key first (for standalone nodes), fall back to default PROXY_KEY="/var/lib/pulse-sensor-proxy/ssh/id_ed25519" SSH_CMD="ssh -o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=5" if [[ -f "$PROXY_KEY" ]]; then log_info "Using proxy SSH key for cleanup" SSH_CMD="$SSH_CMD -i $PROXY_KEY" fi # Remove both pulse-managed-key and pulse-proxy-key entries from remote host CLEANUP_OUTPUT=$($SSH_CMD root@"$HOST_CLEAN" \ "sed -i -e '/# pulse-managed-key\$/d' -e '/# pulse-proxy-key\$/d' /root/.ssh/authorized_keys && echo 'SUCCESS'" 2>&1) if echo "$CLEANUP_OUTPUT" | grep -q "SUCCESS"; then log_info "Successfully cleaned up SSH keys on $HOST_CLEAN" else # Check if this is a standalone node with forced commands (common case) if echo "$CLEANUP_OUTPUT" | grep -q "cpu_thermal\|coretemp\|k10temp"; then log_warn "Cannot cleanup standalone node $HOST_CLEAN (forced command prevents cleanup)" log_info "Standalone node keys are read-only (sensors -j) - low security risk" log_info "Manual cleanup: ssh root@$HOST_CLEAN \"sed -i '/# pulse-proxy-key\$/d' /root/.ssh/authorized_keys\"" else log_error "Failed to clean up SSH keys on $HOST_CLEAN: $CLEANUP_OUTPUT" exit 1 fi fi fi fi # Remove processing file on success rm -f "$PROCESSING_FILE" log_info 
"Cleanup completed successfully" exit 0 CLEANUP_EOF chmod +x "$CLEANUP_SCRIPT_PATH" print_info "Cleanup script installed" # Install systemd path unit CLEANUP_PATH_UNIT="/etc/systemd/system/pulse-sensor-cleanup.path" cat > "$CLEANUP_PATH_UNIT" << 'PATH_EOF' [Unit] Description=Watch for Pulse sensor cleanup requests Documentation=https://github.com/rcourtman/Pulse [Path] # Watch for the cleanup request file PathChanged=/var/lib/pulse-sensor-proxy/cleanup-request.json # Also watch for modifications PathModified=/var/lib/pulse-sensor-proxy/cleanup-request.json [Install] WantedBy=multi-user.target PATH_EOF # Install systemd service unit CLEANUP_SERVICE_UNIT="/etc/systemd/system/pulse-sensor-cleanup.service" cat > "$CLEANUP_SERVICE_UNIT" </dev/null 2>&1; then # Extract node IPs from pvecm status CLUSTER_NODES=$(pvecm status 2>/dev/null | grep -vEi "qdevice" | awk '/0x[0-9a-f]+.*[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/ {for(i=1;i<=NF;i++) if($i ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/) print $i}' || true) if [[ -n "$CLUSTER_NODES" ]]; then print_info "Discovered cluster nodes: $(echo $CLUSTER_NODES | tr '\n' ' ')" # Configure SSH key with forced command restriction FORCED_CMD='command="/opt/pulse/sensor-proxy/bin/pulse-sensor-wrapper.sh",no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty' AUTH_LINE="${FORCED_CMD} ${PROXY_PUBLIC_KEY} # pulse-managed-key" # Track SSH key push results SSH_SUCCESS_COUNT=0 SSH_FAILURE_COUNT=0 declare -a SSH_FAILED_NODES=() LOCAL_IPS=$(hostname -I 2>/dev/null || echo "") LOCAL_HOSTNAMES="$(hostname 2>/dev/null || echo "") $(hostname -f 2>/dev/null || echo "")" LOCAL_HANDLED=false # Push key to each cluster node for node_ip in $CLUSTER_NODES; do print_info "Authorizing proxy key on node $node_ip..." 
IS_LOCAL=false # Check if node_ip matches any of the local IPs (exact match with word boundaries) for local_ip in $LOCAL_IPS; do if [[ "$node_ip" == "$local_ip" ]]; then IS_LOCAL=true break fi done if [[ " $LOCAL_HOSTNAMES " == *" $node_ip "* ]]; then IS_LOCAL=true fi if [[ "$node_ip" == "127.0.0.1" || "$node_ip" == "localhost" ]]; then IS_LOCAL=true fi if [[ "$IS_LOCAL" = true ]]; then configure_local_authorized_key "$AUTH_LINE" LOCAL_HANDLED=true ((SSH_SUCCESS_COUNT+=1)) continue fi # Remove any existing proxy keys first ssh -o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=5 root@"$node_ip" \ "sed -i '/# pulse-managed-key\$/d' /root/.ssh/authorized_keys" 2>/dev/null || true # Ensure wrapper compatibility on remote node (supports old installations) # Create symlink if old wrapper exists but new path doesn't ssh -o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=5 root@"$node_ip" \ "if [[ -f /usr/local/bin/pulse-sensor-wrapper.sh && ! -f /opt/pulse/sensor-proxy/bin/pulse-sensor-wrapper.sh ]]; then \ mkdir -p /opt/pulse/sensor-proxy/bin && \ ln -sf /usr/local/bin/pulse-sensor-wrapper.sh /opt/pulse/sensor-proxy/bin/pulse-sensor-wrapper.sh; \ fi" 2>/dev/null || true # Add new key with forced command SSH_ERROR=$(ssh -o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=5 root@"$node_ip" \ "echo '${AUTH_LINE}' >> /root/.ssh/authorized_keys" 2>&1) if [[ $? 
-eq 0 ]]; then print_success "SSH key configured on $node_ip" ((SSH_SUCCESS_COUNT+=1)) else print_warn "Failed to configure SSH key on $node_ip" ((SSH_FAILURE_COUNT+=1)) SSH_FAILED_NODES+=("$node_ip") # Log detailed error for debugging if [[ -n "$SSH_ERROR" ]]; then print_info " Error details: $(echo "$SSH_ERROR" | head -1)" fi fi done # Print summary print_info "" print_info "SSH key configuration summary:" print_info " ✓ Success: $SSH_SUCCESS_COUNT node(s)" if [[ $SSH_FAILURE_COUNT -gt 0 ]]; then print_warn " ✗ Failed: $SSH_FAILURE_COUNT node(s) - ${SSH_FAILED_NODES[*]}" print_info "" print_info "To retry failed nodes, re-run this script or manually run:" print_info " ssh root@ 'echo \"${AUTH_LINE}\" >> /root/.ssh/authorized_keys'" fi if [[ "$LOCAL_HANDLED" = false ]]; then configure_local_authorized_key "$AUTH_LINE" ((SSH_SUCCESS_COUNT+=1)) fi # Add discovered cluster nodes to config file for allowlist validation print_info "Updating proxy configuration with discovered cluster nodes..." # Collect only IPs (hostnames are not used for SSH temperature collection) all_nodes=() for node_ip in $CLUSTER_NODES; do all_nodes+=("$node_ip") done if [[ ${#CONTROL_PLANE_ALLOWED_NODE_LIST[@]} -gt 0 ]]; then all_nodes+=("${CONTROL_PLANE_ALLOWED_NODE_LIST[@]}") fi # Use helper function to safely update allowed_nodes (prevents duplicates on re-run) if ! update_allowed_nodes "Cluster nodes (auto-discovered during installation)" "${all_nodes[@]}"; then print_error "Failed to update allowed_nodes list" exit 1 fi else # No cluster found - configure standalone node print_info "No cluster detected, configuring standalone node..." # Configure SSH key with forced command restriction FORCED_CMD='command="/opt/pulse/sensor-proxy/bin/pulse-sensor-wrapper.sh",no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty' AUTH_LINE="${FORCED_CMD} ${PROXY_PUBLIC_KEY} # pulse-managed-key" print_info "Authorizing proxy key on localhost..." 
configure_local_authorized_key "$AUTH_LINE" print_info "" print_info "Standalone node configuration complete" # Add localhost to config file for allowlist validation print_info "Updating proxy configuration for standalone mode..." LOCAL_IPS=$(hostname -I 2>/dev/null | tr ' ' '\n' | grep -v '^$' || echo "127.0.0.1") # Collect all local IPs and localhost variants into array all_nodes=() for local_ip in $LOCAL_IPS; do all_nodes+=("$local_ip") done # Always include localhost variants all_nodes+=("127.0.0.1" "localhost") if [[ ${#CONTROL_PLANE_ALLOWED_NODE_LIST[@]} -gt 0 ]]; then all_nodes+=("${CONTROL_PLANE_ALLOWED_NODE_LIST[@]}") fi # Use helper function to safely update allowed_nodes (prevents duplicates on re-run) if ! update_allowed_nodes "Standalone node configuration (auto-configured during installation)" "${all_nodes[@]}"; then print_error "Failed to update allowed_nodes list" exit 1 fi fi else # Proxmox host but pvecm not available (shouldn't happen, but handle it) print_warn "pvecm command not available" print_info "Configuring SSH key for localhost..." # Configure localhost as fallback FORCED_CMD='command="/opt/pulse/sensor-proxy/bin/pulse-sensor-wrapper.sh",no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty' AUTH_LINE="${FORCED_CMD} ${PROXY_PUBLIC_KEY} # pulse-managed-key" configure_local_authorized_key "$AUTH_LINE" # Add localhost to config file for allowlist validation print_info "Updating proxy configuration for localhost fallback..." LOCAL_IPS=$(hostname -I 2>/dev/null | tr ' ' '\n' | grep -v '^$' || echo "127.0.0.1") # Collect all local IPs and localhost variants into array all_nodes=() for local_ip in $LOCAL_IPS; do all_nodes+=("$local_ip") done # Always include localhost variants all_nodes+=("127.0.0.1" "localhost") if [[ ${#CONTROL_PLANE_ALLOWED_NODE_LIST[@]} -gt 0 ]]; then all_nodes+=("${CONTROL_PLANE_ALLOWED_NODE_LIST[@]}") fi # Use helper function to safely update allowed_nodes (prevents duplicates on re-run) if ! 
update_allowed_nodes "Localhost fallback configuration (pvecm unavailable)" "${all_nodes[@]}"; then print_error "Failed to update allowed_nodes list" exit 1 fi fi cleanup_inline_allowed_nodes # Container-specific configuration (skip for standalone mode or if container not on this node) if [[ "$STANDALONE" == false && "$CONTAINER_ON_THIS_NODE" == true ]]; then echo "" echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" echo " Secure Container Communication Setup" echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" echo "" echo "Setting up secure socket mount for temperature monitoring:" echo " • Container communicates with host proxy via Unix socket" echo " • No SSH keys exposed inside container (enhanced security)" echo " • Proxy on host manages all temperature collection" echo "" print_info "Configuring socket bind mount..." MOUNT_TARGET="/mnt/pulse-proxy" HOST_SOCKET_SOURCE="/run/pulse-sensor-proxy" LXC_CONFIG="/etc/pve/lxc/${CTID}.conf" LOCAL_MOUNT_ENTRY="lxc.mount.entry: ${HOST_SOCKET_SOURCE} mnt/pulse-proxy none bind,create=dir 0 0" mkdir -p "$HOST_SOCKET_SOURCE" # Back up container config before modifying LXC_CONFIG_BACKUP=$(mktemp) cp "$LXC_CONFIG" "$LXC_CONFIG_BACKUP" 2>/dev/null || { print_warn "Could not back up container config (may not exist yet)" LXC_CONFIG_BACKUP="" } MOUNT_UPDATED=false CT_RUNNING=false SKIP_CONTAINER_POST_STEPS=false if pct status "$CTID" 2>/dev/null | grep -q "running"; then CT_RUNNING=true fi # /etc/pve is a FUSE filesystem (pmxcfs) - direct sed/echo don't work reliably # Must use temp file and copy back to trigger cluster sync # Also, config file contains snapshots sections - only modify main section (before first [) TEMP_CONFIG=$(mktemp) cp "$LXC_CONFIG" "$TEMP_CONFIG" # Extract line number where snapshots start (first line starting with [) SNAPSHOT_START=$(grep -n '^\[' "$TEMP_CONFIG" | head -1 | cut -d: -f1) if grep -Eq '^mp[0-9]+:.*pulse-sensor-proxy|^mp[0-9]+:.*mnt/pulse-proxy' "$TEMP_CONFIG" 2>/dev/null; then print_info 
"Removing mp mounts for pulse-sensor-proxy to keep snapshots and migrations working" if [ -n "$SNAPSHOT_START" ]; then # Only modify main section (before snapshots) sed -i "1,$((SNAPSHOT_START-1)) { /^mp[0-9]\+:.*pulse-sensor-proxy/d; /^mp[0-9]\+:.*mnt\/pulse-proxy/d }" "$TEMP_CONFIG" else sed -i '/^mp[0-9]\+:.*pulse-sensor-proxy/d; /^mp[0-9]\+:.*mnt\/pulse-proxy/d' "$TEMP_CONFIG" fi MOUNT_UPDATED=true fi if grep -q "^lxc.mount.entry: .*/pulse-sensor-proxy" "$TEMP_CONFIG" 2>/dev/null; then if ! grep -qxF "$LOCAL_MOUNT_ENTRY" "$TEMP_CONFIG"; then print_info "Updating existing lxc.mount.entry for pulse-sensor-proxy" if [ -n "$SNAPSHOT_START" ]; then sed -i "1,$((SNAPSHOT_START-1)) { s#^lxc.mount.entry: .*pulse-sensor-proxy.*#${LOCAL_MOUNT_ENTRY}# }" "$TEMP_CONFIG" else sed -i "s#^lxc.mount.entry: .*pulse-sensor-proxy.*#${LOCAL_MOUNT_ENTRY}#" "$TEMP_CONFIG" fi MOUNT_UPDATED=true else print_info "Container already has migration-safe lxc.mount.entry for proxy" fi else print_info "Adding lxc.mount.entry for pulse-sensor-proxy" # Insert before snapshot section if it exists, otherwise append if [ -n "$SNAPSHOT_START" ]; then sed -i "${SNAPSHOT_START}i ${LOCAL_MOUNT_ENTRY}" "$TEMP_CONFIG" else echo "$LOCAL_MOUNT_ENTRY" >> "$TEMP_CONFIG" fi MOUNT_UPDATED=true fi # Copy back to trigger pmxcfs sync if [[ "$MOUNT_UPDATED" = true ]]; then cp "$TEMP_CONFIG" "$LXC_CONFIG" fi rm -f "$TEMP_CONFIG" if ! pct config "$CTID" | grep -qxF "$LOCAL_MOUNT_ENTRY"; then print_error "Failed to persist migration-safe socket mount in container config" if [ -n "$LXC_CONFIG_BACKUP" ] && [ -f "$LXC_CONFIG_BACKUP" ]; then print_warn "Rolling back container configuration changes..." cp "$LXC_CONFIG_BACKUP" "$LXC_CONFIG" rm -f "$LXC_CONFIG_BACKUP" fi exit 1 fi print_info "✓ Mount configuration recorded in container config" if [[ "$MOUNT_UPDATED" = true ]]; then if [[ "$SKIP_RESTART" = true ]]; then if [[ "$CT_RUNNING" = true ]]; then print_info "Skipping container restart (--skip-restart provided). 
Changes apply on next restart." else print_info "Skipping automatic container start (--skip-restart provided)." fi else print_info "Restarting container to activate secure communication..." if [[ "$CT_RUNNING" = true ]]; then pct stop "$CTID" && sleep 2 && pct start "$CTID" else pct start "$CTID" fi sleep 5 CT_RUNNING=true fi fi # Verify socket directory and file inside container if [[ "$SKIP_RESTART" = true && "$CT_RUNNING" = true && "$MOUNT_UPDATED" = true ]]; then print_warn "Skipping socket verification until container $CTID is restarted." print_warn "Please restart container and verify socket manually:" print_warn " pct stop $CTID && sleep 2 && pct start $CTID" print_warn " pct exec $CTID -- test -S ${MOUNT_TARGET}/pulse-sensor-proxy.sock && echo 'Socket OK'" elif [[ "$SKIP_RESTART" = true && "$CT_RUNNING" = false ]]; then print_warn "Socket verification deferred. Start container $CTID and run:" print_warn " pct exec $CTID -- test -S ${MOUNT_TARGET}/pulse-sensor-proxy.sock && echo 'Socket OK'" SKIP_CONTAINER_POST_STEPS=true elif [[ "$CT_RUNNING" = false ]]; then print_warn "Container $CTID is stopped. Start it to verify the pulse-sensor-proxy socket:" print_warn " pct start $CTID && pct exec $CTID -- test -S ${MOUNT_TARGET}/pulse-sensor-proxy.sock && echo 'Socket OK'" SKIP_CONTAINER_POST_STEPS=true else if [[ ! -S "$SOCKET_PATH" ]]; then print_warn "Host proxy socket not available yet; deferring container verification until service starts." DEFER_SOCKET_VERIFICATION=true else print_info "Verifying secure communication channel..." 
# Check from inside the container that the proxy socket exists at the mount
# target; on failure restore the saved container config and abort.
            if pct exec "$CTID" -- test -S "${MOUNT_TARGET}/pulse-sensor-proxy.sock"; then
                print_info "✓ Secure socket communication ready"
                # Clean up backup since verification succeeded
                [ -n "$LXC_CONFIG_BACKUP" ] && rm -f "$LXC_CONFIG_BACKUP"
            else
                print_error "Socket not visible at ${MOUNT_TARGET}/pulse-sensor-proxy.sock"
                print_error "Mount configuration verified but socket not accessible in container"
                print_error "This indicates a mount or restart issue"
                # Rollback container config changes
                if [ -n "$LXC_CONFIG_BACKUP" ] && [ -f "$LXC_CONFIG_BACKUP" ]; then
                    print_warn "Rolling back container configuration changes..."
                    cp "$LXC_CONFIG_BACKUP" "$LXC_CONFIG"
                    rm -f "$LXC_CONFIG_BACKUP"
                    print_info "Container configuration restored to previous state"
                fi
                exit 1
            fi
        fi
    fi

    # Container-side steps are skipped when SKIP_CONTAINER_POST_STEPS is true.
    if [[ "$SKIP_CONTAINER_POST_STEPS" != true ]]; then
        # Configure Pulse backend environment override inside container
        print_info "Configuring Pulse to use proxy..."

        # Always make sure the Pulse .env file contains the proxy socket override.
        configure_container_proxy_env

        # NOTE(review): "systemctl status" also exits non-zero for a unit that
        # exists but is not running, so this branch is presumably taken for a
        # stopped Pulse service as well - confirm this is intended.
        if ! pct exec "$CTID" -- systemctl status pulse >/dev/null 2>&1; then
            print_warn "Pulse service not found in container $CTID; proxy socket configured but service restart deferred."
            print_info "Install or restart Pulse inside the container to enable temperature monitoring."
        else
            # Write a systemd drop-in inside the container. The outer string
            # is double-quoted, so ${MOUNT_TARGET} is expanded on the host
            # before the command is sent to the container.
            pct exec "$CTID" -- bash -lc "mkdir -p /etc/systemd/system/pulse.service.d"
            pct exec "$CTID" -- bash -lc "cat <<'EOF' >/etc/systemd/system/pulse.service.d/10-pulse-proxy.conf
[Service]
Environment=PULSE_SENSOR_PROXY_SOCKET=${MOUNT_TARGET}/pulse-sensor-proxy.sock
EOF"
            pct exec "$CTID" -- systemctl daemon-reload || true

            # Restart Pulse service to apply the new environment variable
            if pct exec "$CTID" -- systemctl is-active --quiet pulse 2>/dev/null; then
                print_info "Restarting Pulse service to apply configuration..."
pct exec "$CTID" -- systemctl restart pulse sleep 2 print_success "Pulse service restarted with proxy configuration" fi fi # Check for and remove legacy SSH keys from container print_info "Checking for legacy SSH keys in container..." LEGACY_KEYS_FOUND=false for key_type in id_rsa id_dsa id_ecdsa id_ed25519; do if pct exec "$CTID" -- test -f "/root/.ssh/$key_type" 2>/dev/null; then LEGACY_KEYS_FOUND=true if [ "$QUIET" != true ]; then print_warn "Found legacy SSH key: /root/.ssh/$key_type" fi pct exec "$CTID" -- rm -f "/root/.ssh/$key_type" "/root/.ssh/${key_type}.pub" print_info " Removed /root/.ssh/$key_type" fi done if [ "$LEGACY_KEYS_FOUND" = true ] && [ "$QUIET" != true ]; then print_info "" print_info "Legacy SSH keys removed from container for security" print_info "" fi else if [ -n "$LXC_CONFIG_BACKUP" ] && [ -f "$LXC_CONFIG_BACKUP" ]; then rm -f "$LXC_CONFIG_BACKUP" fi print_warn "Skipping container-side configuration until container $CTID is running." fi # Test proxy status print_info "Testing proxy status..." if systemctl is-active --quiet pulse-sensor-proxy; then print_info "${GREEN}✓${NC} pulse-sensor-proxy is running" else print_error "pulse-sensor-proxy is not running" print_info "Check logs: journalctl -u pulse-sensor-proxy -n 50" exit 1 fi fi # End of container-specific configuration if [[ "$SKIP_SELF_HEAL_SETUP" == "true" ]]; then print_info "Skipping self-heal safeguards during self-heal run" else # Install self-heal safeguards to keep proxy available print_info "Configuring self-heal safeguards..." if ! 
cache_installer_for_self_heal; then if [[ -n "$INSTALLER_CACHE_REASON" ]]; then print_warn "Unable to cache installer script for self-heal (${INSTALLER_CACHE_REASON})" else print_warn "Unable to cache installer script for self-heal" fi fi cat > "$SELFHEAL_SCRIPT" <<'EOF' #!/bin/bash set -euo pipefail SERVICE="pulse-sensor-proxy" BINARY_PATH="/opt/pulse/sensor-proxy/bin/pulse-sensor-proxy" INSTALLER="/opt/pulse/sensor-proxy/install-sensor-proxy.sh" CTID_FILE="/etc/pulse-sensor-proxy/ctid" PENDING_FILE="/etc/pulse-sensor-proxy/pending-control-plane.env" TOKEN_FILE="/etc/pulse-sensor-proxy/.pulse-control-token" CONFIG_FILE="/etc/pulse-sensor-proxy/config.yaml" LOG_TAG="pulse-sensor-proxy-selfheal" log() { logger -t "$LOG_TAG" "$1" } sanitize_allowed_nodes() { # Phase 2: Use config CLI instead of Python manipulation if [[ ! -f "$CONFIG_FILE" ]]; then return fi if [[ ! -x "$BINARY_PATH" ]]; then log "Binary not available; skipping sanitization" return fi # Use CLI to atomically migrate any inline blocks to file mode if "$BINARY_PATH" config migrate-to-file --config "$CONFIG_FILE" 2>&1 | grep -q "Migration complete"; then log "Migrated inline allowed_nodes to file mode" fi } attempt_control_plane_reconcile() { if [[ ! -f "$PENDING_FILE" ]]; then return fi if [[ -f "$TOKEN_FILE" ]]; then return fi if [[ ! 
-x "$INSTALLER" ]]; then return fi # shellcheck disable=SC1090 source "$PENDING_FILE" || return if [[ -z "${PENDING_PULSE_SERVER:-}" ]]; then return fi cmd=("$INSTALLER" "--skip-restart" "--quiet" "--pulse-server" "${PENDING_PULSE_SERVER}") if [[ "${PENDING_STANDALONE:-false}" == "true" ]]; then cmd+=("--standalone") if [[ "${PENDING_HTTP_MODE:-false}" == "true" ]]; then cmd+=("--http-mode") if [[ -n "${PENDING_HTTP_ADDR:-}" ]]; then cmd+=("--http-addr" "${PENDING_HTTP_ADDR}") fi fi else if [[ -f "$CTID_FILE" ]]; then cmd+=("--ctid" "$(cat "$CTID_FILE")") else log "CTID file missing; cannot reconcile control plane" return fi fi if PULSE_SENSOR_PROXY_SELFHEAL=1 bash "$INSTALLER" "${cmd[@]}"; then rm -f "$PENDING_FILE" sanitize_allowed_nodes else log "Control-plane reconciliation failed" fi } sanitize_allowed_nodes if ! command -v systemctl >/dev/null 2>&1; then exit 0 fi if ! systemctl list-unit-files 2>/dev/null | grep -q "^${SERVICE}\\.service"; then if [[ -x "$INSTALLER" && -f "$CTID_FILE" ]]; then log "Service unit missing; attempting reinstall" if PULSE_SENSOR_PROXY_SELFHEAL=1 bash "$INSTALLER" --ctid "$(cat "$CTID_FILE")" --skip-restart --quiet; then sanitize_allowed_nodes else log "Reinstall attempt failed" fi fi exit 0 fi if ! systemctl is-active --quiet "${SERVICE}.service"; then systemctl start "${SERVICE}.service" || true sleep 2 fi if ! 
systemctl is-active --quiet "${SERVICE}.service"; then
    if [[ -x "$INSTALLER" && -f "$CTID_FILE" ]]; then
        log "Service failed to start; attempting reinstall"
        if PULSE_SENSOR_PROXY_SELFHEAL=1 bash "$INSTALLER" --ctid "$(cat "$CTID_FILE")" --skip-restart --quiet; then
            sanitize_allowed_nodes
        else
            log "Reinstall attempt failed"
        fi
        systemctl start "${SERVICE}.service" || true
    fi
fi

attempt_control_plane_reconcile
EOF
    # The heredoc that ends just above is the payload written to
    # $SELFHEAL_SCRIPT (its opening lies outside this chunk); make it executable.
    chmod 0755 "$SELFHEAL_SCRIPT"

    # NOTE(review): the command below sends stdout to $SELFHEAL_SERVICE_UNIT and
    # names $SELFHEAL_TIMER_UNIT as stdin, while the heredoc body is a [Timer]
    # definition. It looks as if a service-unit heredoc and a separate
    # `cat > "$SELFHEAL_TIMER_UNIT"` were lost here. Left untouched because the
    # missing text cannot be recovered from this chunk - confirm against the
    # complete file before relying on it.
    cat > "$SELFHEAL_SERVICE_UNIT" < "$SELFHEAL_TIMER_UNIT" <<'EOF'
[Unit]
Description=Ensure pulse-sensor-proxy stays installed and running

[Timer]
OnBootSec=2min
OnUnitActiveSec=5min
Unit=pulse-sensor-proxy-selfheal.service

[Install]
WantedBy=timers.target
EOF

    systemctl daemon-reload
    # Best effort: a failure to enable the timer must not abort the install.
    systemctl enable --now pulse-sensor-proxy-selfheal.timer >/dev/null 2>&1 || true

    # A leftover pending file means an earlier control-plane sync did not
    # complete; kick the self-heal service now instead of waiting for the timer.
    if [[ -f "$PENDING_CONTROL_PLANE_FILE" ]]; then
        if [[ "$QUIET" != true ]]; then
            print_info "Pending control-plane sync detected; triggering immediate retry..."
        fi
        systemctl start pulse-sensor-proxy-selfheal.service >/dev/null 2>&1 || true
    fi
fi

# ---------------------------------------------------------------------------
# Final report.
#   QUIET=true  -> a single success line.
#   otherwise   -> full guidance: Docker bind-mount hint, Pulse restart
#                  (automatic when RESTART_PULSE=true), verification commands.
# NOTE(review): the Pulse restart logic lives only on the verbose path, so
# RESTART_PULSE has no effect when QUIET=true - confirm that this is intended.
# ---------------------------------------------------------------------------
if [[ "$QUIET" == true ]]; then
    print_success "pulse-sensor-proxy installed and running"
else
    print_info "${GREEN}Installation complete!${NC}"
    print_info ""
    print_info "Temperature monitoring will use the secure host-side proxy"
    print_info ""

    # Only show Docker configuration instructions if Pulse is actually running
    # in Docker on this host. The container list is captured once and matched
    # through a here-string: under `set -o pipefail`, `docker ps | grep -q` can
    # report failure when grep exits early and docker receives SIGPIPE.
    IS_PULSE_DOCKER=false
    if command -v docker >/dev/null 2>&1; then
        DOCKER_CONTAINER_NAMES="$(docker ps --format '{{.Names}}' 2>/dev/null || true)"
        if grep -q '^pulse$' <<<"$DOCKER_CONTAINER_NAMES"; then
            IS_PULSE_DOCKER=true
        fi
    fi

    if [[ "$STANDALONE" == true ]] && [[ "$IS_PULSE_DOCKER" == true ]]; then
        echo ""
        echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
        echo " Docker Container Configuration Required"
        echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
        echo ""
        print_info "${YELLOW}IMPORTANT:${NC} If Pulse is running in Docker, add this bind mount to your docker-compose.yml:"
        echo ""
        echo " volumes:"
        echo " - pulse-data:/data"
        echo " - /run/pulse-sensor-proxy:/run/pulse-sensor-proxy:ro"
        echo ""
        print_info "Then restart your Pulse container:"
        echo " docker-compose down && docker-compose up -d"
        echo ""
        print_info "Or if using Docker directly:"
        echo " docker restart pulse"
        echo ""
    fi

    # Check if Pulse needs to be restarted to pick up the proxy registration.
    # Precedence: systemd unit "pulse", then "pulse-hot-dev", then the Docker
    # container detected above (the probe result is reused, not repeated).
    PULSE_RESTART_CMD=""
    if systemctl is-active --quiet pulse 2>/dev/null; then
        PULSE_RESTART_CMD="systemctl restart pulse"
    elif systemctl is-active --quiet pulse-hot-dev 2>/dev/null; then
        PULSE_RESTART_CMD="systemctl restart pulse-hot-dev"
    elif [[ "$IS_PULSE_DOCKER" == true ]]; then
        PULSE_RESTART_CMD="docker restart pulse"
    fi

    if [[ -n "$PULSE_RESTART_CMD" ]]; then
        if [[ "$RESTART_PULSE" == true ]]; then
            echo ""
            print_info "Restarting Pulse to enable temperature monitoring..."
            # Split the fixed command string into argv and run it directly;
            # this avoids eval. PULSE_RESTART_CMD stays a string for display.
            read -r -a PULSE_RESTART_ARGV <<<"$PULSE_RESTART_CMD"
            if "${PULSE_RESTART_ARGV[@]}"; then
                sleep 3
                print_success "Pulse restarted successfully - temperature monitoring is now active"
            else
                print_warn "Failed to restart Pulse automatically. Please restart manually:"
                echo " $PULSE_RESTART_CMD"
            fi
            echo ""
        else
            echo ""
            echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
            echo " Pulse Restart Required"
            echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
            echo ""
            print_info "${YELLOW}IMPORTANT:${NC} Restart Pulse to enable temperature monitoring:"
            echo " sudo $PULSE_RESTART_CMD"
            echo ""
            print_info "Or add --restart-pulse flag to restart automatically:"
            echo " curl ... | bash -s -- ... --restart-pulse"
            echo ""
        fi
    fi

    print_info "To check proxy status:"
    print_info " systemctl status pulse-sensor-proxy"

    if [[ "$STANDALONE" == true ]]; then
        echo ""
        print_info "After restarting Pulse, verify the socket is accessible:"
        print_info " docker exec pulse ls -l /run/pulse-sensor-proxy/pulse-sensor-proxy.sock"
        echo ""
        print_info "Check Pulse logs for temperature proxy detection:"
        print_info " docker logs pulse | grep -i 'temperature.*proxy'"
        echo ""
        print_info "For detailed documentation, see:"
        print_info " https://github.com/rcourtman/Pulse/blob/main/docs/TEMPERATURE_MONITORING.md"
    fi
fi

exit 0