Dear @schlomm , team,
as I learned today, system limitations are sometimes hard and lead to unwanted results.
I ran into even more serious trouble with my Pi and its Wi-Fi, so I had to dig deeper.
The approach until now - because it “grew” over time - consisted of a “recovery” part and a “diagnosis” part.
This leads to - surprise, surprise :-) - inconsistent data, and so diagnosis is nearly impossible.
For this reason I merged both approaches into one script and implemented 4-stage error handling with successive escalation (up to a reboot).
At this stage I had to recognize: almost EVERY test resulted immediately in an error at stage 1 (ping) and was resolved at stage 2 (L2/L3 - ICMP problem – checking status of wlan interface).
To identify the root cause for this I - again - dug down deeply (ChatGPT was NOT that helpful!) and found: At systemd level (on my system!?) ping is not in PATH !!!
So a fully qualified call solved this problem - and most of my “problems” are solved !
If you are using “ping”, too, and are stuck with problems in scripts - keep this in mind: “/usr/bin/ping” might be really helpful for you.
If you are interested, here is my current recovery script - including some useful logging information:
/usr/local/bin/wlan-recovery.sh
#!/bin/bash
# ============================================================================
# WLAN Recovery Script (Monolithische Version)
# Autor: Dr. Ralf Korell, MD
# Datum: 2025-10-07
#
# Dieses Script wird per systemd-Timer regelmäßig aufgerufen.
# Es prüft die WLAN-Verbindung in mehreren Stufen und führt nur dann
# Recovery-Aktionen aus, wenn wirklich eine Unterbrechung vorliegt.
#
# Features:
# - Mehrstufige Diagnose (Ping, iw, IP, Route)
# - Schutz vor Fehlalarmen und Selbstabschüssen
# - SSH/VNC-Safe-Mode (keine Unterbrechung aktiver Sessions)
# - Logrotation + Statistikdatei
# ============================================================================
# === Konfiguration ==========================================================
# Log and statistics destinations. The log is rotated once it grows beyond
# MAX_LOG_SIZE bytes (~50 KB).
LOGFILE='/var/log/wlan-recovery.log'
STATSFILE='/var/log/wlan-recovery.stats'
MAX_LOG_SIZE=50000

# Address probed by ping to decide whether the WLAN connection works.
PING_TARGET='172.23.56.1'

# Number of consecutive failed checks required before recovery starts.
MAX_CONSECUTIVE_FAILS=2

# Marker file and length (in minutes) of the pause between recovery attempts.
COOLDOWN_FILE='/tmp/wlan-recovery.cooldown'
COOLDOWN_MINUTES=5

# Internal storage locations (do not change).
# NOTE(review): these are fixed names below /tmp; if the script runs as root,
# a directory such as /run may be a safer home - confirm before changing.
STATEFILE='/tmp/wlan-recovery.state'

# Timestamp taken once, when the script starts.
DATE_NOW="$(date '+%Y-%m-%d %H:%M:%S')"
# === Hilfsfunktionen ========================================================
# Write one timestamped message to stdout and append it to $LOGFILE.
# Globals:   LOGFILE (read)
# Arguments: $1 - message text
# Returns:   exit status of the pipeline (i.e. of tee)
#
# The timestamp is taken when the message is written rather than reused from
# script start: the recovery path sleeps between stages, so a start-time stamp
# would make later entries look as if they happened earlier than they did.
log() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '%s: %s\n' "$stamp" "$1" | tee -a "$LOGFILE"
}
# Rotate the log when it has grown past the size limit.
# Globals:   LOGFILE, MAX_LOG_SIZE, DATE_NOW (all read)
# Effect:    moves $LOGFILE to "$LOGFILE.old" and starts a fresh log holding a
#            single "Log rotated." line; does nothing when the log is missing
#            or not larger than MAX_LOG_SIZE bytes.
rotate_log() {
  local bytes
  [ -f "$LOGFILE" ] || return 0
  bytes=$(wc -c <"$LOGFILE")
  if (( bytes > MAX_LOG_SIZE )); then
    mv "$LOGFILE" "$LOGFILE.old"
    printf '%s\n' "$DATE_NOW: Log rotated." > "$LOGFILE"
  fi
}
# Increase the counter stored as "key=value" in $STATSFILE by one.
# Globals:   STATSFILE (read and rewritten)
# Arguments: $1 - counter name
# Effect:    the file is rebuilt via "${STATSFILE}.tmp": every line for other
#            keys is kept, and the updated counter is written as the last line.
#            A counter that is not present yet starts at 1.
increment_stat() {
  local key="$1"
  local pattern="^${key}="
  local scratch="${STATSFILE}.tmp"
  local current next
  current=$(grep "$pattern" "$STATSFILE" 2>/dev/null | cut -d= -f2)
  next=$((current + 1))
  {
    grep -v "$pattern" "$STATSFILE" 2>/dev/null
    printf '%s\n' "$key=$next"
  } > "$scratch"
  mv "$scratch" "$STATSFILE"
}
# Tell whether the cooldown period is still running.
# Globals:   COOLDOWN_FILE, COOLDOWN_MINUTES (read)
# Returns:   0 when the marker file exists and its modification time lies
#            fewer than COOLDOWN_MINUTES whole minutes in the past;
#            1 when there is no marker file or the period has elapsed.
cooldown_active() {
  local stamped_at current_time elapsed_min
  [ -f "$COOLDOWN_FILE" ] || return 1
  stamped_at=$(date -r "$COOLDOWN_FILE" +%s)
  current_time=$(date +%s)
  elapsed_min=$(( (current_time - stamped_at) / 60 ))
  (( elapsed_min < COOLDOWN_MINUTES ))
}
# Begin (or restart) the cooldown period by creating the marker file or
# refreshing its modification time.
# Globals:   COOLDOWN_FILE (read)
start_cooldown() {
  local marker="$COOLDOWN_FILE"
  touch "$marker"
}
# Tell whether an established TCP connection uses port 22 (SSH) or 5900 (VNC)
# on either end.
# Returns:   0 when at least one such connection is listed by ss, 1 otherwise.
#
# The port must be the complete port of an address field (":22" or ":5900" at
# the very end). A plain substring search would also accept ports that merely
# start with those digits (":2222", ":59001") or an IPv6 address containing
# "::22", and would then block recovery although no SSH/VNC session exists.
# Scanning every field keeps the check independent of which columns ss prints.
ssh_or_vnc_active() {
  ss -tn state established | awk '
    {
      for (i = 1; i <= NF; i++) {
        if ($i ~ /:(22|5900)$/) {
          hit = 1
        }
      }
    }
    END { exit (hit ? 0 : 1) }
  '
}
# === Diagnosefunktionen =====================================================
# Layer-2 check: succeeds when "iw dev wlan0 link" reports "Connected to".
# Error output of iw is discarded; a failing iw counts as "not connected".
is_connected_l2() {
  local link_info
  link_info=$(iw dev wlan0 link 2>/dev/null)
  [[ $link_info == *"Connected to"* ]]
}
# Layer-3 check: succeeds when "ip -4 addr show wlan0" lists an "inet " entry,
# i.e. wlan0 carries an IPv4 address. Error output of ip is discarded.
has_ip_l3() {
  local addr_info
  addr_info=$(ip -4 addr show wlan0 2>/dev/null)
  [[ $addr_info == *"inet "* ]]
}
# Routing check: succeeds when the route the kernel selects for $PING_TARGET
# goes out via wlan0 ("dev wlan0" in the output of "ip route get").
# Globals:   PING_TARGET (read)
has_route() {
  local route_info
  route_info=$(ip route get "$PING_TARGET" 2>/dev/null)
  [[ $route_info == *"dev wlan0"* ]]
}
# Reachability check: sends 3 echo requests to $PING_TARGET through wlan0,
# with a 2-second reply timeout, and returns ping's exit status. All output is
# discarded. ping is called by absolute path so the check does not depend on
# PATH.
# Globals:   PING_TARGET (read)
ping_ok() {
  local -a probe=(/usr/bin/ping -I wlan0 -c 3 -W 2 "$PING_TARGET")
  "${probe[@]}" >/dev/null 2>&1
}
# === Hauptlogik =============================================================
# Main flow: check connectivity once; only when the connection is really down,
# repeatedly so, outside the cooldown period and with no SSH/VNC connection
# open, run the recovery stages in order of increasing impact.
rotate_log
# Create the statistics file with zeroed counters if it does not exist yet
[ -f "$STATSFILE" ] || echo -e "success=0\nrecoveries=0\nfailures=0" > "$STATSFILE"
# Read the consecutive-failure counter left by the previous run (0 if none)
fails=0
[ -f "$STATEFILE" ] && fails=$(cat "$STATEFILE")
# Diagnosis: a successful ping means everything works -> reset counter, done
if ping_ok; then
log "Ping erfolgreich. WLAN funktioniert."
echo 0 > "$STATEFILE"
increment_stat "success"
exit 0
fi
# Ping failed -> run the additional checks
log "Ping fehlgeschlagen → erweiterte Diagnose..."
# Association, IPv4 address and route are all fine: treated as an ICMP-only
# problem. It is counted as a failure, but the consecutive-failure counter is
# reset and no recovery is attempted.
if is_connected_l2 && has_ip_l3 && has_route; then
log "L2/L3 ok → ICMP-Problem (kein Recovery)."
increment_stat "failures"
echo 0 > "$STATEFILE"
exit 0
fi
# From here on the connection is considered genuinely broken
fails=$((fails + 1))
echo "$fails" > "$STATEFILE"
# Below the threshold: only record the failure and wait for the next run
if [ $fails -lt $MAX_CONSECUTIVE_FAILS ]; then
log "Erster Fehler ($fails/$MAX_CONSECUTIVE_FAILS) → Beobachten..."
increment_stat "failures"
exit 0
fi
# Cooldown still running -> skip recovery (the counter stays as written above)
if cooldown_active; then
log "Cooldown aktiv → Recovery übersprungen."
exit 0
fi
# === Recovery stages ========================================================
# Never touch the interface while an SSH/VNC connection is established
if ssh_or_vnc_active; then
log "SSH/VNC aktiv → keine Recovery ausgeführt, nur geloggt."
increment_stat "failures"
exit 0
fi
log "Verbindung tatsächlich gestört → Recovery-Prozess gestartet."
# Counted once per recovery run, before the outcome is known
increment_stat "recoveries"
# Stage 1: gentle reassociation via wpa_supplicant
log "→ Stufe 1: wpa_supplicant Reassoziation..."
wpa_cli -i wlan0 reassociate >/dev/null 2>&1
# Wait before re-testing with ping
sleep 5
if ping_ok; then
log "Reassoziation erfolgreich."
echo 0 > "$STATEFILE"
start_cooldown
exit 0
fi
# Stage 2: take the interface down and bring it back up
log "→ Stufe 2: Interface Toggle..."
ip link set wlan0 down
sleep 2
ip link set wlan0 up
# Wait before re-testing with ping
sleep 8
if ping_ok; then
log "Interface Toggle erfolgreich."
echo 0 > "$STATEFILE"
start_cooldown
exit 0
fi
# Stage 3: reload the driver module
log "→ Stufe 3: Treiber-Reload..."
# The module is loaded again only if removing it succeeded
modprobe -r brcmfmac && modprobe brcmfmac
sleep 10
if ping_ok; then
log "Treiber-Reload erfolgreich."
echo 0 > "$STATEFILE"
start_cooldown
exit 0
fi
# All stages failed: record the failure, start the cooldown so the next runs
# do not retry immediately, and exit non-zero. The failure counter is not
# reset here.
# NOTE(review): the accompanying description mentions escalation up to a
# reboot; no reboot is issued in this script - confirm whether that is intended.
log "Alle Recovery-Stufen fehlgeschlagen → Fehler bleibt bestehen."
increment_stat "failures"
start_cooldown
exit 1
[EDIT: in the script above, I changed the ping count from -c 1 to -c 3; the line previously read:
/usr/bin/ping -I wlan0 -c 1 -W 2 "$PING_TARGET" >/dev/null 2>&1
]
Warmest regards,
Ralf