package queue

import (
	"errors"
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"
)

// LockName is the name of the presence-based lock file written next to
// queue.md while Append is mid-flight. The scheme is portable (no flock /
// fcntl) and recovers from a crashed eeco via the staleness check that
// acquireLock runs on contention.
const LockName = ".queue.lock"

// ErrLocked is returned by Append when the queue lock is already held
// by another live eeco invocation in the same workspace. Callers that
// must persist their item propagate it so the top-level CLI surfaces a
// non-zero exit with a clear message; "fire-and-forget" sites already
// discard it via `_ = queue.Append(...)`. The lock guarantees the queue
// file is never partially written: a contender either holds the lock
// and writes cleanly, or fails up-front without touching queue.md.
var ErrLocked = errors.New("queue is locked by another eeco invocation")

// staleLockAge is the conservative window after which a lock left over
// by a crashed run is considered abandoned. Append itself is
// millisecond-scale, so any healthy run releases the lock long before
// this; only a hard crash mid-write reaches the takeover path.
const staleLockAge = 5 * time.Minute

// acquireLock claims <stateDir>/.queue.lock for the lifetime of one
// Append. On success it returns a release closure that removes the
// lock; the caller defers it. On contention it returns ErrLocked, and
// on any failure the returned release closure is a no-op.
// A stale lock (older than staleLockAge, or owned by a dead PID on
// Unix) is removed and the claim is retried exactly once; if that retry
// loses to another contender, ErrLocked is returned.
func acquireLock(stateDir string) (release func(), err error) { lockPath := filepath.Join(stateDir, LockName) claimed, err := tryClaim(lockPath) if err != nil { return func() {}, err } if claimed { return func() { removeLock(lockPath) }, nil } if !lockIsStale(lockPath) { return func() {}, ErrLocked } if err := os.Remove(lockPath); err != nil && !errors.Is(err, fs.ErrNotExist) { return func() {}, fmt.Errorf("queue.lock: remove stale: %w", err) } claimed, err = tryClaim(lockPath) if err != nil { return func() {}, err } if !claimed { return func() {}, ErrLocked } return func() { removeLock(lockPath) }, nil } // tryClaim makes the exclusive O_CREATE|O_EXCL claim on the lock file // and writes its contents on success. It returns: // // (true, nil) — the lock was created cleanly and is now held; // (false, nil) — the lock already exists (genuine contention), so the // caller should run the staleness check; // (false, err) — a real I/O error. // // On Windows a lock file in "delete pending" state — a contender just // released it while a staleness probe (readLockPID) still held an open // read handle — makes CreateFile reject the exclusive create with // ERROR_ACCESS_DENIED rather than reporting the file as existing. That // window clears in microseconds once the stray handle closes, so the // create is retried briefly; if it never clears it is reported as // contention so the caller falls through to the staleness path instead // of erroring out. On Unix isPendingDelete is always false and the loop // runs exactly once. 
func tryClaim(lockPath string) (bool, error) { deadline := time.Now().Add(2 * time.Second) for { f, err := os.OpenFile(lockPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o644) if err == nil { writeLockContents(f) _ = f.Close() return true, nil } if errors.Is(err, fs.ErrExist) { return false, nil } if isPendingDelete(err) { if time.Now().Before(deadline) { time.Sleep(20 * time.Millisecond) continue } return false, nil } return false, fmt.Errorf("queue.lock: %w", err) } } // removeLock deletes the lock file, retrying briefly past the transient // sharing violation Windows raises when a contender's staleness probe // (readLockPID's os.ReadFile) holds an open handle at the moment of // deletion. Go opens files without FILE_SHARE_DELETE on Windows, so a // concurrent reader blocks DeleteFile until its handle closes — // microseconds later. Without the retry the releaser's os.Remove fails, // the lock file is orphaned, and the queue stays locked out until the // staleLockAge window elapses. On Unix the first Remove always wins. func removeLock(path string) { deadline := time.Now().Add(2 * time.Second) for { err := os.Remove(path) if err == nil || errors.Is(err, fs.ErrNotExist) { return } if time.Now().After(deadline) { return } time.Sleep(20 * time.Millisecond) } } func writeLockContents(f *os.File) { _, _ = fmt.Fprintf(f, "pid=%d\ntime=%s\n", os.Getpid(), time.Now().UTC().Format(time.RFC3339)) } // lockIsStale returns true when the existing lock at path is safe to // reclaim: either older than staleLockAge by mtime, or owned by a PID // that no longer exists (Unix only — see processAlive). 
func lockIsStale(path string) bool { info, err := os.Stat(path) if err != nil { return false } if time.Since(info.ModTime()) > staleLockAge { return true } pid, ok := readLockPID(path) if !ok { return false } return !processAlive(pid) } func readLockPID(path string) (int, bool) { b, err := os.ReadFile(path) if err != nil { return 0, false } for line := range strings.SplitSeq(string(b), "\n") { v, ok := strings.CutPrefix(strings.TrimSpace(line), "pid=") if !ok { continue } pid, err := strconv.Atoi(strings.TrimSpace(v)) if err != nil || pid <= 0 { return 0, false } return pid, true } return 0, false }