// Copyright (c) Microsoft. All rights reserved.

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Diagnostics;
using System.Linq;
using System.Security.Cryptography;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.AI;
using Microsoft.Shared.Diagnostics;

namespace Microsoft.Agents.AI.Tools.Shell;

/// <summary>
/// Sandboxed shell tool backed by a Docker (or compatible) container runtime.
/// </summary>
/// <remarks>
/// <para>
/// Implements <see cref="IShellExecutor"/> but executes commands inside a
/// container. The container is intended to be the security boundary, and the
/// defaults set up a hardened-looking configuration (<c>--network none</c>,
/// non-root user, <c>--read-only</c> root filesystem, <c>--cap-drop=ALL</c>,
/// <c>--security-opt=no-new-privileges</c>, memory and pids limits,
/// <c>--tmpfs /tmp</c>). These defaults are a best-effort baseline, NOT
/// a guarantee: the actual isolation you get depends on the host kernel,
/// the container runtime, the image, and any caller-supplied
/// <c>extraRunArgs</c>. Do not rely on this tool as your sole defense
/// against untrusted input. Pair it with the precautions you would
/// normally apply when running adversarial code: review the model's
/// output before acting on it, run on a host you can afford to lose,
/// keep approval gating on, monitor for resource exhaustion, and
/// consider stronger isolation (a dedicated VM, gVisor/Kata, network
/// segmentation) when stakes are high.
/// </para>
/// <para>
/// Persistent mode reuses <see cref="ShellSession"/> by launching
/// <c>docker exec -i &lt;container&gt; bash --noprofile --norc</c> as the
/// long-lived shell — the sentinel protocol works unchanged because the
/// host process is still a bash REPL connected over pipes. Stateless mode
/// runs each call in a fresh <c>docker run --rm</c>.
/// </para>
/// </remarks>
public sealed class DockerShellExecutor : IShellExecutor
{
    /// <summary>Default container image. A small Microsoft-maintained Linux base.</summary>
    internal const string DefaultImage = "mcr.microsoft.com/azurelinux/base/core:3.0";

    /// <summary>Default container user (nobody:nogroup on most distros).</summary>
    internal const string DefaultContainerUser = "65534:65534";

    /// <summary>Default Docker network mode (no network).</summary>
    internal const string DefaultNetwork = DockerNetworkMode.None;

    /// <summary>Default container memory limit, in bytes (512 MiB).</summary>
    internal const long DefaultMemoryBytes = 512L * 1024 * 1024;

    /// <summary>Default pids limit.</summary>
    internal const int DefaultPidsLimit = 256;

    /// <summary>Default container working directory.</summary>
    internal const string DefaultContainerWorkdir = "/workspace";

    private const int DefaultMaxOutputBytes = 64 * 1024;

    /// <summary>
    /// Recommended default per-command timeout (30 seconds). Pass this
    /// explicitly to the constructor to opt in to a bounded timeout. Note
    /// that <see langword="null"/> (the parameter default) means
    /// no timeout, matching the documented contract.
    /// </summary>
    public static readonly TimeSpan DefaultTimeout = TimeSpan.FromSeconds(30);

    private readonly string _image;
    private readonly ShellMode _mode;
    private readonly string? _hostWorkdir;
    private readonly string _containerWorkdir;
    private readonly bool _mountReadonly;
    private readonly string _network;
    private readonly long _memoryBytes;
    private readonly int _pidsLimit;
    private readonly string _user;
    private readonly bool _readOnlyRoot;
    private readonly IReadOnlyList<string> _extraRunArgs;
    private readonly IReadOnlyDictionary<string, string> _env;
    private readonly ShellPolicy _policy;
    private readonly TimeSpan? _timeout;
    private readonly int _maxOutputBytes;

    // Mutable lifecycle state; _session and _containerStarted are only written while
    // holding _lifecycleLock.
    private ShellSession? _session;
    private bool _containerStarted;
    private readonly SemaphoreSlim _lifecycleLock = new(1, 1);

    // 0 = live, 1 = DisposeAsync has run (or is running). Makes disposal idempotent.
    private int _disposed;

    /// <summary>
    /// Initializes a new instance of the <see cref="DockerShellExecutor"/> class.
    /// </summary>
    /// <param name="image">OCI image to run. Must include <c>bash</c> and (for persistent mode) <c>sleep</c>.</param>
    /// <param name="containerName">Optional container name. When <see langword="null"/>, a unique name is generated.</param>
    /// <param name="mode">Execution mode. Defaults to <see cref="ShellMode.Persistent"/>.</param>
    /// <param name="hostWorkdir">Optional host directory mounted at <paramref name="containerWorkdir"/>. Mounted read-only by default.</param>
    /// <param name="containerWorkdir">Path inside the container. Defaults to <c>/workspace</c>.</param>
    /// <param name="mountReadonly">When <see langword="true"/> (default), the host workdir is mounted read-only.</param>
    /// <param name="network">Docker network mode. Defaults to <see cref="DefaultNetwork"/>. See <see cref="DockerNetworkMode"/> for well-known values.</param>
    /// <param name="memoryBytes">Container memory limit, in bytes. <see langword="null"/> selects <see cref="DefaultMemoryBytes"/> (512 MiB).</param>
    /// <param name="pidsLimit">Max processes inside the container.</param>
    /// <param name="user">UID:GID. Defaults to <c>65534:65534</c> (nobody).</param>
    /// <param name="readOnlyRoot">When <see langword="true"/> (default), the container root filesystem is read-only.</param>
    /// <param name="extraRunArgs">Additional args appended to <c>docker run</c>.</param>
    /// <param name="environment">Environment variables passed via <c>-e</c> to <c>docker run</c>.</param>
    /// <param name="policy">Optional <see cref="ShellPolicy"/> evaluated before every command. A default-constructed policy is used when <see langword="null"/>.</param>
    /// <param name="timeout">Per-command timeout. <see langword="null"/> disables timeouts.</param>
    /// <param name="maxOutputBytes">Per-stream cap before head+tail truncation.</param>
    /// <param name="dockerBinary">Override the container CLI (e.g. <c>podman</c>).</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="maxOutputBytes"/> or <paramref name="memoryBytes"/> is zero or negative.
    /// </exception>
    public DockerShellExecutor(
        string image = DefaultImage,
        string? containerName = null,
        ShellMode mode = ShellMode.Persistent,
        string? hostWorkdir = null,
        string containerWorkdir = DefaultContainerWorkdir,
        bool mountReadonly = true,
        string network = DefaultNetwork,
        long? memoryBytes = null,
        int pidsLimit = DefaultPidsLimit,
        string user = DefaultContainerUser,
        bool readOnlyRoot = true,
        IReadOnlyList<string>? extraRunArgs = null,
        IReadOnlyDictionary<string, string>? environment = null,
        ShellPolicy? policy = null,
        TimeSpan? timeout = null,
        int maxOutputBytes = DefaultMaxOutputBytes,
        string dockerBinary = "docker")
    {
        _ = Throw.IfNull(image);
        if (maxOutputBytes <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(maxOutputBytes));
        }

        if (memoryBytes is <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(memoryBytes), "memoryBytes must be positive.");
        }

        this._image = image;
        this.ContainerName = containerName ?? GenerateContainerName();
        this._mode = mode;
        this._hostWorkdir = hostWorkdir;

        // The ?? fallbacks tolerate callers that pass null despite the non-nullable annotations.
        this._containerWorkdir = containerWorkdir ?? DefaultContainerWorkdir;
        this._mountReadonly = mountReadonly;
        this._network = network ?? DefaultNetwork;
        this._memoryBytes = memoryBytes ?? DefaultMemoryBytes;
        this._pidsLimit = pidsLimit;
        this._user = user ?? DefaultContainerUser;
        this._readOnlyRoot = readOnlyRoot;
        this._extraRunArgs = extraRunArgs ?? Array.Empty<string>();
        this._env = environment ?? new Dictionary<string, string>();
        this._policy = policy ?? new ShellPolicy();
        this._timeout = timeout;
        this._maxOutputBytes = maxOutputBytes;
        this.DockerBinary = dockerBinary ?? "docker";
    }

    /// <summary>Gets the container name (auto-generated when not specified at construction).</summary>
    public string ContainerName { get; }

    /// <summary>Gets the docker binary path.</summary>
    public string DockerBinary { get; }

    /// <summary>Eagerly start the container (and inner shell session in persistent mode).</summary>
    /// <param name="cancellationToken">Cancels waiting for the lifecycle lock and the container start.</param>
    /// <returns>A task that completes once the container (and, in persistent mode, the session object) exists.</returns>
    /// <exception cref="DockerNotAvailableException">The container could not be started.</exception>
    public async Task InitializeAsync(CancellationToken cancellationToken = default)
    {
        await this._lifecycleLock.WaitAsync(cancellationToken).ConfigureAwait(false);
        try
        {
            if (!this._containerStarted)
            {
                await this.StartContainerAsync(cancellationToken).ConfigureAwait(false);
                this._containerStarted = true;
            }

            // Checked separately from _containerStarted: if session construction failed on a
            // previous attempt, the next call must retry it instead of returning early and
            // leaving persistent-mode callers with a null session.
            if (this._mode == ShellMode.Persistent && this._session is null)
            {
                var execArgv = BuildExecArgv(this.DockerBinary, this.ContainerName);

                // BuildExecArgv already includes the bash flags
                // (--noprofile --norc) at the end of the argv. We pass
                // ShellKind.Sh here (not Bash) because Sh's
                // PersistentArgv() returns an empty suffix and forwards
                // ExtraArgv unchanged; Bash would re-append
                // --noprofile/--norc and produce a duplicated argv.
                var inner = new ResolvedShell(execArgv[0], ShellKind.Sh, ExtraArgv: execArgv.Skip(1).ToArray());
                this._session = new ShellSession(
                    inner,
                    workingDirectory: null, // workdir is set on the container itself
                    confineWorkingDirectory: false,
                    environment: null,
                    cleanEnvironment: false,
                    maxOutputBytes: this._maxOutputBytes);
            }
        }
        finally
        {
            _ = this._lifecycleLock.Release();
        }
    }

    /// <summary>Stop the inner shell session and tear down the container.</summary>
    /// <param name="cancellationToken">Cancels waiting for the lifecycle lock only; teardown itself is not cancelled by it.</param>
    /// <returns>A task that completes when the session is disposed and container removal has been attempted.</returns>
    public async Task ShutdownAsync(CancellationToken cancellationToken = default)
    {
        await this._lifecycleLock.WaitAsync(cancellationToken).ConfigureAwait(false);
        try
        {
            if (this._session is not null)
            {
                try
                {
                    await this._session.DisposeAsync().ConfigureAwait(false);
                }
                finally
                {
                    // Drop the reference even if disposal threw so it is never reused.
                    this._session = null;
                }
            }

            if (this._containerStarted)
            {
                await this.StopContainerAsync().ConfigureAwait(false);
                this._containerStarted = false;
            }
        }
        finally
        {
            _ = this._lifecycleLock.Release();
        }
    }

    /// <summary>Run a single command inside the container.</summary>
    /// <param name="command">The shell command text.</param>
    /// <param name="cancellationToken">Cancels the command.</param>
    /// <returns>The command result.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="command"/> is <see langword="null"/>.</exception>
    /// <exception cref="ShellCommandRejectedException">Thrown when the policy denies the command.</exception>
    /// <exception cref="InvalidOperationException">
    /// Persistent mode only: the session was torn down between initialization and dispatch.
    /// </exception>
    public async Task<ShellResult> RunAsync(string command, CancellationToken cancellationToken = default)
    {
        if (command is null)
        {
            throw new ArgumentNullException(nameof(command));
        }

        var decision = this._policy.Evaluate(new ShellRequest(command, this._containerWorkdir));
        if (!decision.Allowed)
        {
            throw new ShellCommandRejectedException(
                $"Command rejected by policy: {decision.Reason ?? "(unspecified)"}");
        }

        if (this._mode != ShellMode.Persistent)
        {
            return await this.RunStatelessAsync(command, cancellationToken).ConfigureAwait(false);
        }

        // Capture the session in a local so a concurrent ShutdownAsync cannot turn the
        // dispatch below into a NullReferenceException.
        var session = this._session;
        if (session is null)
        {
            await this.InitializeAsync(cancellationToken).ConfigureAwait(false);
            session = this._session
                ?? throw new InvalidOperationException("The shell session was shut down before the command could be dispatched.");
        }

        return await session.RunAsync(command, this._timeout, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Returns <see langword="true"/> when this tool's effective
    /// configuration matches the recommended hardening defaults — no
    /// network, non-root user, read-only root filesystem, the host mount
    /// (if any) is read-only, and no caller-supplied <c>extraRunArgs</c>
    /// have been added. This is a configuration-shape check, not a
    /// security guarantee; isolation still depends on the host kernel,
    /// the container runtime, and the image.
    /// <see cref="AsAIFunction"/> uses this signal to choose a default for <c>requireApproval</c>:
    /// when the configuration has been relaxed it leaves approval
    /// gating on, but you should always make the approval/policy
    /// decision deliberately rather than relying on this default.
    /// </summary>
    public bool IsHardenedConfiguration =>
        StringComparer.Ordinal.Equals(this._network, "none")
        && !IsRootUser(this._user)
        && this._readOnlyRoot
        && (this._hostWorkdir is null || this._mountReadonly)
        && this._extraRunArgs.Count == 0;

    /// <summary>Format a byte count into the value passed to <c>docker --memory</c> (e.g. <c>536870912b</c>).</summary>
    /// <param name="memoryBytes">The limit in bytes.</param>
    /// <returns>The invariant-culture decimal digits followed by <c>b</c>.</returns>
    internal static string FormatMemoryBytes(long memoryBytes)
        => memoryBytes.ToString(System.Globalization.CultureInfo.InvariantCulture) + "b";

    /// <summary>Decide whether a <c>--user</c> value should be treated as root for the approval default.</summary>
    /// <param name="user">The user spec; only the part before the first <c>:</c> is inspected.</param>
    /// <returns>
    /// <see langword="true"/> for an empty value, the name <c>root</c> (any casing), UID <c>0</c>,
    /// or any UID part that does not parse as an <see cref="int"/>.
    /// </returns>
    private static bool IsRootUser(string user)
    {
        // user is typically "uid:gid" (e.g. "65534:65534") or "0", "0:0",
        // "root", or "root:root". Anything we cannot parse is treated as
        // root for the purpose of the safety default — fail safe.
        if (string.IsNullOrEmpty(user))
        {
            return true;
        }

        var uidPart = user.Split(':')[0];
        if (uidPart.Equals("root", StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }

        return !int.TryParse(
                   uidPart,
                   System.Globalization.NumberStyles.Integer,
                   System.Globalization.CultureInfo.InvariantCulture,
                   out var uid)
               || uid == 0;
    }

    /// <summary>
    /// Build the AIFunction for this tool.
    /// </summary>
    /// <remarks>
    /// When <paramref name="requireApproval"/> is <see langword="null"/>
    /// (the default), approval is enabled iff
    /// <see cref="IsHardenedConfiguration"/> is <see langword="false"/>.
    /// In other words: if the caller relaxed any hardening knob (for
    /// example by setting <c>network: "host"</c>, running as
    /// <c>0:0</c>, disabling <c>readOnlyRoot</c>, granting a writable
    /// host mount, or supplying <c>extraRunArgs</c>), the tool falls
    /// back to requiring approval. This is a convenience default, not
    /// a security recommendation — you should treat the
    /// approval/policy decision as a deliberate choice for the agent
    /// you are building, not as something this method picks correctly
    /// for you.
    /// </remarks>
    /// <param name="name">Function name surfaced to the model.</param>
    /// <param name="description">Function description for the model.</param>
    /// <param name="requireApproval">
    /// <see langword="true"/> always wraps in
    /// <see cref="ApprovalRequiredAIFunction"/>;
    /// <see langword="false"/> never does; <see langword="null"/> (the default) wraps iff
    /// <see cref="IsHardenedConfiguration"/> is <see langword="false"/>.
    /// </param>
    /// <returns>The function, optionally wrapped for approval.</returns>
    public AIFunction AsAIFunction(string name = "run_shell", string? description = null, bool? requireApproval = null)
    {
        var effectiveRequireApproval = requireApproval ?? !this.IsHardenedConfiguration;

        description ??=
            "Execute a single shell command inside an isolated Docker container and return its " +
            "stdout, stderr, and exit code. The container has no network, no host filesystem access " +
            "(except an optional read-only workspace mount), and runs as a non-root user. " +
            (this._mode == ShellMode.Persistent
                ? "PERSISTENT MODE: a single long-lived container handles every call; cd and exported variables persist."
                : "STATELESS MODE: each call runs in a fresh container.");

        var fn = AIFunctionFactory.Create(
            async ([Description("The shell command to execute.")] string command, CancellationToken cancellationToken) =>
            {
                try
                {
                    var result = await this.RunAsync(command, cancellationToken).ConfigureAwait(false);
                    return result.FormatForModel();
                }
                catch (ShellCommandRejectedException ex)
                {
                    // ex.Message already starts with "Command rejected by policy: ...".
                    // Returned as text so the model sees the rejection instead of a tool failure.
                    return ex.Message;
                }
            },
            new AIFunctionFactoryOptions { Name = name, Description = description });

        return effectiveRequireApproval ? new ApprovalRequiredAIFunction(fn) : fn;
    }

    /// <summary>
    /// Shuts the executor down and releases the lifecycle lock. Safe to call more than once;
    /// calls after the first return immediately.
    /// </summary>
    /// <returns>A task that completes when shutdown has finished.</returns>
    public async ValueTask DisposeAsync()
    {
        // A second call would otherwise wait on an already-disposed semaphore and throw
        // ObjectDisposedException.
        if (Interlocked.Exchange(ref this._disposed, 1) != 0)
        {
            return;
        }

        try
        {
            await this.ShutdownAsync().ConfigureAwait(false);
        }
        finally
        {
            this._lifecycleLock.Dispose();
        }
    }

    /// <summary>
    /// Probe whether the configured docker binary can be reached. Returns
    /// <see langword="true"/> only if the binary can be launched and
    /// <c>docker version</c> exits with code 0 within ~5 seconds.
    /// </summary>
    /// <param name="binary">The container CLI to probe.</param>
    /// <param name="cancellationToken">Cancels the probe; a cancelled probe reports <see langword="false"/>.</param>
    /// <returns><see langword="true"/> when the probe succeeded; otherwise <see langword="false"/>.</returns>
    public static async Task<bool> IsAvailableAsync(string binary = "docker", CancellationToken cancellationToken = default)
    {
        try
        {
            // Output is redirected but never read; only the exit code is inspected.
            var psi = new ProcessStartInfo
            {
                FileName = binary,
                RedirectStandardOutput = true,
                RedirectStandardError = true,
                UseShellExecute = false,
                CreateNoWindow = true,
            };
            psi.ArgumentList.Add("version");
            psi.ArgumentList.Add("--format");
            psi.ArgumentList.Add("{{.Server.Version}}");

            using var proc = new Process { StartInfo = psi };
            if (!proc.Start())
            {
                return false;
            }

            using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
            cts.CancelAfter(TimeSpan.FromSeconds(5));
            try
            {
                await proc.WaitForExitAsync(cts.Token).ConfigureAwait(false);
            }
            catch (OperationCanceledException)
            {
                TryKillProcessTree(proc);
                return false;
            }

            return proc.ExitCode == 0;
        }
        catch (Win32Exception)
        {
            return false;
        }
        catch (InvalidOperationException)
        {
            return false;
        }
    }

    // ------------------------------------------------------------------
    // Pure argv builders — kept side-effect-free so tests don't need Docker.
    // ------------------------------------------------------------------

    /// <summary>Build the <c>docker run -d</c> argv that starts the long-lived container.</summary>
    /// <param name="binary">Container CLI; becomes <c>argv[0]</c>.</param>
    /// <param name="image">Image to run.</param>
    /// <param name="containerName">Value for <c>--name</c>.</param>
    /// <param name="user">Value for <c>--user</c>.</param>
    /// <param name="network">Value for <c>--network</c>.</param>
    /// <param name="memoryBytes">Memory limit in bytes, formatted via <see cref="FormatMemoryBytes"/>.</param>
    /// <param name="pidsLimit">Value for <c>--pids-limit</c>.</param>
    /// <param name="workdir">Value for <c>--workdir</c> and the mount target.</param>
    /// <param name="hostWorkdir">Host directory to bind-mount, or <see langword="null"/> for no mount.</param>
    /// <param name="mountReadonly">Selects the <c>ro</c> or <c>rw</c> mount option.</param>
    /// <param name="readOnlyRoot">Adds <c>--read-only</c> when <see langword="true"/>.</param>
    /// <param name="extraEnv">Optional <c>-e KEY=VALUE</c> pairs.</param>
    /// <param name="extraArgs">Optional args appended verbatim before the image.</param>
    /// <returns>The full argv, ending with the image followed by <c>sleep infinity</c>.</returns>
    public static IReadOnlyList<string> BuildRunArgv(
        string binary,
        string image,
        string containerName,
        string user,
        string network,
        long memoryBytes,
        int pidsLimit,
        string workdir,
        string? hostWorkdir,
        bool mountReadonly,
        bool readOnlyRoot,
        IReadOnlyDictionary<string, string>? extraEnv,
        IReadOnlyList<string>? extraArgs)
    {
        var argv = new List<string> { binary, "run", "-d", "--rm" };
        AppendCommonRunOptions(
            argv, containerName, user, network, memoryBytes, pidsLimit,
            workdir, hostWorkdir, mountReadonly, readOnlyRoot, extraEnv, extraArgs);

        argv.Add(image);
        argv.Add("sleep");
        argv.Add("infinity");
        return argv;
    }

    /// <summary>
    /// Build the <c>docker exec -i &lt;container&gt; bash --noprofile --norc</c> argv for
    /// the persistent inner shell. This method intentionally does
    /// not produce a stand-alone command argv.
    /// </summary>
    /// <param name="binary">Container CLI; becomes <c>argv[0]</c>.</param>
    /// <param name="containerName">Name of the running container to exec into.</param>
    /// <returns>The exec argv.</returns>
    public static IReadOnlyList<string> BuildExecArgv(string binary, string containerName)
    {
        return new List<string>
        {
            binary, "exec", "-i", containerName,
            "bash", "--noprofile", "--norc",
        };
    }

    /// <summary>
    /// Append the options shared by the persistent and stateless <c>docker run</c> invocations,
    /// from <c>--name</c> through the caller-supplied extra args. Kept in one place so the
    /// two modes cannot drift apart.
    /// </summary>
    private static void AppendCommonRunOptions(
        List<string> argv,
        string containerName,
        string user,
        string network,
        long memoryBytes,
        int pidsLimit,
        string workdir,
        string? hostWorkdir,
        bool mountReadonly,
        bool readOnlyRoot,
        IReadOnlyDictionary<string, string>? extraEnv,
        IReadOnlyList<string>? extraArgs)
    {
        argv.AddRange(new[]
        {
            "--name", containerName,
            "--user", user,
            "--network", network,
            "--memory", FormatMemoryBytes(memoryBytes),
            "--pids-limit", pidsLimit.ToString(System.Globalization.CultureInfo.InvariantCulture),
            "--cap-drop", "ALL",
            "--security-opt", "no-new-privileges",
            "--tmpfs", "/tmp:rw,nosuid,nodev,size=64m",
            "--workdir", workdir,
        });

        if (readOnlyRoot)
        {
            argv.Add("--read-only");
        }

        if (hostWorkdir is not null)
        {
            var access = mountReadonly ? "ro" : "rw";
            argv.Add("-v");
            argv.Add($"{hostWorkdir}:{workdir}:{access}");
        }

        if (extraEnv is not null)
        {
            foreach (var kv in extraEnv)
            {
                argv.Add("-e");
                argv.Add($"{kv.Key}={kv.Value}");
            }
        }

        if (extraArgs is not null)
        {
            // Appended last so they sit directly before the image in the final argv.
            argv.AddRange(extraArgs);
        }
    }

    /// <summary>Start the long-lived container with <c>docker run -d</c>.</summary>
    /// <exception cref="DockerNotAvailableException">
    /// The binary could not be launched, or <c>docker run</c> exited with a non-zero code.
    /// </exception>
    private async Task StartContainerAsync(CancellationToken cancellationToken)
    {
        var argv = BuildRunArgv(
            this.DockerBinary,
            this._image,
            this.ContainerName,
            this._user,
            this._network,
            this._memoryBytes,
            this._pidsLimit,
            this._containerWorkdir,
            this._hostWorkdir,
            this._mountReadonly,
            this._readOnlyRoot,
            this._env,
            this._extraRunArgs);

        int exit;
        string stderr;
        try
        {
            (exit, _, stderr) = await RunDockerCommandAsync(argv, cancellationToken).ConfigureAwait(false);
        }
        catch (Win32Exception ex)
        {
            // Process.Start failed (for example the binary is not on PATH). Surface it as
            // the exception type documented for "docker is not usable".
            throw new DockerNotAvailableException($"Failed to launch '{this.DockerBinary}': {ex.Message}", ex);
        }

        if (exit != 0)
        {
            throw new DockerNotAvailableException(
                $"Failed to start container ({exit}): {stderr.Trim()}");
        }
    }

    /// <summary>Force-remove the container with <c>docker rm -f</c>, giving up after 10 seconds.</summary>
    private async Task StopContainerAsync()
    {
        var argv = new[] { this.DockerBinary, "rm", "-f", this.ContainerName };
        try
        {
            using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(10));
            _ = await RunDockerCommandAsync(argv, cts.Token).ConfigureAwait(false);
        }
        catch (Exception ex) when (ex is OperationCanceledException || ex is Win32Exception || ex is InvalidOperationException)
        {
            // Best-effort teardown.
        }
    }

    /// <summary>Run one command in a fresh <c>docker run --rm -i</c> container.</summary>
    /// <param name="command">Command text passed to <c>bash -c</c>.</param>
    /// <param name="cancellationToken">Caller cancellation; the container is killed before the cancellation propagates.</param>
    /// <returns>
    /// The captured result. On timeout, <c>TimedOut</c> is <see langword="true"/> and the exit code is 124.
    /// </returns>
    /// <exception cref="ShellExecutionException">The docker binary could not be launched.</exception>
    private async Task<ShellResult> RunStatelessAsync(string command, CancellationToken cancellationToken)
    {
        var perCallName = GenerateContainerName();
        var argv = this.BuildRunArgvStateless(perCallName);
        argv.Add(this._image);
        argv.Add("bash");
        argv.Add("-c");
        argv.Add(command);

        var stopwatch = Stopwatch.StartNew();
        var stdoutBuf = new HeadTailBuffer(this._maxOutputBytes);
        var stderrBuf = new HeadTailBuffer(this._maxOutputBytes);

        var psi = new ProcessStartInfo
        {
            FileName = argv[0],
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            UseShellExecute = false,
            CreateNoWindow = true,
        };
        for (var i = 1; i < argv.Count; i++)
        {
            psi.ArgumentList.Add(argv[i]);
        }

        using var proc = new Process { StartInfo = psi, EnableRaisingEvents = true };
        proc.OutputDataReceived += (_, e) =>
        {
            if (e.Data is not null)
            {
                stdoutBuf.AppendLine(e.Data);
            }
        };
        proc.ErrorDataReceived += (_, e) =>
        {
            if (e.Data is not null)
            {
                stderrBuf.AppendLine(e.Data);
            }
        };

        try
        {
            _ = proc.Start();
        }
        catch (Win32Exception ex)
        {
            throw new ShellExecutionException($"Failed to launch '{this.DockerBinary}': {ex.Message}", ex);
        }

        proc.BeginOutputReadLine();
        proc.BeginErrorReadLine();

        var timedOut = false;

        // With no configured timeout, timeoutCts is simply never cancelled.
        using var timeoutCts = this._timeout is null
            ? new CancellationTokenSource()
            : new CancellationTokenSource(this._timeout.Value);
        using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken, timeoutCts.Token);

        try
        {
            await proc.WaitForExitAsync(linkedCts.Token).ConfigureAwait(false);
        }
        catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested && !cancellationToken.IsCancellationRequested)
        {
            timedOut = true;

            // Kill the running container by name; --rm reaps it.
            await this.KillAndReapAsync(proc, perCallName).ConfigureAwait(false);
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Caller-driven cancellation: --rm only fires when PID 1 exits, so
            // if we just propagate, the container keeps running indefinitely.
            // Kill it explicitly before rethrowing so we don't leak containers.
            await this.KillAndReapAsync(proc, perCallName).ConfigureAwait(false);
            throw;
        }

        // Synchronous wait after the async one, kept from the original implementation;
        // presumably it lets the output event handlers finish before the buffers are read.
        proc.WaitForExit();
        stopwatch.Stop();

        var (sout, soutT) = stdoutBuf.ToFinalString();
        var (serr, serrT) = stderrBuf.ToFinalString();
        return new ShellResult(
            Stdout: sout,
            Stderr: serr,
            ExitCode: timedOut ? 124 : proc.ExitCode,
            Duration: stopwatch.Elapsed,
            Truncated: soutT || serrT,
            TimedOut: timedOut);
    }

    /// <summary>
    /// Kill the named per-call container (best effort) and then wait for the local
    /// <c>docker run</c> process to exit.
    /// </summary>
    private async Task KillAndReapAsync(Process proc, string containerName)
    {
        await this.BestEffortKillContainerAsync(containerName).ConfigureAwait(false);

        // NOTE(review): this wait has no upper bound. If the container kill above failed,
        // the wait presumably lasts until the command finishes by itself — confirm whether
        // a bounded grace period is wanted here.
        try
        {
            await proc.WaitForExitAsync(CancellationToken.None).ConfigureAwait(false);
        }
        catch (Exception ex) when (ex is InvalidOperationException || ex is Win32Exception)
        {
        }
    }

    /// <summary>
    /// Build the <c>docker run --rm -i</c> prefix for a stateless call. The caller appends
    /// the image and the command.
    /// </summary>
    /// <param name="perCallName">Unique container name for this call, so it can be killed by name.</param>
    /// <returns>A new mutable argv list.</returns>
    private List<string> BuildRunArgvStateless(string perCallName)
    {
        var argv = new List<string> { this.DockerBinary, "run", "--rm", "-i" };
        AppendCommonRunOptions(
            argv,
            perCallName,
            this._user,
            this._network,
            this._memoryBytes,
            this._pidsLimit,
            this._containerWorkdir,
            this._hostWorkdir,
            this._mountReadonly,
            this._readOnlyRoot,
            this._env,
            this._extraRunArgs);
        return argv;
    }

    /// <summary>Send <c>docker kill --signal KILL</c> to a container, giving up after 5 seconds.</summary>
    private async Task BestEffortKillContainerAsync(string containerName)
    {
        try
        {
            using var killCts = new CancellationTokenSource(TimeSpan.FromSeconds(5));
            _ = await RunDockerCommandAsync(
                new[] { this.DockerBinary, "kill", "--signal", "KILL", containerName },
                killCts.Token).ConfigureAwait(false);
        }
        catch (Exception ex) when (ex is OperationCanceledException || ex is Win32Exception || ex is InvalidOperationException)
        {
            // best-effort: container may already be gone
        }
    }

    /// <summary>Run a helper docker command to completion and capture its output.</summary>
    /// <param name="argv">Full argv; element 0 is the executable.</param>
    /// <param name="cancellationToken">Cancels the wait; the helper process is killed before the cancellation propagates.</param>
    /// <returns>The exit code plus captured stdout and stderr (each capped at 1 MiB).</returns>
    /// <exception cref="OperationCanceledException">The token was cancelled before the process exited.</exception>
    private static async Task<(int ExitCode, string Stdout, string Stderr)> RunDockerCommandAsync(
        IReadOnlyList<string> argv,
        CancellationToken cancellationToken)
    {
        var psi = new ProcessStartInfo
        {
            FileName = argv[0],
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            UseShellExecute = false,
            CreateNoWindow = true,
        };
        for (var i = 1; i < argv.Count; i++)
        {
            psi.ArgumentList.Add(argv[i]);
        }

        // Cap helper-command output at 1 MiB. The commands routed through here
        // (`docker run -d`, `docker rm -f`, `docker kill`) shouldn't produce more
        // than that; bound the buffer so we never exhaust memory on misbehaviour.
        const int HelperOutputCap = 1 * 1024 * 1024;
        var stdoutBuf = new HeadTailBuffer(HelperOutputCap);
        var stderrBuf = new HeadTailBuffer(HelperOutputCap);

        using var proc = new Process { StartInfo = psi, EnableRaisingEvents = true };
        proc.OutputDataReceived += (_, e) =>
        {
            if (e.Data is not null)
            {
                stdoutBuf.AppendLine(e.Data);
            }
        };
        proc.ErrorDataReceived += (_, e) =>
        {
            if (e.Data is not null)
            {
                stderrBuf.AppendLine(e.Data);
            }
        };

        _ = proc.Start();
        proc.BeginOutputReadLine();
        proc.BeginErrorReadLine();

        try
        {
            await proc.WaitForExitAsync(cancellationToken).ConfigureAwait(false);
        }
        catch (OperationCanceledException)
        {
            // Disposing the Process object does not stop the child; kill it explicitly
            // so a cancelled helper does not keep running in the background.
            TryKillProcessTree(proc);
            throw;
        }

        proc.WaitForExit();
        return (proc.ExitCode, stdoutBuf.ToFinalString().text, stderrBuf.ToFinalString().text);
    }

    /// <summary>Kill a process and its descendants, ignoring any failure.</summary>
    private static void TryKillProcessTree(Process process)
    {
        try
        {
            process.Kill(entireProcessTree: true);
        }
        catch
        {
            // Best-effort: the process may already have exited, and a failure here
            // must not mask the cancellation that triggered the kill.
        }
    }

    /// <summary>Generate a container name of the form <c>af-shell-</c> plus 12 lowercase hex digits.</summary>
    private static string GenerateContainerName()
    {
        var bytes = new byte[6];
#if NET6_0_OR_GREATER
        RandomNumberGenerator.Fill(bytes);
#else
        using var rng = RandomNumberGenerator.Create();
        rng.GetBytes(bytes);
#endif
#pragma warning disable CA1308
        return "af-shell-" + Convert.ToHexString(bytes).ToLowerInvariant();
#pragma warning restore CA1308
    }
}

/// <summary>
/// Thrown when the configured docker (or compatible) binary cannot start a
/// container — typically because the daemon isn't running, the image
/// can't be pulled, or the binary isn't on PATH.
/// </summary>
public sealed class DockerNotAvailableException : Exception
{
    /// <summary>Initializes a new instance of the <see cref="DockerNotAvailableException"/> class.</summary>
    public DockerNotAvailableException()
    {
    }

    /// <summary>Initializes a new instance of the <see cref="DockerNotAvailableException"/> class.</summary>
    /// <param name="message">The exception message.</param>
    public DockerNotAvailableException(string message)
        : base(message)
    {
    }

    /// <summary>Initializes a new instance of the <see cref="DockerNotAvailableException"/> class.</summary>
    /// <param name="message">The exception message.</param>
    /// <param name="inner">The inner exception.</param>
    public DockerNotAvailableException(string message, Exception inner)
        : base(message, inner)
    {
    }
}