/** * Streaming parser for ... * tags. Simplified from packages/artifacts/src/parser.ts in the reference * repo: handles one artifact at a time, ignores nesting. * * Feed deltas in, iterate events. Every event type here has a direct * counterpart in the reference parser — the shape is intentionally preserved * so you can upgrade later without rewriting consumers. */ export type ArtifactEvent = | { type: 'text'; delta: string } | { type: 'artifact:start'; identifier: string; artifactType: string; title: string } | { type: 'artifact:chunk'; identifier: string; delta: string } | { type: 'artifact:end'; identifier: string; fullContent: string }; const OPEN_PREFIX = ' { const re = /(\w+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g; const out: Record = {}; let m: RegExpExecArray | null = re.exec(raw); while (m !== null) { out[m[1] as string] = (m[2] ?? m[3] ?? '') as string; m = re.exec(raw); } return out; } type OpenTagMatch = | { kind: 'complete'; start: number; end: number; attrs: string } | { kind: 'partial'; start: number } | { kind: 'none' }; import { computeSkipRanges, FENCE_OPEN_RE, isRealArtifactOpenAt, rangeContains } from './markdown-context'; // Scan the buffer for `` while skipping any positions that the // chat markdown renderer would render as a fenced code block or inline code // span — see ./markdown-context.ts for the shared classification used by both // the streaming parser and the post-stream `` stripper. // // Streaming caveats handled here on top of the shared ranges: // * Open fence with no close yet → hold back from its opening line. // * Unterminated tail line that could still resolve into a fence delimiter // (e.g. "```", "```ht") → hold back from the line start. // * Unmatched opening backtick after the last \n → hold back from it; a // future chunk may turn it into an inline code span. function findOpenTag(buffer: string): OpenTagMatch { const len = buffer.length; const { ranges, unclosedFenceStart } = computeSkipRanges(buffer); // Pass 1: scan for the earliest *complete* real `` open outside // any skip range. Done before any hold-back decision, otherwise a stray // backtick or fence-opener prefix on a tail line would suppress an already // self-contained artifact earlier in the buffer. let earliestPartialOpen = -1; let from = 0; while (from < len) { const idx = buffer.indexOf(OPEN_PREFIX, from); if (idx === -1) break; if (rangeContains(ranges, idx)) { from = idx + OPEN_PREFIX.length; continue; } if (unclosedFenceStart !== null && idx >= unclosedFenceStart) { // Anything past an unclosed fence opener is inside a code block that // will close in a later chunk (or at end-of-buffer for the stripper); // treat as skip range, not a real tag. break; } const after = idx + OPEN_PREFIX.length; const next = buffer.charAt(after); if (next === '') { // ` open (e.g. "') { return { kind: 'complete', start: idx, end: j + 1, attrs: buffer.slice(after, j) }; } j++; } // Ran out of buffer before the closing `>` arrived — this is an open tag // mid-stream. Remember and stop scanning (any later ` { if (pos !== null && pos !== -1 && (holdback === -1 || pos < holdback)) holdback = pos; }; note(earliestPartialOpen); note(unclosedFenceStart); const lastNl = buffer.lastIndexOf('\n'); if (lastNl < len - 1) { const tailLineStart = lastNl + 1; const tail = buffer.slice(tailLineStart); if (FENCE_OPEN_RE.test(tail) || /^`{1,2}$/.test(tail)) { note(tailLineStart); } } let firstUnmatched = -1; let parity = 0; for (let k = lastNl + 1; k < len; k++) { if (buffer.charAt(k) !== '`') continue; if (rangeContains(ranges, k)) continue; if (parity === 0) { firstUnmatched = k; parity = 1; } else { firstUnmatched = -1; parity = 0; } } note(firstUnmatched); // Strict prefix at the tail (e.g. " { state.buffer += delta; while (state.buffer.length > 0) { if (!state.inside) { const open = findOpenTag(state.buffer); if (open.kind === 'none') { yield { type: 'text', delta: state.buffer }; state.buffer = ''; return; } if (open.kind === 'partial') { if (open.start > 0) { yield { type: 'text', delta: state.buffer.slice(0, open.start) }; state.buffer = state.buffer.slice(open.start); } return; } if (open.start > 0) { yield { type: 'text', delta: state.buffer.slice(0, open.start) }; } const attrs = parseAttrs(open.attrs); state.inside = true; state.identifier = attrs['identifier'] ?? ''; state.artifactType = attrs['type'] ?? ''; state.title = attrs['title'] ?? ''; state.content = ''; state.buffer = state.buffer.slice(open.end); yield { type: 'artifact:start', identifier: state.identifier, artifactType: state.artifactType, title: state.title, }; continue; } const closeIdx = state.buffer.indexOf(CLOSE_TAG); if (closeIdx === -1) { // Hold back enough bytes to detect a partial close tag at the tail. const flushUpTo = state.buffer.length - (CLOSE_TAG.length - 1); if (flushUpTo > 0) { const chunk = state.buffer.slice(0, flushUpTo); state.content += chunk; state.buffer = state.buffer.slice(flushUpTo); yield { type: 'artifact:chunk', identifier: state.identifier, delta: chunk }; } return; } const finalChunk = state.buffer.slice(0, closeIdx); if (finalChunk.length > 0) { state.content += finalChunk; yield { type: 'artifact:chunk', identifier: state.identifier, delta: finalChunk }; } yield { type: 'artifact:end', identifier: state.identifier, fullContent: state.content }; state.buffer = state.buffer.slice(closeIdx + CLOSE_TAG.length); state.inside = false; state.identifier = ''; state.artifactType = ''; state.title = ''; state.content = ''; } } function* flush(): Generator { if (state.inside) { if (state.buffer.length > 0) { state.content += state.buffer; yield { type: 'artifact:chunk', identifier: state.identifier, delta: state.buffer }; state.buffer = ''; } yield { type: 'artifact:end', identifier: state.identifier, fullContent: state.content }; } else if (state.buffer.length > 0) { yield { type: 'text', delta: state.buffer }; } state.buffer = ''; state.inside = false; } return { feed, flush }; }