${renderAuthorLine(b.author)}${
b.blocks ? renderBlocks(b.blocks, qctx) : ''
}
${
b.truncated
? `⚠ Long-form post — only the preview above was available at export; X did not expose the full text.${
safeUrl(b.sourceUrl)
? ` Read the full post on X →`
: ''
}
`
: b.noteRecovered
? `✓ Long-form post — full text recovered from data X delivered to this browser.
`
: ''
}${
safeIsoTime(b.publishedAt)
? ``
: ''
}${
safeUrl(b.sourceUrl)
? `View on X →`
: ''
}`;
}
default:
return '';
}
}
/**
 * Lists the value of every double-quoted `href` attribute in an HTML string.
 *
 * An attribute is only recognised when it is preceded by whitespace and holds
 * a non-empty, double-quoted value. Values are returned exactly as written in
 * the markup, in document order, with duplicates kept.
 *
 * @param {*} html - Markup to scan; falsy input is treated as an empty string.
 * @returns {string[]} The captured attribute values.
 */
function hrefsFromHtml(html) {
  const markup = String(html || '');
  return Array.from(markup.matchAll(/\shref="([^"]+)"/g), (found) => found[1]);
}
/**
 * Tallies what an export captured and what it could not.
 *
 * Walks `model.blocks` (descending into `quote` and `blockquote` blocks) and
 * counts headings, paragraphs, lists, images, videos and quote cards. Along
 * the way it gathers the distinct source/inline link URLs and records every
 * asset that was unavailable (`missing`) or only partially preserved
 * (`incomplete`), each with a human-readable warning.
 *
 * @param {object} model - Archive model; `sourceUrl`, `author` and `blocks` are read.
 * @param {Array<object>} [media] - Media manifest entries, read for `sha256`
 *   and `embedded`. Defaults to `collectMediaManifest(model)`.
 * @returns {object} The counters plus `media`, `duplicateMedia`, `mediaUrls`
 *   (Map of URL to occurrence count), `warnings`, `missing`, `incomplete`, and
 *   `sourceLinks` reduced to the number of distinct links.
 */
function archiveStats(model, media = collectMediaManifest(model)) {
  const duplicateMedia = duplicateMediaReport(media);
  const stats = {
    mainTextCaptured: false,
    headings: 0,
    paragraphs: 0,
    lists: 0,
    quoteCards: 0,
    renderedTweetCards: 0,
    images: 0,
    videos: 0,
    videosPreservedOffline: 0,
    videoPostersCaptured: 0,
    videoSourceLinksPreserved: 0,
    incompleteMedia: 0,
    missingMedia: 0,
    media,
    hashedMedia: media.filter((item) => !!item.sha256).length,
    duplicateMedia,
    sourceLinks: new Set(model.sourceUrl ? [model.sourceUrl] : []),
    mediaUrls: new Map(),
    warnings: [],
    missing: [],
    incomplete: [],
  };

  // Sentence subject for a warning: "Image img-3" when a label is known and
  // plain "Image" otherwise. Interpolating an empty label between two spaces
  // left a doubled space that a trailing `.trim()` could never remove.
  const subject = (noun, label) => (label ? `${noun} ${label}` : noun);

  const postIdOf = (block) => statusIdFromSourceUrl(block.sourceUrl) || undefined;

  const addMissing = (record, warning) => {
    stats.missing.push(missingRecord(record.type, record));
    if (warning) stats.warnings.push(warning);
  };

  const markMediaUrl = (url) => {
    if (!url) return;
    stats.mediaUrls.set(url, (stats.mediaUrls.get(url) || 0) + 1);
  };

  const keepSourceLink = (block) => {
    if (block.sourceUrl) stats.sourceLinks.add(block.sourceUrl);
  };

  const keepHrefs = (html) => {
    hrefsFromHtml(html).forEach((href) => stats.sourceLinks.add(href));
  };

  const tallyImage = (image) => {
    stats.images += 1;
    markMediaUrl(image.url);
    if (!image.dataUri) {
      addMissing(
        {
          type: 'image',
          mediaId: image._xaMediaId,
          sourcePostId: postIdOf(image),
          sourceUrl: image.sourceUrl || undefined,
          originalUrl: image.url || undefined,
          reason: missingReason(image, 'download_failed'),
        },
        `${subject('Image', image._xaMediaId)} was unavailable at export time.`
      );
    }
    keepSourceLink(image);
  };

  const tallyVideo = (video) => {
    stats.videos += 1;
    if (video.dataUri) stats.videosPreservedOffline += 1;
    if (video.posterDataUri) stats.videoPostersCaptured += 1;
    if (video.sourceUrl) stats.videoSourceLinksPreserved += 1;
    markMediaUrl(video.mp4Url || video.posterUrl || video.sourceUrl);

    // No embedded file: the video is "incomplete" even if a poster or a
    // source link survived.
    if (!video.dataUri) {
      stats.incomplete.push(
        missingRecord('video', {
          mediaId: video._xaMediaId,
          sourcePostId: postIdOf(video),
          sourceUrl: video.sourceUrl || undefined,
          originalUrl: video.mp4Url || video.hlsUrl || undefined,
          reason: video.videoFailureReason || 'video_file_not_captured',
          mode:
            video.mode === 'poster' || video.mode === 'video-inline'
              ? 'poster-only'
              : video.mode || (video.posterDataUri ? 'poster-only' : 'discovery-failed'),
          posterCaptured: !!video.posterDataUri,
          sourceLinkPreserved: !!video.sourceUrl,
        })
      );
      stats.warnings.push(
        `${subject(
          'Video',
          video._xaMediaId
        )} was detected, but the video file was not preserved offline. ${
          video.posterDataUri ? 'Only the poster' : 'No poster'
        } and ${video.sourceUrl ? 'source link were' : 'no source link was'} preserved.`
      );
    }

    // Neither file nor poster: the whole video is missing. Otherwise a poster
    // that was advertised but not embedded is reported on its own.
    if (!video.dataUri && !video.posterDataUri) {
      addMissing(
        {
          type: 'video',
          mediaId: video._xaMediaId,
          sourcePostId: postIdOf(video),
          sourceUrl: video.sourceUrl || undefined,
          originalUrl: video.mp4Url || video.posterUrl || undefined,
          reason: missingReason(video, video.unsupported ? 'unsupported_media' : 'download_failed'),
        },
        `${subject('Video', video._xaMediaId)} was unavailable at export time.`
      );
    } else if (video.posterUrl && !video.posterDataUri) {
      addMissing(
        {
          type: 'video-poster',
          mediaId: video._xaMediaId,
          sourcePostId: postIdOf(video),
          sourceUrl: video.sourceUrl || undefined,
          originalUrl: video.posterUrl,
          reason: 'download_failed',
        },
        `Video poster for ${video._xaMediaId || 'a video'} was unavailable at export time.`
      );
    }
    keepSourceLink(video);
  };

  const tallyQuote = (quote) => {
    stats.quoteCards += 1;
    stats.renderedTweetCards += 1;
    keepSourceLink(quote);
    // A quote card without any inner blocks is reported as a missing post.
    if (!quote.blocks || !quote.blocks.length) {
      const postId = statusIdFromSourceUrl(quote.sourceUrl);
      addMissing(
        {
          type: 'quoted-post',
          sourcePostId: postId || undefined,
          sourceUrl: quote.sourceUrl || undefined,
          reason: 'private_or_deleted',
        },
        `${subject('Quoted post', postId || quote.sourceUrl)} was unavailable at export time.`
      );
    }
  };

  const walk = (blocks) => {
    (blocks || []).forEach((block) => {
      switch (block.kind) {
        case 'heading':
          stats.headings += 1;
          stats.mainTextCaptured = stats.mainTextCaptured || !!String(block.text || '').trim();
          break;
        case 'thread-marker':
          keepSourceLink(block);
          break;
        case 'paragraph':
          stats.paragraphs += 1;
          stats.mainTextCaptured = stats.mainTextCaptured || !!textFromHtml(block.html);
          keepHrefs(block.html);
          break;
        case 'list':
          stats.lists += 1;
          stats.mainTextCaptured =
            stats.mainTextCaptured || (block.items || []).some((item) => !!textFromHtml(item));
          (block.items || []).forEach((item) => keepHrefs(item));
          break;
        case 'image':
          tallyImage(block);
          break;
        case 'video':
          tallyVideo(block);
          break;
        case 'quote':
          tallyQuote(block);
          walk(block.blocks);
          break;
        case 'blockquote':
          walk(block.blocks);
          break;
        default:
          // Other block kinds contribute nothing to the statistics.
          break;
      }
    });
  };

  walk(model.blocks);

  const author = model.author;
  const hasAuthorMetadata =
    author &&
    (String(author.name || '').trim() ||
      String(author.handle || '').trim() ||
      author.avatarUrl ||
      author.avatarDataUri);
  if (!hasAuthorMetadata) {
    stats.warnings.push('Main author metadata was not found.');
  }
  if (author && author.avatarFailed) {
    addMissing(
      {
        type: 'avatar',
        sourceUrl: author.avatarUrl || undefined,
        reason: 'download_failed',
      },
      'Main author avatar was unavailable at export time.'
    );
  }
  if (!stats.mainTextCaptured) {
    stats.warnings.push('No main text content was captured.');
  }

  // `missing` also holds avatars and quoted posts, hence "item(s)" below.
  stats.missingMedia = stats.missing.length;
  stats.incompleteMedia = stats.incomplete.length;
  if (stats.missingMedia) {
    stats.warnings.push(`${stats.missingMedia} item(s) were unavailable at export time.`);
  }
  if (media.some((item) => item.embedded && !item.sha256)) {
    stats.warnings.push('Some embedded media could not be content-hashed in this browser.');
  }
  if (duplicateMedia.length) {
    stats.warnings.push(`${duplicateMedia.length} duplicate media hash group(s) were detected.`);
  }

  // Callers receive the link count, not the working Set.
  return {
    ...stats,
    sourceLinks: stats.sourceLinks.size,
  };
}
/**
 * Assembles the export manifest from the model, the computed stats and any
 * debug payload.
 *
 * `rawDebug` may be a JSON string or an already-built value. It is normalised
 * into a fresh diagnostics object, so the caller's own object is never
 * mutated:
 *   - falsy input (or JSON that parses to a falsy value) becomes `{}`;
 *   - a plain object is shallow-copied;
 *   - text that is not valid JSON is kept as `{ raw: <text> }`;
 *   - any other value (array, number, string, boolean) is kept as `{ raw: <value> }`.
 * A `networkCapture` entry is always added, replacing any entry of that name.
 *
 * @param {object} model - Archive model; reads `sourceUrl`, `publishedAt`,
 *   `exportedAt`, `thread`, `userNote` and `tags`.
 * @param {string|object|*} rawDebug - Optional debug payload (see above).
 * @param {object} stats - Result of `archiveStats`.
 * @param {string} documentLang - Stored as `capture.documentLang`.
 * @returns {object} Manifest with `schemaVersion`, `exporter`, `capture`,
 *   `media`, `missing`, `incomplete`, `duplicates`, `warnings`, `diagnostics`.
 */
function buildArchiveManifest(model, rawDebug, stats, documentLang) {
  let parsed;
  if (rawDebug) {
    try {
      parsed = typeof rawDebug === 'string' ? JSON.parse(rawDebug) : rawDebug;
    } catch {
      // Best effort: unparseable debug text is preserved verbatim, not lost.
      parsed = { raw: String(rawDebug) };
    }
  }

  let base;
  if (!parsed) {
    base = {};
  } else if (typeof parsed === 'object' && !Array.isArray(parsed)) {
    base = parsed;
  } else {
    // Primitives cannot carry properties and arrays would drop them when
    // serialised, so wrap them instead of attaching `networkCapture` to them.
    base = { raw: parsed };
  }

  // Spread into a new object so a caller-supplied debug object stays untouched.
  const diagnostics = {
    ...base,
    networkCapture: {
      ...networkCaptureDiagnostics,
      bufferedCandidates: capturedNetworkVideoCandidates.length,
    },
  };

  return {
    schemaVersion: '1.0',
    exporter: { name: APP, version: VERSION },
    capture: {
      sourceUrl: model.sourceUrl || '',
      publishedAt: safeIsoTime(model.publishedAt),
      exportedAt: safeIsoTime(model.exportedAt),
      documentLang,
      mainTextCaptured: stats.mainTextCaptured,
      headings: stats.headings,
      paragraphs: stats.paragraphs,
      lists: stats.lists,
      quoteCards: stats.quoteCards,
      renderedTweetCards: stats.renderedTweetCards,
      images: stats.images,
      videos: stats.videos,
      videosPreservedOffline: stats.videosPreservedOffline,
      videoPostersCaptured: stats.videoPostersCaptured,
      videoSourceLinksPreserved: stats.videoSourceLinksPreserved,
      incompleteMedia: stats.incompleteMedia,
      missingMedia: stats.missingMedia,
      hashedMedia: stats.hashedMedia,
      duplicateMedia: stats.duplicateMedia.length,
      sourceLinks: stats.sourceLinks,
      // Without thread info the export is treated as a single post.
      threadPosts: model.thread ? model.thread.capturedPosts : 1,
      threadCompleteness: model.thread ? model.thread.completeness : 'single-post',
      note: String(model.userNote || ''),
      tags: Array.isArray(model.tags) ? model.tags : [],
    },
    media: stats.media,
    missing: stats.missing,
    incomplete: stats.incomplete,
    duplicates: stats.duplicateMedia,
    warnings: stats.warnings,
    diagnostics,
  };
}
function renderCaptureSummary(stats) {
const row = (label, value) =>
`