"""Embed/extract manifests in/from content. Two embedding modes: 1. Sidecar: manifest is stored next to content as a separate .c2pa.json file. 2. Inline header: for text, we can prepend a JSON block. For images/video, real C2PA stores inside metadata (XMP, EXIF). Here we keep it simple: we provide sidecar packaging helpers. Callers can embed the bytes however they want for their own file format. """ from __future__ import annotations import base64 import json from pqc_content_provenance.errors import InvalidManifestError from pqc_content_provenance.manifest import ContentManifest SIDECAR_EXTENSION = ".c2pa.json" TEXT_MARKER_BEGIN = "" TEXT_MARKER_END = "" def embed_manifest(content: bytes, manifest: ContentManifest, mode: str = "sidecar") -> bytes: """Produce an embedded form of (content, manifest). mode='sidecar': returns a JSON blob containing both; save to .c2pa.json. mode='text-header': prepends a marker-bracketed JSON to text content. """ if mode == "sidecar": envelope = { "manifest": manifest.to_dict(), "content_base64": _to_base64(content), } return json.dumps(envelope, indent=2).encode("utf-8") if mode == "text-header": header = f"{TEXT_MARKER_BEGIN}{manifest.to_json()}{TEXT_MARKER_END}" return header.encode("utf-8") + b"\n" + content raise ValueError(f"unknown embed mode: {mode}") def extract_manifest(blob: bytes, mode: str = "sidecar") -> tuple[ContentManifest, bytes]: """Extract (manifest, content) from an embedded blob. Inverse of embed_manifest.""" if mode == "sidecar": try: envelope = json.loads(blob.decode("utf-8")) manifest = ContentManifest.from_dict(envelope["manifest"]) content = _from_base64(envelope["content_base64"]) return manifest, content except (ValueError, KeyError) as e: raise InvalidManifestError(f"invalid sidecar envelope: {e}") from e if mode == "text-header": text = blob.decode("utf-8", errors="replace") if TEXT_MARKER_BEGIN not in text or TEXT_MARKER_END not in text: raise InvalidManifestError("text-header markers not found") start = text.index(TEXT_MARKER_BEGIN) + len(TEXT_MARKER_BEGIN) end = text.index(TEXT_MARKER_END, start) manifest_json = text[start:end] manifest = ContentManifest.from_json(manifest_json) # content is everything after the end marker (skip the trailing newline) rest = text[end + len(TEXT_MARKER_END):] if rest.startswith("\n"): rest = rest[1:] return manifest, rest.encode("utf-8") raise ValueError(f"unknown embed mode: {mode}") def _to_base64(data: bytes) -> str: return base64.b64encode(data).decode("ascii") def _from_base64(s: str) -> bytes: return base64.b64decode(s.encode("ascii"))