# OpenAPI document header (title "Llama Platform", version 0.1.0) and the start of `paths`.
# Contents of this line:
#   - GET  /api/v1/data-sinks  (list; responds with an array of DataSink)
#   - POST /api/v1/data-sinks  (create; body DataSinkCreate, responds with DataSink)
#   - the opening of GET /api/v1/data-sinks/{data_sink_id} (continues on the next line)
# Both collection operations require HTTPBearer and take optional `project_id` /
# `organization_id` query parameters plus an optional `session` cookie.
# NOTE(review): the document declares `openapi: 3.0.3`, yet optional parameters are
# modelled with `anyOf: [..., type: 'null']`, which is OpenAPI 3.1 / JSON Schema
# vocabulary (3.0 uses `nullable: true`) - confirm the declared version is intentional.
# NOTE(review): newlines and indentation look collapsed in this file; the nesting is
# flattened onto single lines and will not parse as YAML as-is.
openapi: 3.0.3 info: title: Llama Platform version: 0.1.0 paths: /api/v1/data-sinks: get: tags: - Data Sinks summary: List Data Sinks description: List data sinks for a given project. operationId: list_data_sinks_api_v1_data_sinks_get security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: type: array items: $ref: '#/components/schemas/DataSink' title: Response List Data Sinks Api V1 Data Sinks Get '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' post: tags: - Data Sinks summary: Create Data Sink description: Create a new data sink. operationId: create_data_sink_api_v1_data_sinks_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/DataSinkCreate' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/DataSink' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/data-sinks/{data_sink_id}: get: tags: - Data Sinks summary: Get Data Sink description: Get a data sink by ID. 
# Continuation of /api/v1/data-sinks/{data_sink_id}:
#   - remainder of GET (responds with DataSink)
#   - PUT (body DataSinkUpdate, responds with DataSink)
#   - the opening of DELETE (continues on the next line)
# Each operation takes the required uuid path parameter `data_sink_id` and an optional
# `session` cookie; no project/organization query parameters are declared here.
operationId: get_data_sink_api_v1_data_sinks__data_sink_id__get security: - HTTPBearer: [] parameters: - name: data_sink_id in: path required: true schema: type: string format: uuid title: Data Sink Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/DataSink' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' put: tags: - Data Sinks summary: Update Data Sink description: Update a data sink by ID. operationId: update_data_sink_api_v1_data_sinks__data_sink_id__put security: - HTTPBearer: [] parameters: - name: data_sink_id in: path required: true schema: type: string format: uuid title: Data Sink Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/DataSinkUpdate' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/DataSink' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' delete: tags: - Data Sinks summary: Delete Data Sink description: Delete a data sink by ID. 
# Contents of this line:
#   - remainder of DELETE /api/v1/data-sinks/{data_sink_id} (204 with no response body)
#   - GET /api/v1/data-sources (list; responds with an array of DataSource)
#   - the opening of POST /api/v1/data-sources (continues on the next line)
operationId: delete_data_sink_api_v1_data_sinks__data_sink_id__delete security: - HTTPBearer: [] parameters: - name: data_sink_id in: path required: true schema: type: string format: uuid title: Data Sink Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '204': description: Successful Response '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/data-sources: get: tags: - Data Sources summary: List Data Sources description: 'List data sources for a given project. If project_id is not provided, uses the default project.' operationId: list_data_sources_api_v1_data_sources_get security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: type: array items: $ref: '#/components/schemas/DataSource' title: Response List Data Sources Api V1 Data Sources Get '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' post: tags: - Data Sources summary: Create Data Source description: Create a new data source. 
# Contents of this line:
#   - remainder of POST /api/v1/data-sources (body DataSourceCreate, responds 200 with DataSource)
#   - GET /api/v1/data-sources/{data_source_id} (responds with DataSource)
#   - the opening of PUT on the same path (continues on the next line)
operationId: create_data_source_api_v1_data_sources_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/DataSourceCreate' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/DataSource' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/data-sources/{data_source_id}: get: tags: - Data Sources summary: Get Data Source description: Get a data source by ID. operationId: get_data_source_api_v1_data_sources__data_source_id__get security: - HTTPBearer: [] parameters: - name: data_source_id in: path required: true schema: type: string format: uuid title: Data Source Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/DataSource' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' put: tags: - Data Sources summary: Update Data Source description: Update a data source by ID. 
# Contents of this line:
#   - remainder of PUT /api/v1/data-sources/{data_source_id} (body DataSourceUpdate)
#   - DELETE /api/v1/data-sources/{data_source_id} (204 with no response body)
#   - GET /api/v1/embedding-model-configs up to the array item $ref (continues on the next line)
# The embedding-model-configs list operation declares a summary but no `description`.
operationId: update_data_source_api_v1_data_sources__data_source_id__put security: - HTTPBearer: [] parameters: - name: data_source_id in: path required: true schema: type: string format: uuid title: Data Source Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/DataSourceUpdate' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/DataSource' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' delete: tags: - Data Sources summary: Delete Data Source description: Delete a data source by ID. operationId: delete_data_source_api_v1_data_sources__data_source_id__delete security: - HTTPBearer: [] parameters: - name: data_source_id in: path required: true schema: type: string format: uuid title: Data Source Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '204': description: Successful Response '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/embedding-model-configs: get: tags: - Embedding Model Configs summary: List Embedding Model Configs operationId: list_embedding_model_configs_api_v1_embedding_model_configs_get security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: type: array items: $ref: '#/components/schemas/EmbeddingModelConfig' 
# Contents of this line:
#   - remainder of GET /api/v1/embedding-model-configs (array title and 422 response)
#   - POST /api/v1/embedding-model-configs (body EmbeddingModelConfigCreate); success is
#     declared as '201', whereas the data-sink and data-source create operations declare '200'
#   - the opening of PUT /api/v1/embedding-model-configs/{embedding_model_config_id}
#     (continues on the next line); no GET is declared on that path in the visible text
title: Response List Embedding Model Configs Api V1 Embedding Model Configs Get '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' post: tags: - Embedding Model Configs summary: Create a new Embedding Model Configuration description: Create a new embedding model configuration within a specified project. operationId: create_embedding_model_config_api_v1_embedding_model_configs_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/EmbeddingModelConfigCreate' responses: '201': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/EmbeddingModelConfig' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/embedding-model-configs/{embedding_model_config_id}: put: tags: - Embedding Model Configs summary: Update Embedding Model Config description: Update an embedding model config by ID. 
# Contents of this line:
#   - remainder of PUT /api/v1/embedding-model-configs/{embedding_model_config_id}
#     (body EmbeddingModelConfigUpdate, responds with EmbeddingModelConfig)
#   - the opening of DELETE on the same path (continues on the next line)
# Unlike the data-sink/data-source item operations, this one also accepts optional
# `project_id` and `organization_id` query parameters.
operationId: update_embedding_model_config_api_v1_embedding_model_configs__embedding_model_config_id__put security: - HTTPBearer: [] parameters: - name: embedding_model_config_id in: path required: true schema: type: string format: uuid title: Embedding Model Config Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/EmbeddingModelConfigUpdate' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/EmbeddingModelConfig' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' delete: tags: - Embedding Model Configs summary: Delete Embedding Model Config description: Delete an embedding model config by ID. 
# Contents of this line:
#   - remainder of DELETE /api/v1/embedding-model-configs/{embedding_model_config_id} (204, no body)
#   - GET /api/v1/organizations (responds with an array of Organization; only the
#     optional `session` cookie is declared as a parameter)
#   - the opening of GET /api/v1/organizations/{organization_id} (continues on the next line)
operationId: delete_embedding_model_config_api_v1_embedding_model_configs__embedding_model_config_id__delete security: - HTTPBearer: [] parameters: - name: embedding_model_config_id in: path required: true schema: type: string format: uuid title: Embedding Model Config Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '204': description: Successful Response '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/organizations: get: tags: - Organizations summary: List Organizations description: List organizations for a user. operationId: list_organizations_api_v1_organizations_get security: - HTTPBearer: [] parameters: - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: type: array items: $ref: '#/components/schemas/Organization' title: Response List Organizations Api V1 Organizations Get '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/organizations/{organization_id}: get: tags: - Organizations summary: Get Organization description: Get an organization by ID. 
# Contents of this line:
#   - remainder of GET /api/v1/organizations/{organization_id} (responds with Organization)
#   - GET /api/v1/projects (optional `organization_id` and `project_name` query filters;
#     responds with an array of Project)
#   - the opening of GET /api/v1/projects/{project_id} (continues on the next line)
operationId: get_organization_api_v1_organizations__organization_id__get security: - HTTPBearer: [] parameters: - name: organization_id in: path required: true schema: type: string format: uuid title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/Organization' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/projects: get: tags: - Projects summary: List Projects description: List projects or get one by name operationId: list_projects_api_v1_projects_get security: - HTTPBearer: [] parameters: - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: project_name in: query required: false schema: anyOf: - type: string - type: 'null' title: Project Name - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: type: array items: $ref: '#/components/schemas/Project' title: Response List Projects Api V1 Projects Get '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/projects/{project_id}: get: tags: - Projects summary: Get Project description: Get a project by ID. 
# Contents of this line:
#   - remainder of GET /api/v1/projects/{project_id} (responds with Project)
#   - GET /api/v1/job-data-points up to the `page_token` parameter (continues on the next line)
# The `project_id` path parameter is a plain uuid string: a required path segment can
# never be null, and this matches the other path ids in this document
# (data_sink_id, organization_id, pipeline_id, ...).
# job-data-points parameters: `job_type` is required (extract | parse | classify);
# `hours` is an integer in 1..168 with default 1; the remaining filters are optional.
# Each documented query parameter repeats its `description` inside `schema` and on the
# parameter object itself.
operationId: get_project_api_v1_projects__project_id__get security: - HTTPBearer: [] parameters: - name: project_id in: path required: true schema: type: string format: uuid title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/Project' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/job-data-points: get: tags: - Job Dashboard summary: Query project job data points description: Returns paginated job data points for the current project. operationId: list_project_job_data_points_api_v1_job_data_points_get security: - HTTPBearer: [] parameters: - name: job_type in: query required: true schema: enum: - extract - parse - classify type: string description: Job type to query. examples: - parse title: Job Type description: Job type to query. - name: hours in: query required: false schema: type: integer maximum: 168 minimum: 1 description: Hours of history to include. examples: - 24 default: 1 title: Hours description: Hours of history to include. - name: status in: query required: false schema: anyOf: - type: array items: type: string - type: 'null' description: Filter by status. examples: - - completed - failed title: Status description: Filter by status. - name: page_size in: query required: false schema: anyOf: - type: integer - type: 'null' description: Number of items per page. examples: - 100 title: Page Size description: Number of items per page. - name: page_token in: query required: false schema: anyOf: - type: string - type: 'null' description: Cursor token for the next page. title: Page Token description: Cursor token for the next page. 
# Contents of this line:
#   - remaining parameters of GET /api/v1/job-data-points: inclusive `created_at_on_or_after` /
#     `created_at_on_or_before` date-time bounds, optional `project_id` / `organization_id`,
#     and the `session` cookie; the 200 response is JobDataPointResponse
#   - the opening of GET /api/v1/files/{id}/page_screenshots (continues on the next line)
- name: created_at_on_or_after in: query required: false schema: anyOf: - type: string format: date-time - type: 'null' description: Include items created at or after this timestamp (inclusive) title: Created At On Or After description: Include items created at or after this timestamp (inclusive) - name: created_at_on_or_before in: query required: false schema: anyOf: - type: string format: date-time - type: 'null' description: Include items created at or before this timestamp (inclusive) title: Created At On Or Before description: Include items created at or before this timestamp (inclusive) - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/JobDataPointResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/files/{id}/page_screenshots: get: tags: - Page Screenshots summary: List File Page Screenshots description: List metadata for all screenshots of pages from a file. 
# Contents of this line:
#   - remainder of GET /api/v1/files/{id}/page_screenshots (responds with an array of
#     PageScreenshotMetadata)
#   - the opening of GET /api/v1/files/{id}/page_screenshots/{page_index} (continues on the next line)
operationId: list_file_page_screenshots_api_v1_files__id__page_screenshots_get security: - HTTPBearer: [] parameters: - name: id in: path required: true schema: type: string format: uuid title: Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: type: array items: $ref: '#/components/schemas/PageScreenshotMetadata' title: Response List File Page Screenshots Api V1 Files Id Page Screenshots Get '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/files/{id}/page_screenshots/{page_index}: get: tags: - Page Screenshots summary: Get File Page Screenshot description: Get screenshot of a page from a file. 
# Contents of this line:
#   - remainder of GET /api/v1/files/{id}/page_screenshots/{page_index}; path parameters are
#     the file uuid `id` and the integer `page_index`
#   - the opening of POST .../page_screenshots/{page_index}/presigned_url (continues on the next line)
# The 200 response is declared as `application/json` with an empty schema (`{}`).
# NOTE(review): if this endpoint actually streams image bytes, the declared media type
# would be misleading for generated clients - confirm against the handler.
operationId: get_file_page_screenshot_api_v1_files__id__page_screenshots__page_index__get security: - HTTPBearer: [] parameters: - name: id in: path required: true schema: type: string format: uuid title: Id - name: page_index in: path required: true schema: type: integer title: Page Index - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: {} '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/files/{id}/page_screenshots/{page_index}/presigned_url: post: tags: - Page Screenshots summary: Generate File Page Screenshot Presigned Url description: Returns a short-lived presigned URL to read a page screenshot; treat the URL as sensitive while it is valid. 
# Contents of this line:
#   - remainder of POST /api/v1/files/{id}/page_screenshots/{page_index}/presigned_url
#     (no request body declared; responds with PresignedUrl)
#   - the opening of GET /api/v1/files/{id}/page-figures (continues on the next line)
# Note the path spelling: screenshots use `page_screenshots` (underscore) while figures
# use `page-figures` (hyphen).
operationId: generate_file_page_screenshot_presigned_url_api_v1_files__id__page_screenshots__page_index__presigned_url_post security: - HTTPBearer: [] parameters: - name: id in: path required: true schema: type: string format: uuid title: Id - name: page_index in: path required: true schema: type: integer title: Page Index - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/PresignedUrl' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/files/{id}/page-figures: get: tags: - Page Figures summary: List File Pages Figures description: List metadata for all figures from all pages of a file. 
# Contents of this line:
#   - remainder of GET /api/v1/files/{id}/page-figures (responds with an array of PageFigureMetadata)
#   - the opening of GET /api/v1/files/{id}/page-figures/{page_index} (continues on the next line)
operationId: list_file_pages_figures_api_v1_files__id__page_figures_get security: - HTTPBearer: [] parameters: - name: id in: path required: true schema: type: string format: uuid title: Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: type: array items: $ref: '#/components/schemas/PageFigureMetadata' title: Response List File Pages Figures Api V1 Files Id Page Figures Get '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/files/{id}/page-figures/{page_index}: get: tags: - Page Figures summary: List File Page Figures description: List metadata for figures from a specific page of a file. 
# Contents of this line:
#   - remainder of GET /api/v1/files/{id}/page-figures/{page_index} (responds with an array of
#     PageFigureMetadata)
#   - the opening of GET /api/v1/files/{id}/page-figures/{page_index}/{figure_name}
#     (continues on the next line)
operationId: list_file_page_figures_api_v1_files__id__page_figures__page_index__get security: - HTTPBearer: [] parameters: - name: id in: path required: true schema: type: string format: uuid title: Id - name: page_index in: path required: true schema: type: integer title: Page Index - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: type: array items: $ref: '#/components/schemas/PageFigureMetadata' title: Response List File Page Figures Api V1 Files Id Page Figures Page Index Get '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/files/{id}/page-figures/{page_index}/{figure_name}: get: tags: - Page Figures summary: Get File Page Figure description: Get a specific figure from a page of a file. 
# Contents of this line:
#   - remainder of GET /api/v1/files/{id}/page-figures/{page_index}/{figure_name}; path
#     parameters are the file uuid `id`, integer `page_index` and string `figure_name`
#   - the opening of POST .../{figure_name}/presigned_url (continues on the next line)
# The 200 response is declared as `application/json` with an empty schema (`{}`).
# NOTE(review): if the figure is returned as raw image bytes, the declared media type
# would be misleading for generated clients - confirm against the handler.
operationId: get_file_page_figure_api_v1_files__id__page_figures__page_index___figure_name__get security: - HTTPBearer: [] parameters: - name: id in: path required: true schema: type: string format: uuid title: Id - name: page_index in: path required: true schema: type: integer title: Page Index - name: figure_name in: path required: true schema: type: string title: Figure Name - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: {} '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/files/{id}/page-figures/{page_index}/{figure_name}/presigned_url: post: tags: - Page Figures summary: Generate File Page Figure Presigned Url description: Returns a short-lived presigned URL to read a page figure; treat the URL as sensitive while it is valid. 
# Contents of this line:
#   - remainder of POST /api/v1/files/{id}/page-figures/{page_index}/{figure_name}/presigned_url
#     (no request body declared; responds with PresignedUrl)
#   - the opening of GET /api/v1/pipelines (continues on the next line)
operationId: generate_file_page_figure_presigned_url_api_v1_files__id__page_figures__page_index___figure_name__presigned_url_post security: - HTTPBearer: [] parameters: - name: id in: path required: true schema: type: string format: uuid title: Id - name: page_index in: path required: true schema: type: integer title: Page Index - name: figure_name in: path required: true schema: type: string title: Figure Name - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/PresignedUrl' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines: get: tags: - Pipelines summary: Search Pipelines description: Search for pipelines by name, type, or project. 
# Contents of this line:
#   - remainder of GET /api/v1/pipelines: optional filters `project_id`, `project_name`,
#     `pipeline_name`, `pipeline_type` (a $ref to PipelineType) and `organization_id`;
#     responds with an array of Pipeline
#   - the opening of POST /api/v1/pipelines (continues on the next line)
operationId: search_pipelines_api_v1_pipelines_get security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: project_name in: query required: false schema: anyOf: - type: string - type: 'null' title: Project Name - name: pipeline_name in: query required: false schema: anyOf: - type: string - type: 'null' title: Pipeline Name - name: pipeline_type in: query required: false schema: anyOf: - $ref: '#/components/schemas/PipelineType' - type: 'null' title: Pipeline Type - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: type: array items: $ref: '#/components/schemas/Pipeline' title: Response Search Pipelines Api V1 Pipelines Get '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' post: tags: - Pipelines summary: Create Pipeline description: 'Create a new managed ingestion pipeline. A pipeline connects data sources to a vector store for RAG. After creation, call `POST /pipelines/{id}/sync` to start ingesting documents.' 
# Contents of this line:
#   - remainder of POST /api/v1/pipelines (body PipelineCreate, responds 200 with Pipeline)
#   - PUT /api/v1/pipelines (upsert; reuses the PipelineCreate body and the same parameters)
#   - the opening of GET /api/v1/pipelines/{pipeline_id} (continues on the next line)
operationId: create_pipeline_api_v1_pipelines_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/PipelineCreate' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/Pipeline' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' put: tags: - Pipelines summary: Upsert Pipeline description: 'Upsert a pipeline. Updates the pipeline if one with the same name and project already exists, otherwise creates a new one.' operationId: upsert_pipeline_api_v1_pipelines_put security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/PipelineCreate' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/Pipeline' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}: get: tags: - Pipelines summary: Get Pipeline description: Get a pipeline by ID. 
# Continuation of /api/v1/pipelines/{pipeline_id}:
#   - remainder of GET (responds with Pipeline)
#   - PUT (body PipelineUpdate, responds with Pipeline)
#   - the opening of DELETE (continues on the next line)
# Each operation takes the required uuid path parameter `pipeline_id` and an optional
# `session` cookie.
operationId: get_pipeline_api_v1_pipelines__pipeline_id__get security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/Pipeline' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' put: tags: - Pipelines summary: Update Existing Pipeline description: Update an existing pipeline's configuration. operationId: update_existing_pipeline_api_v1_pipelines__pipeline_id__put security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/PipelineUpdate' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/Pipeline' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' delete: tags: - Pipelines summary: Delete Pipeline description: 'Delete a pipeline and all associated resources. Removes pipeline files, data sources, and vector store data. This operation is irreversible.' 
# Contents of this line:
#   - remainder of DELETE /api/v1/pipelines/{pipeline_id} (204 with no response body)
#   - GET /api/v1/pipelines/{pipeline_id}/status (optional boolean `full_details` query
#     parameter; responds with ManagedIngestionStatusResponse)
#   - the opening of POST /api/v1/pipelines/{pipeline_id}/sync (continues on the next line)
operationId: delete_pipeline_api_v1_pipelines__pipeline_id__delete security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '204': description: Successful Response '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/status: get: tags: - Pipelines summary: Get Pipeline Status description: 'Get the ingestion status of a managed pipeline. Returns document counts, sync progress, and the last effective timestamp. Only available for managed pipelines.' operationId: get_pipeline_status_api_v1_pipelines__pipeline_id__status_get security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: full_details in: query required: false schema: anyOf: - type: boolean - type: 'null' title: Full Details - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ManagedIngestionStatusResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/sync: post: tags: - Pipelines summary: Sync Pipeline description: 'Trigger an incremental sync for a managed pipeline. Processes new and updated documents from data sources and files, then updates the index for retrieval.' 
# Contents of these two lines (the break between them falls inside the quoted
# `description` of the copy operation, so they must be kept together):
#   - remainder of POST /api/v1/pipelines/{pipeline_id}/sync (responds with Pipeline)
#   - POST /api/v1/pipelines/{pipeline_id}/sync/cancel (responds with Pipeline)
#   - POST /api/v1/pipelines/{pipeline_id}/force-delete (204, no body; declares a summary
#     but no `description`)
#   - POST /api/v1/pipelines/{pipeline_id}/copy (responds with Pipeline)
#   - the opening of POST /api/v1/pipelines/{pipeline_id}/retrieve (continues on the next line)
# None of the sync, cancel, force-delete or copy operations declares a request body.
operationId: sync_pipeline_api_v1_pipelines__pipeline_id__sync_post security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/Pipeline' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/sync/cancel: post: tags: - Pipelines summary: Cancel Pipeline Sync description: Cancel all running sync jobs for a pipeline. operationId: cancel_pipeline_sync_api_v1_pipelines__pipeline_id__sync_cancel_post security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/Pipeline' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/force-delete: post: tags: - Pipelines summary: Force Delete Pipeline operationId: force_delete_pipeline_api_v1_pipelines__pipeline_id__force_delete_post security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '204': description: Successful Response '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/copy: post: tags: - Pipelines summary: Copy Pipeline description: 'Copy a pipeline including its 
files and documents. Creates a new pipeline with the same configuration and triggers a sync to populate the new vector store.' operationId: copy_pipeline_api_v1_pipelines__pipeline_id__copy_post security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/Pipeline' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/retrieve: post: tags: - Pipelines summary: Run Search description: 'Run a retrieval query against a managed pipeline. Searches the pipeline''s vector store using the provided query and retrieval parameters. Supports dense, sparse, and hybrid search modes with configurable top-k and reranking.' 
operationId: run_search_api_v1_pipelines__pipeline_id__retrieve_post security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/RetrievalParams' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/RetrieveResults' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/playground-session: get: tags: - Pipelines summary: Get Playground Session description: Get a playground session for a user and pipeline. operationId: get_playground_session_api_v1_pipelines__pipeline_id__playground_session_get security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/PlaygroundSession' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/chat: post: tags: - Pipelines summary: Chat description: 'Chat with a managed pipeline using RAG. Combines retrieval from the pipeline''s vector store with an LLM chat completion. Returns a streaming response.' 
operationId: chat_api_v1_pipelines__pipeline_id__chat_post security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/ChatInputParams' responses: '200': description: Successful Response content: application/json: schema: {} '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/data-sources: get: tags: - Pipeline Data Sources summary: List Pipeline Data Sources description: Get data sources for a pipeline. operationId: list_pipeline_data_sources_api_v1_pipelines__pipeline_id__data_sources_get security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: type: array items: $ref: '#/components/schemas/PipelineDataSource' title: Response List Pipeline Data Sources Api V1 Pipelines Pipeline Id Data Sources Get '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' put: tags: - Pipeline Data Sources summary: Add Data Sources To Pipeline description: Add data sources to a pipeline. 
operationId: add_data_sources_to_pipeline_api_v1_pipelines__pipeline_id__data_sources_put security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: type: array items: $ref: '#/components/schemas/PipelineDataSourceCreate' title: Pipeline Data Source Creates responses: '200': description: Successful Response content: application/json: schema: type: array items: $ref: '#/components/schemas/PipelineDataSource' title: Response Add Data Sources To Pipeline Api V1 Pipelines Pipeline Id Data Sources Put '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/data-sources/{data_source_id}: put: tags: - Pipeline Data Sources summary: Update Pipeline Data Source description: Update the configuration of a data source in a pipeline. 
operationId: update_pipeline_data_source_api_v1_pipelines__pipeline_id__data_sources__data_source_id__put security: - HTTPBearer: [] parameters: - name: data_source_id in: path required: true schema: type: string format: uuid title: Data Source Id - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/PipelineDataSourceUpdate' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/PipelineDataSource' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/data-sources/{data_source_id}/sync: post: tags: - Pipeline Data Sources summary: Sync Pipeline Data Source description: 'Run incremental ingestion: pull upstream changes from the data source into the data sink.' 
operationId: sync_pipeline_data_source_api_v1_pipelines__pipeline_id__data_sources__data_source_id__sync_post security: - HTTPBearer: [] parameters: - name: data_source_id in: path required: true schema: type: string format: uuid title: Data Source Id - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: content: application/json: schema: anyOf: - $ref: '#/components/schemas/DataSourceSyncRequest' - type: 'null' title: Payload responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/Pipeline' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/data-sources/{data_source_id}/status: get: tags: - Pipeline Data Sources summary: Get Pipeline Data Source Status description: Get the status of a data source for a pipeline. operationId: get_pipeline_data_source_status_api_v1_pipelines__pipeline_id__data_sources__data_source_id__status_get security: - HTTPBearer: [] parameters: - name: data_source_id in: path required: true schema: type: string format: uuid title: Data Source Id - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ManagedIngestionStatusResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/files2: get: tags: - Pipeline Files summary: List Pipeline Files2 description: List files for a pipeline with optional filtering, sorting, and pagination. 
operationId: list_pipeline_files2_api_v1_pipelines__pipeline_id__files2_get deprecated: true security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: data_source_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Data Source Id - name: only_manually_uploaded in: query required: false schema: type: boolean default: false title: Only Manually Uploaded - name: file_name_contains in: query required: false schema: anyOf: - type: string - type: 'null' title: File Name Contains - name: statuses in: query required: false schema: anyOf: - type: array items: enum: - NOT_STARTED - IN_PROGRESS - SUCCESS - ERROR - CANCELLED type: string - type: 'null' description: Filter by file statuses title: Statuses description: Filter by file statuses - name: limit in: query required: false schema: anyOf: - type: integer - type: 'null' title: Limit - name: offset in: query required: false schema: anyOf: - type: integer - type: 'null' title: Offset - name: order_by in: query required: false schema: anyOf: - type: string - type: 'null' title: Order By - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/PaginatedListPipelineFilesResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/files/status-counts: get: tags: - Pipeline Files summary: Get Pipeline File Status Counts description: Get the number of files in a pipeline grouped by status.
operationId: get_pipeline_file_status_counts_api_v1_pipelines__pipeline_id__files_status_counts_get security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: data_source_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Data Source Id - name: only_manually_uploaded in: query required: false schema: type: boolean default: false title: Only Manually Uploaded - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/FileCountByStatusResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/files/{file_id}/status: get: tags: - Pipeline Files summary: Get Pipeline File Status description: Get status of a file for a pipeline. operationId: get_pipeline_file_status_api_v1_pipelines__pipeline_id__files__file_id__status_get security: - HTTPBearer: [] parameters: - name: file_id in: path required: true schema: type: string format: uuid title: File Id - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ManagedIngestionStatusResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/files: put: tags: - Pipeline Files summary: Add Files To Pipeline Api description: Add files to a pipeline. 
operationId: add_files_to_pipeline_api_api_v1_pipelines__pipeline_id__files_put security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: type: array items: $ref: '#/components/schemas/PipelineFileCreate' title: Pipeline File Creates responses: '200': description: Successful Response content: application/json: schema: type: array items: $ref: '#/components/schemas/PipelineFile' title: Response Add Files To Pipeline Api Api V1 Pipelines Pipeline Id Files Put '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/files/{file_id}: put: tags: - Pipeline Files summary: Update Pipeline File description: Update a file for a pipeline. operationId: update_pipeline_file_api_v1_pipelines__pipeline_id__files__file_id__put security: - HTTPBearer: [] parameters: - name: file_id in: path required: true schema: type: string format: uuid title: File Id - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/PipelineFileUpdate' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/PipelineFile' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' delete: tags: - Pipeline Files summary: Delete Pipeline File description: Delete a file from a pipeline. 
operationId: delete_pipeline_file_api_v1_pipelines__pipeline_id__files__file_id__delete security: - HTTPBearer: [] parameters: - name: file_id in: path required: true schema: type: string format: uuid title: File Id - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '204': description: Successful Response '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/metadata: put: tags: - Pipeline Files summary: Import Pipeline Metadata description: Import metadata for a pipeline. operationId: import_pipeline_metadata_api_v1_pipelines__pipeline_id__metadata_put security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: multipart/form-data: schema: $ref: '#/components/schemas/Body_import_pipeline_metadata_api_v1_pipelines__pipeline_id__metadata_put' responses: '200': description: Successful Response content: application/json: schema: type: object additionalProperties: type: string title: Response Import Pipeline Metadata Api V1 Pipelines Pipeline Id Metadata Put '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' delete: tags: - Pipeline Files summary: Delete Pipeline Files Metadata description: Delete metadata for all files in a pipeline. 
operationId: delete_pipeline_files_metadata_api_v1_pipelines__pipeline_id__metadata_delete security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '204': description: Successful Response '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/documents: post: tags: - Pipelines summary: Create Batch Pipeline Documents description: Batch create documents for a pipeline. operationId: create_batch_pipeline_documents_api_v1_pipelines__pipeline_id__documents_post security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: type: array items: $ref: '#/components/schemas/CloudDocumentCreate' title: Documents responses: '200': description: Successful Response content: application/json: schema: type: array items: $ref: '#/components/schemas/CloudDocument' title: Response Create Batch Pipeline Documents Api V1 Pipelines Pipeline Id Documents Post '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' get: tags: - Pipelines summary: List Pipeline Documents description: Return a list of documents for a pipeline. 
operationId: list_pipeline_documents_api_v1_pipelines__pipeline_id__documents_get security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: skip in: query required: false schema: type: integer minimum: 0 default: 0 title: Skip - name: limit in: query required: false schema: type: integer minimum: 0 default: 10 title: Limit - name: file_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: File Id - name: only_direct_upload in: query required: false schema: anyOf: - type: boolean - type: 'null' default: false title: Only Direct Upload - name: only_api_data_source_documents in: query required: false schema: anyOf: - type: boolean - type: 'null' default: false title: Only Api Data Source Documents - name: status_refresh_policy in: query required: false schema: enum: - cached - ttl type: string default: cached title: Status Refresh Policy - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: type: array items: $ref: '#/components/schemas/CloudDocument' title: Response List Pipeline Documents Api V1 Pipelines Pipeline Id Documents Get '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' put: tags: - Pipelines summary: Upsert Batch Pipeline Documents description: Batch create or update documents for a pipeline.
operationId: upsert_batch_pipeline_documents_api_v1_pipelines__pipeline_id__documents_put security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: type: array items: $ref: '#/components/schemas/CloudDocumentCreate' title: Documents responses: '200': description: Successful Response content: application/json: schema: type: array items: $ref: '#/components/schemas/CloudDocument' title: Response Upsert Batch Pipeline Documents Api V1 Pipelines Pipeline Id Documents Put '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/documents/paginated: get: tags: - Pipelines summary: Paginated List Pipeline Documents description: Return a paginated list of documents for a pipeline.
operationId: paginated_list_pipeline_documents_api_v1_pipelines__pipeline_id__documents_paginated_get security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: skip in: query required: false schema: type: integer minimum: 0 default: 0 title: Skip - name: limit in: query required: false schema: type: integer minimum: 0 default: 10 title: Limit - name: file_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: File Id - name: only_direct_upload in: query required: false schema: anyOf: - type: boolean - type: 'null' default: false title: Only Direct Upload - name: only_api_data_source_documents in: query required: false schema: anyOf: - type: boolean - type: 'null' default: false title: Only Api Data Source Documents - name: status_refresh_policy in: query required: false schema: enum: - cached - ttl type: string default: cached title: Status Refresh Policy - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/PaginatedListCloudDocumentsResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/documents/{document_id}: get: tags: - Pipelines summary: Get Pipeline Document description: Return a single document for a pipeline. 
operationId: get_pipeline_document_api_v1_pipelines__pipeline_id__documents__document_id__get security: - HTTPBearer: [] parameters: - name: document_id in: path required: true schema: type: string title: Document Id - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/CloudDocument' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' delete: tags: - Pipelines summary: Delete Pipeline Document description: Delete a document from a pipeline; runs async (vectors first, then MongoDB record). operationId: delete_pipeline_document_api_v1_pipelines__pipeline_id__documents__document_id__delete security: - HTTPBearer: [] parameters: - name: document_id in: path required: true schema: type: string title: Document Id - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '204': description: Successful Response '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/documents/{document_id}/status: get: tags: - Pipelines summary: Get Pipeline Document Status description: Return the ingestion status of a single document for a pipeline.
operationId: get_pipeline_document_status_api_v1_pipelines__pipeline_id__documents__document_id__status_get security: - HTTPBearer: [] parameters: - name: document_id in: path required: true schema: type: string title: Document Id - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ManagedIngestionStatusResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/documents/{document_id}/sync: post: tags: - Pipelines summary: Sync Pipeline Document description: Sync a specific document for a pipeline. operationId: sync_pipeline_document_api_v1_pipelines__pipeline_id__documents__document_id__sync_post security: - HTTPBearer: [] parameters: - name: document_id in: path required: true schema: type: string title: Document Id - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: {} '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/pipelines/{pipeline_id}/documents/{document_id}/chunks: get: tags: - Pipelines summary: List Pipeline Document Chunks description: Return a list of chunks for a pipeline document. 
operationId: list_pipeline_document_chunks_api_v1_pipelines__pipeline_id__documents__document_id__chunks_get security: - HTTPBearer: [] parameters: - name: document_id in: path required: true schema: type: string title: Document Id - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: type: array items: $ref: '#/components/schemas/TextNode' title: Response List Pipeline Document Chunks Api V1 Pipelines Pipeline Id Documents Document Id Chunks Get '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/retrievers: post: tags: - Retrievers summary: Create Retriever description: Create a new Retriever. operationId: create_retriever_api_v1_retrievers_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/RetrieverCreate' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/Retriever' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' put: tags: - Retrievers summary: Upsert Retriever description: Create a Retriever or update an existing one.
operationId: upsert_retriever_api_v1_retrievers_put security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/RetrieverCreate' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/Retriever' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' get: tags: - Retrievers summary: List Retrievers description: List Retrievers for a project. operationId: list_retrievers_api_v1_retrievers_get security: - HTTPBearer: [] parameters: - name: name in: query required: false schema: anyOf: - type: string - type: 'null' title: Name - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: type: array items: $ref: '#/components/schemas/Retriever' title: Response List Retrievers Api V1 Retrievers Get '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/retrievers/{retriever_id}: get: tags: - Retrievers summary: Get Retriever description: Get a Retriever by ID. 
operationId: get_retriever_api_v1_retrievers__retriever_id__get security: - HTTPBearer: [] parameters: - name: retriever_id in: path required: true schema: type: string format: uuid title: Retriever Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/Retriever' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' put: tags: - Retrievers summary: Update Retriever description: Update an existing Retriever. operationId: update_retriever_api_v1_retrievers__retriever_id__put security: - HTTPBearer: [] parameters: - name: retriever_id in: path required: true schema: type: string format: uuid title: Retriever Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/RetrieverUpdate' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/Retriever' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' delete: tags: - Retrievers summary: Delete Retriever description: Delete a Retriever by ID. 
operationId: delete_retriever_api_v1_retrievers__retriever_id__delete security: - HTTPBearer: [] parameters: - name: retriever_id in: path required: true schema: type: string format: uuid title: Retriever Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '204': description: Successful Response '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/retrievers/{retriever_id}/retrieve: post: tags: - Retrievers summary: Retrieve description: Retrieve data using a Retriever. operationId: retrieve_api_v1_retrievers__retriever_id__retrieve_post security: - HTTPBearer: [] parameters: - name: retriever_id in: path required: true schema: type: string format: uuid title: Retriever Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/CompositeRetrievalParams' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/CompositeRetrievalResult' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/retrievers/retrieve: post: tags: - Retrievers summary: Direct Retrieve description: Retrieve data using specified pipelines without creating a persistent retriever. 
operationId: direct_retrieve_api_v1_retrievers_retrieve_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/DirectRetrievalParams' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/CompositeRetrievalResult' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/parsing/job/{job_id}/result/image/{name}: get: tags: - Parsing summary: Get Job Image Result description: Get a job by id operationId: get_job_image_result_api_v1_parsing_job__job_id__result_image__name__get security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string title: Job Id - name: name in: path required: true schema: type: string title: Name - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: image/jpeg: {} '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/parsing/supported_file_extensions: get: tags: - Parsing summary: Get Supported File Extensions description: Get a list of supported file extensions operationId: 
get_supported_file_extensions_api_v1_parsing_supported_file_extensions_get responses: '200': description: Successful Response content: application/json: schema: items: $ref: '#/components/schemas/LlamaParseSupportedFileExtensions' type: array title: Response Get Supported File Extensions Api V1 Parsing Supported File Extensions Get /api/v1/parsing/screenshot: post: tags: - Parsing summary: Screenshot operationId: screenshot_api_v1_parsing_screenshot_post security: - HTTPBearer: [] parameters: - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: content: multipart/form-data: schema: $ref: '#/components/schemas/Body_screenshot_api_v1_parsing_screenshot_post' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ParsingJob' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/parsing/upload: post: tags: - Parsing summary: Upload File operationId: upload_file_api_v1_parsing_upload_post security: - HTTPBearer: [] parameters: - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: content: multipart/form-data: schema: $ref: '#/components/schemas/Body_upload_file_api_v1_parsing_upload_post' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ParsingJob' '422': 
description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/parsing/job/{job_id}: get: tags: - Parsing summary: Get Job description: Get a job by id operationId: get_job_api_v1_parsing_job__job_id__get security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string title: Job Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ParsingJob' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/parsing/job/{job_id}/parameters: get: tags: - Parsing summary: Get Job Parameters description: Get a job by id operationId: get_job_parameters_api_v1_parsing_job__job_id__parameters_get security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string title: Job Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: title: Response Get Job Parameters Api V1 Parsing Job Job Id Parameters Get '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/parsing/job/{job_id}/details: get: tags: - Parsing summary: Get 
Parsing Job Details description: Get a parsing job's status, parameters (API keys redacted), telemetry, and session logs. operationId: get_parsing_job_details_api_v1_parsing_job__job_id__details_get security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string title: Job Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: title: Response Get Parsing Job Details Api V1 Parsing Job Job Id Details Get '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/parsing/job/{job_id}/result/text: get: tags: - Parsing summary: Get Job Text Result description: 'Get a job by id. Deprecated: `credits_used` and `job_credits_usage` will be removed in a future release.' 
operationId: get_job_text_result_api_v1_parsing_job__job_id__result_text_get security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string title: Job Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ParsingJobTextResult' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/parsing/job/{job_id}/result/pdf: get: tags: - Parsing summary: Get Job Raw Text Result description: Get a job by id operationId: get_job_raw_text_result_api_v1_parsing_job__job_id__result_pdf_get security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string title: Job Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: {} '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/parsing/job/{job_id}/result/structured: get: tags: - Parsing summary: Get Job Structured Result description: 'Get a job by id. Deprecated: `credits_used` and `job_credits_usage` will be removed in a future release.' 
operationId: get_job_structured_result_api_v1_parsing_job__job_id__result_structured_get security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string title: Job Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ParsingJobStructuredResult' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/parsing/job/{job_id}/result/xlsx: get: tags: - Parsing summary: Get Job Raw Xlsx Result description: Get a job by id operationId: get_job_raw_xlsx_result_api_v1_parsing_job__job_id__result_xlsx_get security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string title: Job Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: {} '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/parsing/job/{job_id}/result/markdown: get: tags: - Parsing summary: Get Job Result description: 'Get a job by id. Deprecated: `credits_used` and `job_credits_usage` will be removed in a future release.' 
operationId: get_job_result_api_v1_parsing_job__job_id__result_markdown_get security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string title: Job Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ParsingJobMarkdownResult' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/parsing/job/{job_id}/result/json: get: tags: - Parsing summary: Get Job Json Result description: 'Get a job by id. Deprecated: `credits_used` and `job_credits_usage` will be removed in a future release.' 
operationId: get_job_json_result_api_v1_parsing_job__job_id__result_json_get security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string title: Job Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ParsingJobJsonResult' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/parsing/job/{job_id}/read/{filename}: get: tags: - Parsing summary: Generate Presigned Url description: Generate a presigned URL for a job operationId: generate_presigned_url_api_v1_parsing_job__job_id__read__filename__get security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string title: Job Id - name: filename in: path required: true schema: type: string title: Filename - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/PresignedUrl' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/classifier/jobs: post: tags: - Classifier - Beta - Classifier summary: Create Classify Job description: 'Create a classify job. Experimental: not production-ready and subject to change.' 
operationId: create_classify_job_api_v1_classifier_jobs_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/ClassifyJobCreate' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ClassifyJob' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' get: tags: - Classifier - Beta - Classifier summary: List Classify Jobs description: 'List classify jobs. Experimental: not production-ready and subject to change.' operationId: list_classify_jobs_api_v1_classifier_jobs_get security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: page_size in: query required: false schema: anyOf: - type: integer - type: 'null' title: Page Size - name: page_token in: query required: false schema: anyOf: - type: string - type: 'null' title: Page Token - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/PaginatedResponse_ClassifyJob_' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/classifier/jobs/{classify_job_id}: get: tags: - Classifier - Beta - Classifier 
summary: Get Classify Job description: 'Get a classify job. Experimental: not production-ready and subject to change.' operationId: get_classify_job_api_v1_classifier_jobs__classify_job_id__get security: - HTTPBearer: [] parameters: - name: classify_job_id in: path required: true schema: type: string format: uuid title: Classify Job Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ClassifyJob' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/classifier/jobs/{classify_job_id}/results: get: tags: - Classifier - Beta - Classifier summary: Get Classification Job Results description: 'Get the results of a classify job. Experimental: not production-ready and subject to change.' 
operationId: get_classification_job_results_api_v1_classifier_jobs__classify_job_id__results_get security: - HTTPBearer: [] parameters: - name: classify_job_id in: path required: true schema: type: string format: uuid title: Classify Job Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ClassifyJobResults' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/extraction/jobs: get: tags: - LlamaExtract summary: List Jobs operationId: list_jobs_api_v1_extraction_jobs_get security: - HTTPBearer: [] parameters: - name: extraction_agent_id in: query required: true schema: type: string format: uuid title: Extraction Agent Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: type: array items: $ref: '#/components/schemas/ExtractJob' title: Response List Jobs Api V1 Extraction Jobs Get '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' post: tags: - LlamaExtract summary: Run Job operationId: run_job_api_v1_extraction_jobs_post security: - HTTPBearer: [] parameters: - name: from_ui in: query required: false schema: type: boolean default: false title: From Ui - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/ExtractJobCreate' responses: 
'200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ExtractJob' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/extraction/jobs/{job_id}: get: tags: - LlamaExtract summary: Get Job operationId: get_job_api_v1_extraction_jobs__job_id__get security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string format: uuid title: Job Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ExtractJob' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/extraction/jobs/file: post: tags: - LlamaExtract summary: Run Job On File operationId: run_job_on_file_api_v1_extraction_jobs_file_post security: - HTTPBearer: [] parameters: - name: from_ui in: query required: false schema: type: boolean default: false title: From Ui - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: multipart/form-data: schema: $ref: '#/components/schemas/Body_run_job_on_file_api_v1_extraction_jobs_file_post' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ExtractJob' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/extraction/jobs/batch: post: tags: - LlamaExtract summary: Run Batch Jobs operationId: run_batch_jobs_api_v1_extraction_jobs_batch_post security: - HTTPBearer: [] parameters: - name: from_ui in: query required: false schema: type: boolean default: false title: From Ui - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: 
Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/ExtractJobCreateBatch' responses: '200': description: Successful Response content: application/json: schema: type: array items: $ref: '#/components/schemas/ExtractJob' title: Response Run Batch Jobs Api V1 Extraction Jobs Batch Post '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/extraction/jobs/{job_id}/result: get: tags: - LlamaExtract summary: Get Job Result operationId: get_job_result_api_v1_extraction_jobs__job_id__result_get security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string format: uuid title: Job Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ExtractResultset' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/extraction/runs: get: tags: - LlamaExtract summary: List Extract Runs operationId: list_extract_runs_api_v1_extraction_runs_get security: - HTTPBearer: [] parameters: - name: extraction_agent_id in: query required: true schema: type: string format: uuid title: Extraction Agent Id - name: skip in: query required: false schema: type: integer default: 0 title: Skip - name: limit in: query required: false schema: type: integer default: 25 title: Limit - name: status in: query required: false schema: anyOf: - type: string - type: 'null' description: Filter by status title: Status description: Filter by status - name: run_id in: query 
required: false schema: anyOf: - type: string - type: 'null' description: Filter by run ID title: Run Id description: Filter by run ID - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/PaginatedExtractRunsResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/extraction/runs/latest-from-ui: get: tags: - LlamaExtract summary: Get Latest Run From Ui operationId: get_latest_run_from_ui_api_v1_extraction_runs_latest_from_ui_get security: - HTTPBearer: [] parameters: - name: extraction_agent_id in: query required: true schema: type: string format: uuid title: Extraction Agent Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: anyOf: - $ref: '#/components/schemas/ExtractRun' - type: 'null' title: Response Get Latest Run From Ui Api V1 Extraction Runs Latest From Ui Get '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/extraction/runs/by-job/{job_id}: get: tags: - LlamaExtract summary: Get Run By Job Id operationId: get_run_by_job_id_api_v1_extraction_runs_by_job__job_id__get security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string format: uuid title: Job Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: 
application/json: schema: $ref: '#/components/schemas/ExtractRun' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/extraction/runs/{run_id}: get: tags: - LlamaExtract summary: Get Run operationId: get_run_api_v1_extraction_runs__run_id__get security: - HTTPBearer: [] parameters: - name: run_id in: path required: true schema: type: string format: uuid title: Run Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ExtractRun' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' delete: tags: - LlamaExtract summary: Delete Extraction Run operationId: delete_extraction_run_api_v1_extraction_runs__run_id__delete security: - HTTPBearer: [] parameters: - name: run_id in: path required: true schema: type: string format: uuid title: Run Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: {} '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/extraction/run: post: tags: - LlamaExtract summary: Extract Stateless description: Stateless extraction using a 
default agent in the user's default project; requires `data_schema`, `config`, and one of `file_id` / `text` / base64 file data. operationId: extract_stateless_api_v1_extraction_run_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/ExtractStatelessRequest' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ExtractJob' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/extraction/extraction-agents: post: tags: - LlamaExtract summary: Create Extraction Agent operationId: create_extraction_agent_api_v1_extraction_extraction_agents_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/ExtractAgentCreate' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ExtractAgent' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' get: tags: - LlamaExtract summary: List Extraction Agents operationId: 
list_extraction_agents_api_v1_extraction_extraction_agents_get security: - HTTPBearer: [] parameters: - name: include_default in: query required: false schema: type: boolean description: Whether to include default agents in the results default: false title: Include Default description: Whether to include default agents in the results - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: type: array items: $ref: '#/components/schemas/ExtractAgent' title: Response List Extraction Agents Api V1 Extraction Extraction Agents Get '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/extraction/extraction-agents/schema/validation: post: tags: - LlamaExtract summary: Validate Extraction Schema description: Validate an extraction agent schema; returns the normalized schema or raises 400. 
operationId: validate_extraction_schema_api_v1_extraction_extraction_agents_schema_validation_post security: - HTTPBearer: [] parameters: - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/ExtractSchemaValidateRequest' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ExtractSchemaValidateResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/extraction/extraction-agents/schema/generate: post: tags: - LlamaExtract summary: Generate Extraction Schema description: Generate or refine an extraction agent schema from a file, natural-language prompt, or existing schema. operationId: generate_extraction_schema_api_v1_extraction_extraction_agents_schema_generate_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/ExtractSchemaGenerateRequest' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ExtractSchemaGenerateResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/extraction/extraction-agents/by-name/{name}: get: tags: - LlamaExtract summary: Get Extraction Agent By Name operationId: get_extraction_agent_by_name_api_v1_extraction_extraction_agents_by_name__name__get deprecated: true security: - HTTPBearer: [] 
parameters: - name: name in: path required: true schema: type: string title: Name - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ExtractAgent' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/extraction/extraction-agents/default: get: tags: - LlamaExtract summary: Get Or Create Default Extraction Agent description: Get or create the default extraction agent for the current project (empty schema, default config). operationId: get_or_create_default_extraction_agent_api_v1_extraction_extraction_agents_default_get security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ExtractAgent' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/extraction/extraction-agents/{extraction_agent_id}: get: tags: - LlamaExtract summary: Get Extraction Agent operationId: get_extraction_agent_api_v1_extraction_extraction_agents__extraction_agent_id__get security: - HTTPBearer: [] parameters: - name: extraction_agent_id in: path required: true schema: type: string format: uuid 
title: Extraction Agent Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ExtractAgent' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' delete: tags: - LlamaExtract summary: Delete Extraction Agent operationId: delete_extraction_agent_api_v1_extraction_extraction_agents__extraction_agent_id__delete security: - HTTPBearer: [] parameters: - name: extraction_agent_id in: path required: true schema: type: string format: uuid title: Extraction Agent Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: {} '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' put: tags: - LlamaExtract summary: Update Extraction Agent operationId: update_extraction_agent_api_v1_extraction_extraction_agents__extraction_agent_id__put security: - HTTPBearer: [] parameters: - name: extraction_agent_id in: path required: true schema: type: string format: uuid title: Extraction Agent Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/ExtractAgentUpdate' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ExtractAgent' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/retrieval/retrieve: post: tags: - Retrieval summary: Retrieve description: Retrieve relevant chunks via hybrid search (vector + full-text), with filtering on built-in or user-defined metadata. 
operationId: retrieve_api_v1_retrieval_retrieve_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/RetrieveParams' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/RetrieveResult' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/retrieval/files/find: post: tags: - Retrieval summary: Find Files description: Search for files by name. operationId: find_files_api_v1_retrieval_files_find_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/FileFindParams' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/FileFindResult' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/retrieval/files/grep: post: tags: - Retrieval summary: Grep File description: Grep within a file's parsed content using a regex pattern. 
operationId: grep_file_api_v1_retrieval_files_grep_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/FileGrepParams' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/FileGrepResult' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/retrieval/files/read: post: tags: - Retrieval summary: Read File description: Read the parsed text content of a specific file. operationId: read_file_api_v1_retrieval_files_read_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/FileReadParams' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/FileReadResult' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/chat: post: tags: - Chat summary: Create Session description: Create a chat session, optionally bound to indexes (locked after the first message). 
operationId: create_session_api_v1_chat_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: content: application/json: schema: $ref: '#/components/schemas/SessionCreate' responses: '201': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ChatSessionSummary' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' get: tags: - Chat summary: List Sessions description: List all chat sessions for the current project. operationId: list_sessions_api_v1_chat_get security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: page_size in: query required: false schema: anyOf: - type: integer - type: 'null' title: Page Size - name: page_token in: query required: false schema: anyOf: - type: string - type: 'null' title: Page Token - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/SessionList' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/chat/{session_id}: get: tags: - Chat summary: Get Full Session description: Retrieve a full session by ID, including its event history. 
operationId: get_full_session_api_v1_chat__session_id__get security: - HTTPBearer: [] parameters: - name: session_id in: path required: true schema: type: string title: Session Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/SessionDetail' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' delete: tags: - Chat summary: Delete Session description: Delete a session. operationId: delete_session_api_v1_chat__session_id__delete security: - HTTPBearer: [] parameters: - name: session_id in: path required: true schema: type: string title: Session Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '204': description: Successful Response '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/chat/{session_id}/summary: get: tags: - Chat summary: Get Session Summary description: Retrieve a session summary by ID. 
operationId: get_session_summary_api_v1_chat__session_id__summary_get security: - HTTPBearer: [] parameters: - name: session_id in: path required: true schema: type: string title: Session Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ChatSessionSummary' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/chat/{session_id}/messages/stream: post: tags: - Chat summary: Stream Messages description: Stream agent events for a chat turn as Server-Sent Events. operationId: stream_messages_api_v1_chat__session_id__messages_stream_post security: - HTTPBearer: [] parameters: - name: session_id in: path required: true schema: type: string title: Session Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/ChatParams' responses: '200': description: Successful Response content: application/json: schema: {} '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/indexes: post: tags: - Indexes summary: Create Index description: Create a searchable index over a source directory. 
operationId: create_index_api_v1_indexes_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/IndexCreateRequest' responses: '201': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/IndexResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' get: tags: - Indexes summary: List Indexes description: List indexes for the current project. operationId: list_indexes_api_v1_indexes_get security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: source_directory_id in: query required: false schema: anyOf: - type: string - type: 'null' title: Source Directory Id - name: page_size in: query required: false schema: anyOf: - type: integer - type: 'null' title: Page Size - name: page_token in: query required: false schema: anyOf: - type: string - type: 'null' title: Page Token - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/IndexQueryResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/indexes/{index_id}: get: tags: - Indexes summary: Get Index 
description: Get an index by ID. operationId: get_index_api_v1_indexes__index_id__get security: - HTTPBearer: [] parameters: - name: index_id in: path required: true schema: type: string title: Index Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/IndexResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' delete: tags: - Indexes summary: Delete Index description: Delete an index. operationId: delete_index_api_v1_indexes__index_id__delete security: - HTTPBearer: [] parameters: - name: index_id in: path required: true schema: type: string title: Index Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '204': description: Successful Response '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/indexes/{index_id}/sync: post: tags: - Indexes summary: Sync Index description: Trigger a sync and export for an existing index, re-parsing changed files and exporting updated chunks. 
operationId: sync_index_api_v1_indexes__index_id__sync_post security: - HTTPBearer: [] parameters: - name: index_id in: path required: true schema: type: string title: Index Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '202': description: Successful Response content: application/json: schema: {} '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/usage-metrics: get: tags: - Beta - Usage Metrics summary: List Usage Metrics description: List usage metrics with filtering and pagination. operationId: list_usage_metrics_api_v1_beta_usage_metrics_get security: - HTTPBearer: [] parameters: - name: page_size in: query required: false schema: anyOf: - type: integer - type: 'null' description: Number of items per page title: Page Size description: Number of items per page - name: page_token in: query required: false schema: anyOf: - type: string - type: 'null' description: Token for pagination title: Page Token description: Token for pagination - name: include_total in: query required: false schema: type: boolean description: Include total count in response default: false title: Include Total description: Include total count in response - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' description: Filter by project ID title: Project Id description: Filter by project ID - name: user_id in: query required: false schema: anyOf: - type: string - type: 'null' description: Filter by user ID title: User Id description: Filter by user ID - name: event_types in: query required: false schema: anyOf: - type: array items: type: string 
- type: 'null' description: Filter by event types title: Event Types description: Filter by event types - name: days in: query required: false schema: anyOf: - type: array items: type: string - type: 'null' description: Filter by specific days (YYYY-MM-DD) title: Days description: Filter by specific days (YYYY-MM-DD) - name: day_on_or_before in: query required: false schema: anyOf: - type: string - type: 'null' description: Filter by days on or before this date (YYYY-MM-DD) title: Day On Or Before description: Filter by days on or before this date (YYYY-MM-DD) - name: day_on_or_after in: query required: false schema: anyOf: - type: string - type: 'null' description: Filter by days on or after this date (YYYY-MM-DD) title: Day On Or After description: Filter by days on or after this date (YYYY-MM-DD) - name: event_aggregation_type in: query required: false schema: anyOf: - type: string - type: 'null' description: Filter by event aggregation type title: Event Aggregation Type description: Filter by event aggregation type - name: event_aggregation_key in: query required: false schema: anyOf: - type: string - type: 'null' description: Filter by event aggregation key title: Event Aggregation Key description: Filter by event aggregation key - name: organization_id in: query required: true schema: type: string format: uuid title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/UsageMetricQueryResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/agent-data/{item_id}: get: tags: - Beta - Agent Data summary: Get Agent Data description: Get agent data by ID. 
operationId: get_agent_data_api_v1_beta_agent_data__item_id__get security: - HTTPBearer: [] parameters: - name: item_id in: path required: true schema: type: string title: Item Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/AgentData' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' put: tags: - Beta - Agent Data summary: Update Agent Data description: Update agent data by ID (overwrites). operationId: update_agent_data_api_v1_beta_agent_data__item_id__put security: - HTTPBearer: [] parameters: - name: item_id in: path required: true schema: type: string title: Item Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/AgentDataUpdate' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/AgentData' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' delete: tags: - Beta - Agent Data summary: Delete Agent Data description: Delete agent data by ID. 
operationId: delete_agent_data_api_v1_beta_agent_data__item_id__delete security: - HTTPBearer: [] parameters: - name: item_id in: path required: true schema: type: string title: Item Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: type: object additionalProperties: type: string title: Response Delete Agent Data Api V1 Beta Agent Data Item Id Delete '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/agent-data: post: tags: - Beta - Agent Data summary: Create Agent Data description: Create new agent data. operationId: create_agent_data_api_v1_beta_agent_data_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/AgentDataCreate' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/AgentData' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/agent-data/:search: post: tags: - Beta - Agent Data summary: Search Agent Data description: Search agent data with filtering, sorting, and pagination. 
operationId: search_agent_data_api_v1_beta_agent_data__search_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/SearchRequest' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/PaginatedResponse_AgentData_' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/agent-data/:aggregate: post: tags: - Beta - Agent Data summary: Aggregate Agent Data description: Aggregate agent data with grouping and optional counting/first item retrieval. 
operationId: aggregate_agent_data_api_v1_beta_agent_data__aggregate_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/AggregateRequest' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/PaginatedResponse_AggregateGroup_' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/agent-data/:delete: post: tags: - Beta - Agent Data summary: Delete Agent Data By Query description: Bulk delete agent data by query (deployment_name, collection, optional filters). operationId: delete_agent_data_by_query_api_v1_beta_agent_data__delete_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/DeleteRequest' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/DeleteResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/files: get: tags: - Beta - Files summary: List Files description: 'List files with optional filtering and pagination. 
Filter by `file_name`, `file_ids`, or `external_file_id`. Supports cursor-based pagination and custom ordering.' operationId: list_files_api_v1_beta_files_get security: - HTTPBearer: [] parameters: - name: page_size in: query required: false schema: anyOf: - type: integer maximum: 1000 minimum: 1 - type: 'null' description: The maximum number of items to return. Defaults to 50, maximum is 1000. title: Page Size description: The maximum number of items to return. Defaults to 50, maximum is 1000. - name: page_token in: query required: false schema: anyOf: - type: string - type: 'null' description: A page token received from a previous list call. Provide this to retrieve the subsequent page. title: Page Token description: A page token received from a previous list call. Provide this to retrieve the subsequent page. - name: file_ids in: query required: false schema: anyOf: - type: array items: type: string format: uuid - type: 'null' description: Filter by specific file IDs. title: File Ids description: Filter by specific file IDs. - name: file_name in: query required: false schema: anyOf: - type: string - type: 'null' description: Filter by file name (exact match). title: File Name description: Filter by file name (exact match). - name: external_file_id in: query required: false schema: anyOf: - type: string - type: 'null' description: Filter by external file ID. title: External File Id description: Filter by external file ID. - name: order_by in: query required: false schema: anyOf: - type: string - type: 'null' description: A comma-separated list of fields to order by, sorted in ascending order. Use 'field_name desc' to specify descending order. title: Order By description: A comma-separated list of fields to order by, sorted in ascending order. Use 'field_name desc' to specify descending order. - name: expand in: query required: false schema: anyOf: - type: array items: type: string - type: 'null' description: Fields to expand on each file. 
examples: - download_url title: Expand description: Fields to expand on each file. - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/FileQueryResponseV2' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' post: tags: - Beta - Files summary: Upload File description: 'Upload a file using multipart/form-data. Set `purpose` to indicate how the file will be used: `user_data`, `parse`, `extract`, `classify`, `split`, `sheet`, or `agent_app`. Returns the created file metadata including its ID for use in subsequent parse, extract, or classify operations.' operationId: upload_file_api_v1_beta_files_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: multipart/form-data: schema: $ref: '#/components/schemas/Body_upload_file_api_v1_beta_files_post' responses: '201': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/FileV2' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/files/query: post: tags: - Beta - Files summary: Query Files description: 'Query files with filtering and pagination. 
Deprecated: use `GET /files`.' operationId: query_files_api_v1_beta_files_query_post deprecated: true security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/FileQueryRequest' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/FileQueryResponseV2' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/files/{file_id}: get: tags: - Beta - Files summary: Get File description: Get file metadata by ID. operationId: get_file_api_v1_beta_files__file_id__get security: - HTTPBearer: [] parameters: - name: file_id in: path required: true schema: type: string format: uuid title: File Id - name: expand in: query required: false schema: anyOf: - type: array items: type: string - type: 'null' description: Fields to expand. examples: - download_url title: Expand description: Fields to expand. 
- name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/FileV2' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' delete: tags: - Beta - Files summary: Delete File description: Delete a file from the project. operationId: delete_file_api_v1_beta_files__file_id__delete security: - HTTPBearer: [] parameters: - name: file_id in: path required: true schema: type: string format: uuid title: File Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '204': description: Successful Response '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/files/{file_id}/content: get: tags: - Beta - Files summary: Read File Content description: Get a presigned URL to download the file content. 
operationId: read_file_content_api_v1_beta_files__file_id__content_get security: - HTTPBearer: [] parameters: - name: file_id in: path required: true schema: type: string format: uuid title: File Id - name: expires_at_seconds in: query required: false schema: anyOf: - type: integer - type: 'null' title: Expires At Seconds - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/PresignedUrl' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/sheets/jobs: post: tags: - Beta - Sheets summary: Create Spreadsheet Job description: 'Create a spreadsheet parsing job. Provide at most one of `configuration` (an inline parsing configuration) or `configuration_id` (a saved configuration preset). If neither is provided, a default configuration is used. Optionally include `webhook_configurations` to receive `sheets.*` status notifications. Experimental: not production-ready and subject to change.'
operationId: create_spreadsheet_job_api_v1_beta_sheets_jobs_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/SpreadsheetJobCreate' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/SpreadsheetJob' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' get: tags: - Beta - Sheets summary: List Spreadsheet Jobs description: 'List spreadsheet parsing jobs. Experimental: not production-ready and subject to change.' operationId: list_spreadsheet_jobs_api_v1_beta_sheets_jobs_get security: - HTTPBearer: [] parameters: - name: include_results in: query required: false schema: type: boolean default: false title: Include Results - name: status in: query required: false schema: anyOf: - enum: - PENDING - SUCCESS - ERROR - PARTIAL_SUCCESS - CANCELLED type: string - type: 'null' description: Filter by job status title: Status description: Filter by job status - name: job_ids in: query required: false schema: anyOf: - type: array items: type: string - type: 'null' description: Filter by specific job IDs title: Job Ids description: Filter by specific job IDs - name: configuration_id in: query required: false schema: anyOf: - type: string - type: 'null' description: Filter by saved configuration ID title: Configuration Id description: Filter by saved configuration ID - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name:
organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: page_size in: query required: false schema: anyOf: - type: integer - type: 'null' title: Page Size - name: page_token in: query required: false schema: anyOf: - type: string - type: 'null' title: Page Token - name: created_at_on_or_after in: query required: false schema: anyOf: - type: string format: date-time - type: 'null' description: Include items created at or after this timestamp (inclusive) title: Created At On Or After description: Include items created at or after this timestamp (inclusive) - name: created_at_on_or_before in: query required: false schema: anyOf: - type: string format: date-time - type: 'null' description: Include items created at or before this timestamp (inclusive) title: Created At On Or Before description: Include items created at or before this timestamp (inclusive) - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/PaginatedResponse_SpreadsheetJob_' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/sheets/jobs/{spreadsheet_job_id}: get: tags: - Beta - Sheets summary: Get Spreadsheet Job description: 'Get a spreadsheet parsing job. When `include_results=True` (default), embeds extracted regions and results if complete, skipping the separate `/results` call. Experimental: not production-ready and subject to change.'
operationId: get_spreadsheet_job_api_v1_beta_sheets_jobs__spreadsheet_job_id__get security: - HTTPBearer: [] parameters: - name: spreadsheet_job_id in: path required: true schema: type: string title: Spreadsheet Job Id - name: include_results in: query required: false schema: type: boolean default: true title: Include Results - name: expand in: query required: false schema: type: array items: type: string description: 'Optional fields to populate on the response. Valid values: metadata_state_transitions.' title: Expand description: 'Optional fields to populate on the response. Valid values: metadata_state_transitions.' - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/SpreadsheetJob' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' delete: tags: - Beta - Sheets summary: Delete Spreadsheet Job description: 'Delete a spreadsheet parsing job and its associated data. Experimental: not production-ready and subject to change.'
operationId: delete_spreadsheet_job_api_v1_beta_sheets_jobs__spreadsheet_job_id__delete security: - HTTPBearer: [] parameters: - name: spreadsheet_job_id in: path required: true schema: type: string title: Spreadsheet Job Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: {} '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/sheets/jobs/{spreadsheet_job_id}/regions/{region_id}/result/{region_type}: get: tags: - Beta - Sheets summary: Get Result Region description: 'Generate a presigned URL to download a specific extracted region. Experimental: not production-ready and subject to change.'
operationId: get_result_region_api_v1_beta_sheets_jobs__spreadsheet_job_id__regions__region_id__result__region_type__get security: - HTTPBearer: [] parameters: - name: spreadsheet_job_id in: path required: true schema: type: string title: Spreadsheet Job Id - name: region_id in: path required: true schema: type: string title: Region Id - name: region_type in: path required: true schema: $ref: '#/components/schemas/SpreadsheetResultType' - name: expires_at_seconds in: query required: false schema: anyOf: - type: integer - type: 'null' title: Expires At Seconds - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/PresignedUrl' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/directories: post: tags: - Beta - Directories summary: Create Directory description: Create a new directory within the specified project. 
operationId: create_directory_api_v1_beta_directories_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/DirectoryCreateRequest' responses: '201': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/DirectoryResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' get: tags: - Beta - Directories summary: List Directories description: List directories in the project with optional filtering and pagination. operationId: list_directories_api_v1_beta_directories_get security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: name in: query required: false schema: anyOf: - type: string - type: 'null' title: Name - name: type in: query required: false schema: anyOf: - enum: - user - index - ephemeral type: string - type: 'null' title: Type - name: include_deleted in: query required: false schema: type: boolean default: false title: Include Deleted - name: page_size in: query required: false schema: anyOf: - type: integer - type: 'null' title: Page Size - name: page_token in: query required: false schema: anyOf: - type: string - type: 'null' title: Page Token - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref:
'#/components/schemas/DirectoryQueryResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/directories/{directory_id}: get: tags: - Beta - Directories summary: Get Directory description: Retrieve a directory by its identifier. operationId: get_directory_api_v1_beta_directories__directory_id__get security: - HTTPBearer: [] parameters: - name: directory_id in: path required: true schema: type: string title: Directory Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/DirectoryResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' patch: tags: - Beta - Directories summary: Update Directory description: Update directory metadata. 
operationId: update_directory_api_v1_beta_directories__directory_id__patch security: - HTTPBearer: [] parameters: - name: directory_id in: path required: true schema: type: string title: Directory Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/DirectoryUpdateRequest' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/DirectoryResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' delete: tags: - Beta - Directories summary: Delete Directory description: Permanently delete a directory. 
operationId: delete_directory_api_v1_beta_directories__directory_id__delete security: - HTTPBearer: [] parameters: - name: directory_id in: path required: true schema: type: string title: Directory Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '204': description: Successful Response '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/directories/{directory_id}/files: post: tags: - Beta - Directory Files summary: Add Directory File description: Create a new file within the specified directory; the directory must exist in the project and `file_id` must reference an existing file. operationId: add_directory_file_api_v1_beta_directories__directory_id__files_post security: - HTTPBearer: [] parameters: - name: directory_id in: path required: true schema: type: string title: Directory Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/DirectoryFileCreateRequest' responses: '201': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/DirectoryFileResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' get: tags: - Beta - Directory Files summary: List Directory Files 
description: List all files within the specified directory with optional filtering and pagination. operationId: list_directory_files_api_v1_beta_directories__directory_id__files_get security: - HTTPBearer: [] parameters: - name: directory_id in: path required: true schema: type: string title: Directory Id - name: expand in: query required: false schema: anyOf: - type: array items: type: string - type: 'null' description: Fields to expand on each directory file. examples: - download_url title: Expand description: Fields to expand on each directory file. - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: display_name in: query required: false schema: anyOf: - type: string - type: 'null' title: Display Name - name: display_name_contains in: query required: false schema: anyOf: - type: string - type: 'null' title: Display Name Contains - name: unique_id in: query required: false schema: anyOf: - type: string - type: 'null' title: Unique Id - name: file_id in: query required: false schema: anyOf: - type: string - type: 'null' title: File Id - name: include_deleted in: query required: false schema: type: boolean default: false title: Include Deleted - name: page_size in: query required: false schema: anyOf: - type: integer - type: 'null' title: Page Size - name: page_token in: query required: false schema: anyOf: - type: string - type: 'null' title: Page Token - name: updated_at_on_or_after in: query required: false schema: anyOf: - type: string format: date-time - type: 'null' description: Include items updated at or after this timestamp (inclusive) title: Updated At On Or After description: Include items updated at or after this timestamp (inclusive) - name: updated_at_on_or_before in: query required: false schema: anyOf: - type: string format: date-time - type: 
'null' description: Include items updated at or before this timestamp (inclusive) title: Updated At On Or Before description: Include items updated at or before this timestamp (inclusive) - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: content: application/json: schema: anyOf: - type: array items: type: string maxItems: 200 - type: 'null' title: Directory File Ids responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/DirectoryFileQueryResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/directories/{directory_id}/files/upload: post: tags: - Beta - Directory Files summary: Upload File To Directory description: Upload a file and create its directory entry in one call; `unique_id` / `display_name` default to values derived from file metadata. operationId: upload_file_to_directory_api_v1_beta_directories__directory_id__files_upload_post security: - HTTPBearer: [] parameters: - name: directory_id in: path required: true schema: type: string title: Directory Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: multipart/form-data: schema: $ref: '#/components/schemas/Body_upload_file_to_directory_api_v1_beta_directories__directory_id__files_upload_post' responses: '201': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/DirectoryFileResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' 
/api/v1/beta/directories/{directory_id}/files/{directory_file_id}: get: tags: - Beta - Directory Files summary: Get Directory File description: Get a directory file by `directory_file_id`; to look up by `unique_id`, use the list endpoint with a filter. operationId: get_directory_file_api_v1_beta_directories__directory_id__files__directory_file_id__get security: - HTTPBearer: [] parameters: - name: directory_id in: path required: true schema: type: string title: Directory Id - name: directory_file_id in: path required: true schema: type: string title: Directory File Id - name: expand in: query required: false schema: anyOf: - type: array items: type: string - type: 'null' description: Fields to expand. examples: - download_url title: Expand description: Fields to expand. - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/DirectoryFileResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' patch: tags: - Beta - Directory Files summary: Update Directory File description: Update directory-file metadata by `directory_file_id`; set `directory_id` to move the file to a different directory. To resolve from `unique_id`, list with a filter first. 
operationId: update_directory_file_api_v1_beta_directories__directory_id__files__directory_file_id__patch security: - HTTPBearer: [] parameters: - name: directory_id in: path required: true schema: type: string title: Directory Id - name: directory_file_id in: path required: true schema: type: string title: Directory File Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/DirectoryFileUpdateRequest' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/DirectoryFileResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' delete: tags: - Beta - Directory Files summary: Delete Directory File description: Delete a directory file by `directory_file_id`; to resolve from `unique_id`, list with a filter first. 
operationId: delete_directory_file_api_v1_beta_directories__directory_id__files__directory_file_id__delete security: - HTTPBearer: [] parameters: - name: directory_id in: path required: true schema: type: string title: Directory Id - name: directory_file_id in: path required: true schema: type: string title: Directory File Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '204': description: Successful Response '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/directories/{directory_id}/files/bulk-delete: post: tags: - Beta - Directory Files summary: Bulk Delete Directory Files description: Delete up to 100 files from the specified directory; all must belong to it. 
operationId: bulk_delete_directory_files_api_v1_beta_directories__directory_id__files_bulk_delete_post security: - HTTPBearer: [] parameters: - name: directory_id in: path required: true schema: type: string title: Directory Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/DirectoryFileBulkDeleteRequest' responses: '204': description: Successful Response '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/configurations: post: tags: - Beta - Configurations - Configurations summary: Create Configuration description: Upsert a product configuration; updates if one with the same name + product type + project exists, otherwise creates. 
operationId: create_configuration_api_v1_beta_configurations_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/ConfigurationCreateRequest' responses: '201': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ConfigurationResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' get: tags: - Beta - Configurations - Configurations summary: List Configurations description: List product configurations for the current project. operationId: list_configurations_api_v1_beta_configurations_get security: - HTTPBearer: [] parameters: - name: product_type in: query required: false schema: anyOf: - type: array items: enum: - split_v1 - extract_v2 - classify_v2 - parse_v2 - spreadsheet_v1 - unknown type: string - type: 'null' description: Filter by one or more product types. Repeat the parameter for multiple values. title: Product Type description: Filter by one or more product types. Repeat the parameter for multiple values. - name: name in: query required: false schema: anyOf: - type: string - type: 'null' description: Filter by configuration name. title: Name description: Filter by configuration name. - name: page_size in: query required: false schema: anyOf: - type: integer - type: 'null' description: Number of items per page. title: Page Size description: Number of items per page. - name: page_token in: query required: false schema: anyOf: - type: string - type: 'null' description: Pagination token. 
title: Page Token description: Pagination token. - name: latest_only in: query required: false schema: type: boolean description: Return only the latest version per configuration name. default: false title: Latest Only description: Return only the latest version per configuration name. - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ConfigurationQueryResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/configurations/{config_id}: get: tags: - Beta - Configurations - Configurations summary: Get Configuration description: Get a single product configuration by ID. 
operationId: get_configuration_api_v1_beta_configurations__config_id__get security: - HTTPBearer: [] parameters: - name: config_id in: path required: true schema: type: string title: Config Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ConfigurationResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' put: tags: - Beta - Configurations - Configurations summary: Update Configuration description: Update an existing product configuration. operationId: update_configuration_api_v1_beta_configurations__config_id__put security: - HTTPBearer: [] parameters: - name: config_id in: path required: true schema: type: string title: Config Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/ConfigurationUpdateRequest' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ConfigurationResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' delete: tags: - Beta - Configurations - Configurations summary: Delete Configuration description: Delete a product 
configuration. operationId: delete_configuration_api_v1_beta_configurations__config_id__delete security: - HTTPBearer: [] parameters: - name: config_id in: path required: true schema: type: string title: Config Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '204': description: Successful Response '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/split/jobs: post: tags: - Beta - Split summary: Create Split Job description: Create a document split job. operationId: create_split_job_api_v1_beta_split_jobs_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/SplitCreateRequest' responses: '201': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/SplitJobResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' get: tags: - Beta - Split summary: List Split Jobs description: List document split jobs. 
operationId: list_split_jobs_api_v1_beta_split_jobs_get security: - HTTPBearer: [] parameters: - name: status in: query required: false schema: anyOf: - enum: - pending - processing - completed - failed - cancelled type: string - type: 'null' description: Filter by job status (pending, processing, completed, failed, cancelled) title: Status description: Filter by job status (pending, processing, completed, failed, cancelled) - name: job_ids in: query required: false schema: anyOf: - type: array items: type: string - type: 'null' description: Filter by specific job IDs title: Job Ids description: Filter by specific job IDs - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: page_size in: query required: false schema: anyOf: - type: integer - type: 'null' title: Page Size - name: page_token in: query required: false schema: anyOf: - type: string - type: 'null' title: Page Token - name: created_at_on_or_after in: query required: false schema: anyOf: - type: string format: date-time - type: 'null' description: Include items created at or after this timestamp (inclusive) title: Created At On Or After description: Include items created at or after this timestamp (inclusive) - name: created_at_on_or_before in: query required: false schema: anyOf: - type: string format: date-time - type: 'null' description: Include items created at or before this timestamp (inclusive) title: Created At On Or Before description: Include items created at or before this timestamp (inclusive) - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/SplitJobQueryResponse' '422': description: Validation Error content: 
application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/split/jobs/{split_job_id}: get: tags: - Beta - Split summary: Get Split Job description: Get a document split job. operationId: get_split_job_api_v1_beta_split_jobs__split_job_id__get security: - HTTPBearer: [] parameters: - name: split_job_id in: path required: true schema: type: string title: Split Job Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/SplitJobResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/batch-processing: post: tags: - Beta - Batch Processing summary: Create Batch Job description: 'Create a batch processing job. Processes files from a directory or a specific list of item IDs. Supports batch parsing and classification operations. Provide either `directory_id` to process all files in a directory, or `item_ids` for specific items. The job runs asynchronously — poll `GET /batch-processing/{job_id}` for progress.' 
operationId: create_batch_job_api_v1_beta_batch_processing_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: temporal-namespace in: header required: false schema: anyOf: - type: string - type: 'null' title: Temporal-Namespace - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/BatchJobCreateRequest' responses: '201': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/BatchJobResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' get: tags: - Beta - Batch Processing summary: List Batch Jobs description: 'List batch processing jobs with optional filtering. Filter by `directory_id`, `job_type`, or `status`. Results are paginated with configurable `limit` and `offset`.' 
operationId: list_batch_jobs_api_v1_beta_batch_processing_get security: - HTTPBearer: [] parameters: - name: directory_id in: query required: false schema: anyOf: - type: string - type: 'null' description: Filter by directory ID title: Directory Id description: Filter by directory ID - name: job_type in: query required: false schema: anyOf: - $ref: '#/components/schemas/BatchJobType' - type: 'null' description: Filter by job type (PARSE, EXTRACT, CLASSIFY) title: Job Type description: Filter by job type (PARSE, EXTRACT, CLASSIFY) - name: status in: query required: false schema: anyOf: - $ref: '#/components/schemas/BatchJobStatus' - type: 'null' description: Filter by job status (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) title: Status description: Filter by job status (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) - name: limit in: query required: false schema: type: integer maximum: 1000 minimum: 1 description: Maximum number of jobs to return default: 50 title: Limit description: Maximum number of jobs to return - name: offset in: query required: false schema: type: integer minimum: 0 description: Number of jobs to skip for pagination default: 0 title: Offset description: Number of jobs to skip for pagination - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/BatchJobQueryResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/batch-processing/{job_id}: get: tags: - Beta - Batch Processing summary: Get Batch Job Status description: 'Get detailed status of a 
batch processing job. Returns current progress percentage, file counts (total, processed, failed, skipped), and timestamps.' operationId: get_batch_job_status_api_v1_beta_batch_processing__job_id__get security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string title: Job Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/BatchJobStatusResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/batch-processing/{job_id}/items: get: tags: - Beta - Batch Processing summary: List Batch Job Items description: 'List items in a batch job with optional status filtering. Useful for finding failed items, viewing completed items, or debugging processing issues.' 
operationId: list_batch_job_items_api_v1_beta_batch_processing__job_id__items_get security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string title: Job Id - name: status in: query required: false schema: anyOf: - $ref: '#/components/schemas/BatchFileStatus' - type: 'null' description: Filter items by status title: Status description: Filter items by status - name: limit in: query required: false schema: type: integer maximum: 1000 minimum: 1 description: Maximum number of items to return default: 100 title: Limit description: Maximum number of items to return - name: offset in: query required: false schema: type: integer minimum: 0 description: Number of items to skip default: 0 title: Offset description: Number of items to skip - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/BatchItemListResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/batch-processing/{job_id}/cancel: post: tags: - Beta - Batch Processing summary: Cancel Batch Job description: 'Cancel a running batch processing job. Stops processing and marks pending items as cancelled. Items currently being processed may still complete.' 
operationId: cancel_batch_job_api_v1_beta_batch_processing__job_id__cancel_post security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string title: Job Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: temporal-namespace in: header required: false schema: anyOf: - type: string - type: 'null' title: Temporal-Namespace - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/BatchJobCancelRequest' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/BatchJobCancelResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/batch-processing/items/{item_id}/processing-results: get: tags: - Beta - Batch Processing summary: Get Item Processing Results description: 'Get all processing results for a specific item. Returns the complete processing history for an item including what operations were performed, parameters used, and where outputs are stored. Optionally filter by `job_type`.' 
operationId: get_item_processing_results_api_v1_beta_batch_processing_items__item_id__processing_results_get security: - HTTPBearer: [] parameters: - name: item_id in: path required: true schema: type: string title: Item Id - name: job_type in: query required: false schema: anyOf: - $ref: '#/components/schemas/BatchJobType' - type: 'null' description: Filter results by job type title: Job Type description: Filter results by job type - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ItemProcessingResultsResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v1/beta/pipelines/{pipeline_id}/files: get: tags: - Beta - Pipeline Files summary: List Pipeline Files description: List files for a pipeline with pagination and filtering. operationId: list_pipeline_files_api_v1_beta_pipelines__pipeline_id__files_get security: - HTTPBearer: [] parameters: - name: pipeline_id in: path required: true schema: type: string format: uuid title: Pipeline Id - name: data_source_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' description: Filter by data source ID. title: Data Source Id description: Filter by data source ID. - name: file_name_contains in: query required: false schema: anyOf: - type: string - type: 'null' description: Filter by files whose names contain this substring (case-insensitive). title: File Name Contains description: Filter by files whose names contain this substring (case-insensitive). 
- name: statuses in: query required: false schema: anyOf: - type: array items: enum: - NOT_STARTED - IN_PROGRESS - SUCCESS - ERROR - CANCELLED type: string - type: 'null' description: Filter by pipeline file statuses. title: Statuses description: Filter by pipeline file statuses. - name: page_size in: query required: false schema: anyOf: - type: integer maximum: 1000 minimum: 1 - type: 'null' description: Maximum number of items to return. Defaults to 50, max 1000. title: Page Size description: Maximum number of items to return. Defaults to 50, max 1000. - name: page_token in: query required: false schema: anyOf: - type: string - type: 'null' description: Page token for pagination, from a previous response. title: Page Token description: Page token for pagination, from a previous response. - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/PipelineFileListResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v2/parse/upload: post: tags: - V2 - Parse summary: Upload File Multipart description: 'Upload and parse a file using multipart/form-data. Send the file as a `file` field and parsing configuration as a `configuration` JSON string field. The job runs asynchronously. Poll `GET /parse/{job_id}` with `expand` to retrieve results.' 
operationId: upload_file_multipart_api_v2_parse_upload_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ParseJobResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v2/parse: post: tags: - V2 - Parse summary: Parse File description: 'Parse a file by file ID or URL. Provide either `file_id` (a previously uploaded file) or `source_url` (a publicly accessible URL). Configure parsing with options like `tier`, `target_pages`, and `lang`. ## Tiers - `fast` — rule-based, cheapest, no AI - `cost_effective` — balanced speed and quality - `agentic` — full AI-powered parsing - `agentic_plus` — premium AI with specialized features The job runs asynchronously. Poll `GET /parse/{job_id}` with `expand=text` or `expand=markdown` to retrieve results.' 
operationId: parse_file_api_v2_parse_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/ParseRequestConfiguration' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ParseJobResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' get: tags: - V2 - Parse summary: List Parse Jobs description: 'List parse jobs for the current project. Filter by `status` or creation date range. Results are paginated — use `page_token` from the response to fetch subsequent pages.' 
operationId: list_parse_jobs_api_v2_parse_get security: - HTTPBearer: [] parameters: - name: page_size in: query required: false schema: anyOf: - type: integer - type: 'null' description: Number of items per page title: Page Size description: Number of items per page - name: page_token in: query required: false schema: anyOf: - type: string - type: 'null' description: Token for pagination title: Page Token description: Token for pagination - name: status in: query required: false schema: anyOf: - enum: - PENDING - RUNNING - COMPLETED - FAILED - CANCELLED type: string - type: 'null' description: Filter by job status (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) title: Status description: Filter by job status (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) - name: job_ids in: query required: false schema: anyOf: - type: array items: type: string - type: 'null' description: Filter by specific job IDs title: Job Ids description: Filter by specific job IDs - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: created_at_on_or_after in: query required: false schema: anyOf: - type: string format: date-time - type: 'null' description: Include items created at or after this timestamp (inclusive) title: Created At On Or After description: Include items created at or after this timestamp (inclusive) - name: created_at_on_or_before in: query required: false schema: anyOf: - type: string format: date-time - type: 'null' description: Include items created at or before this timestamp (inclusive) title: Created At On Or Before description: Include items created at or before this timestamp (inclusive) - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: 
application/json: schema: $ref: '#/components/schemas/ParseJobQueryResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v2/parse/versions: get: tags: - V2 - Parse summary: List Parse Versions description: List the parse versions accepted by each tier. operationId: list_parse_versions_api_v2_parse_versions_get responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ParseVersionsResponse' /api/v2/parse/{job_id}: get: tags: - V2 - Parse summary: Get Parse Job description: 'Retrieve a parse job with optional expanded content. By default returns job metadata only. Use `expand` to include parsed content: - `text` — plain text output - `markdown` — markdown output - `items` — structured page-by-page output - `job_metadata` — usage and processing details Content metadata fields (e.g. `text_content_metadata`) return presigned URLs for downloading large results.' operationId: get_parse_job_api_v2_parse__job_id__get security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string title: Job Id - name: expand in: query required: false schema: type: array items: type: string description: 'Fields to include: text, markdown, items, metadata, job_metadata, text_content_metadata, markdown_content_metadata, items_content_metadata, metadata_content_metadata, raw_words_content_metadata, xlsx_content_metadata, output_pdf_content_metadata, images_content_metadata. Metadata fields include presigned URLs.' title: Expand description: 'Fields to include: text, markdown, items, metadata, job_metadata, text_content_metadata, markdown_content_metadata, items_content_metadata, metadata_content_metadata, raw_words_content_metadata, xlsx_content_metadata, output_pdf_content_metadata, images_content_metadata. Metadata fields include presigned URLs.' 
- name: image_filenames in: query required: false schema: anyOf: - type: string - type: 'null' description: 'Filter to specific image filenames (optional). Example: image_0.png,image_1.jpg' title: Image Filenames description: 'Filter to specific image filenames (optional). Example: image_0.png,image_1.jpg' - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ParseResultResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v2/parse/{job_id}/cancel: post: tags: - V2 - Parse summary: Cancel Parse Job description: 'Cancel a running parse job. Stops processing and marks the job as CANCELLED. Returns the updated job. Jobs already in a terminal state (COMPLETED, FAILED, CANCELLED) cannot be cancelled.' 
operationId: cancel_parse_job_api_v2_parse__job_id__cancel_post security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string title: Job Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ParseJobResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v2/classify: post: tags: - V2 - Classify summary: Create Classify Job description: 'Create a classify job. Classifies a document against a set of rules. Set `file_input` to a file ID (`dfl-...`) or parse job ID (`pjb-...`), and provide either inline `configuration` with rules or a `configuration_id` referencing a saved preset. Each rule has a `type` (the label to assign) and a `description` (natural language criteria). The classifier returns the best matching rule with a confidence score. The job runs asynchronously. Poll `GET /classify/{job_id}` to check status and retrieve results.' 
operationId: create_classify_job_api_v2_classify_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/ClassifyV2JobCreateRequest' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ClassifyV2JobResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' get: tags: - V2 - Classify summary: List Classify Jobs description: 'List classify jobs with optional filtering and pagination. Filter by `status`, `configuration_id`, specific `job_ids`, or creation date range.' 
operationId: list_classify_jobs_api_v2_classify_get security: - HTTPBearer: [] parameters: - name: page_size in: query required: false schema: anyOf: - type: integer maximum: 100 minimum: 1 - type: 'null' description: Number of items per page title: Page Size description: Number of items per page - name: page_token in: query required: false schema: anyOf: - type: string - type: 'null' description: Token for pagination title: Page Token description: Token for pagination - name: status in: query required: false schema: anyOf: - enum: - PENDING - RUNNING - COMPLETED - FAILED type: string - type: 'null' description: Filter by job status title: Status description: Filter by job status - name: job_ids in: query required: false schema: anyOf: - type: array items: type: string - type: 'null' description: Filter by specific job IDs title: Job Ids description: Filter by specific job IDs - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: configuration_id in: query required: false schema: anyOf: - type: string - type: 'null' description: Filter by configuration ID examples: - cfg-11111111-2222-3333-4444-555555555555 title: Configuration Id description: Filter by configuration ID - name: created_at_on_or_after in: query required: false schema: anyOf: - type: string format: date-time - type: 'null' description: Include items created at or after this timestamp (inclusive) title: Created At On Or After description: Include items created at or after this timestamp (inclusive) - name: created_at_on_or_before in: query required: false schema: anyOf: - type: string format: date-time - type: 'null' description: Include items created at or before this timestamp (inclusive) title: Created At On Or Before description: Include items created at or before this timestamp (inclusive) - 
name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ClassifyV2JobQueryResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v2/classify/{job_id}: get: tags: - V2 - Classify summary: Get Classify Job description: 'Get a classify job by ID. Returns the job status, configuration, and classify result when complete. The result includes the matched document type, confidence score, and reasoning.' operationId: get_classify_job_api_v2_classify__job_id__get security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string title: Job Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ClassifyV2JobResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v2/classify/{job_id}/cancel: post: tags: - V2 - Classify summary: Cancel Classify Job description: 'Cancel a running classify job. Stops processing and marks the job as CANCELLED. Returns the updated job. Jobs already in a terminal state (COMPLETED, FAILED, CANCELLED) cannot be cancelled.' 
operationId: cancel_classify_job_api_v2_classify__job_id__cancel_post security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string title: Job Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ClassifyV2JobResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v2/extract/schema/validation: post: tags: - V2 - Extract summary: Validate Extraction Schema description: Validate a JSON schema for extraction. operationId: validate_extraction_schema_api_v2_extract_schema_validation_post security: - HTTPBearer: [] parameters: - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/ExtractV2SchemaValidateRequest' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ExtractV2SchemaValidateResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v2/extract/schema/generate: post: tags: - V2 - Extract summary: Generate Extraction Schema description: Generate a JSON schema and return a product configuration request. 
operationId: generate_extraction_schema_api_v2_extract_schema_generate_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/ExtractV2SchemaGenerateRequest' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ConfigurationCreateRequest' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v2/extract: post: tags: - V2 - Extract summary: Create Extract Job description: 'Create an extraction job. Extracts structured data from a document using either a saved configuration or an inline JSON Schema. ## Input Provide exactly one of: - `configuration_id` — reference a saved extraction config - `configuration` — inline configuration with a `data_schema` ## Document input Set `file_input` to a file ID (`dfl-...`) or a completed parse job ID (`pjb-...`). The job runs asynchronously. Poll `GET /extract/{job_id}` or register a webhook to monitor completion.' 
operationId: create_extract_job_api_v2_extract_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/ExtractV2JobCreate' responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ExtractV2Job' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' get: tags: - V2 - Extract summary: List Extract Jobs description: 'List extraction jobs with optional filtering and pagination. Filter by `configuration_id`, `status`, `file_input`, or creation date range. Results are returned newest-first. Use `expand=configuration` to include the full configuration used, and `expand=extract_metadata` for per-field metadata.' 
operationId: list_extract_jobs_api_v2_extract_get security: - HTTPBearer: [] parameters: - name: document_input_type in: query required: false schema: anyOf: - type: string - type: 'null' description: Filter by document input type (file_id or parse_job_id) title: Document Input Type description: Filter by document input type (file_id or parse_job_id) - name: file_input in: query required: false schema: anyOf: - type: string - type: 'null' description: Filter by file input value title: File Input description: Filter by file input value - name: document_input_value in: query required: false schema: anyOf: - type: string - type: 'null' description: 'Deprecated: use file_input instead' deprecated: true title: Document Input Value description: 'Deprecated: use file_input instead' deprecated: true - name: status in: query required: false schema: anyOf: - enum: - PENDING - THROTTLED - RUNNING - COMPLETED - FAILED - CANCELLED type: string - type: 'null' description: Filter by status title: Status description: Filter by status - name: page_size in: query required: false schema: anyOf: - type: integer - type: 'null' description: Number of items per page title: Page Size description: Number of items per page - name: page_token in: query required: false schema: anyOf: - type: string - type: 'null' description: Token for pagination title: Page Token description: Token for pagination - name: job_ids in: query required: false schema: anyOf: - type: array items: type: string - type: 'null' description: Filter by specific job IDs title: Job Ids description: Filter by specific job IDs - name: expand in: query required: false schema: type: array items: type: string description: 'Additional fields to include: configuration, extract_metadata' title: Expand description: 'Additional fields to include: configuration, extract_metadata' - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query 
required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: configuration_id in: query required: false schema: anyOf: - type: string - type: 'null' description: Filter by configuration ID examples: - cfg-11111111-2222-3333-4444-555555555555 title: Configuration Id description: Filter by configuration ID - name: created_at_on_or_after in: query required: false schema: anyOf: - type: string format: date-time - type: 'null' description: Include items created at or after this timestamp (inclusive) title: Created At On Or After description: Include items created at or after this timestamp (inclusive) - name: created_at_on_or_before in: query required: false schema: anyOf: - type: string format: date-time - type: 'null' description: Include items created at or before this timestamp (inclusive) title: Created At On Or Before description: Include items created at or before this timestamp (inclusive) - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ExtractV2JobQueryResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v2/extract/{job_id}: get: tags: - V2 - Extract summary: Get Extract Job description: 'Get a single extraction job by ID. Returns the job status and results when complete. Use `expand=configuration` to include the full configuration used, and `expand=extract_metadata` for per-field metadata.' 
operationId: get_extract_job_api_v2_extract__job_id__get security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string title: Job Id - name: expand in: query required: false schema: type: array items: type: string description: 'Additional fields to include: configuration, extract_metadata' title: Expand description: 'Additional fields to include: configuration, extract_metadata' - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ExtractV2Job' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' delete: tags: - V2 - Extract summary: Delete Extract Job description: Delete an extraction job and its results. 
operationId: delete_extract_job_api_v2_extract__job_id__delete security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string title: Job Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: {} '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v2/extract/{job_id}/cancel: post: tags: - V2 - Extract summary: Cancel Extract Job description: 'Cancel a running extraction job. Stops processing and marks the job as CANCELLED. Returns the updated job. Jobs already in a terminal state (COMPLETED, FAILED, CANCELLED) cannot be cancelled.' operationId: cancel_extract_job_api_v2_extract__job_id__cancel_post security: - HTTPBearer: [] parameters: - name: job_id in: path required: true schema: type: string title: Job Id - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ExtractV2Job' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v2/projects: get: tags: - V2 - Projects summary: List Projects description: List projects in an organization. Requires `organization_id` or a project-scoped API key. 
operationId: list_projects_api_v2_projects_get security: - HTTPBearer: [] parameters: - name: organization_id in: query required: false schema: anyOf: - type: string - type: 'null' title: Organization Id - name: name in: query required: false schema: anyOf: - type: string - type: 'null' title: Name - name: page_size in: query required: false schema: anyOf: - type: integer - type: 'null' title: Page Size - name: page_token in: query required: false schema: anyOf: - type: string - type: 'null' title: Page Token - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ProjectQueryResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v2/projects/{project_id}: get: tags: - V2 - Projects summary: Get Project description: Get a project by ID. operationId: get_project_api_v2_projects__project_id__get security: - HTTPBearer: [] parameters: - name: project_id in: path required: true schema: type: string format: uuid title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/ProjectResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v2/organizations: get: tags: - V2 - Organizations summary: List Organizations description: List organizations the current user can access.
operationId: list_organizations_api_v2_organizations_get security: - HTTPBearer: [] parameters: - name: name in: query required: false schema: anyOf: - type: string - type: 'null' title: Name - name: page_size in: query required: false schema: anyOf: - type: integer - type: 'null' title: Page Size - name: page_token in: query required: false schema: anyOf: - type: string - type: 'null' title: Page Token - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/OrganizationQueryResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v2/organizations/{organization_id}: get: tags: - V2 - Organizations summary: Get Organization description: Get an organization by ID. operationId: get_organization_api_v2_organizations__organization_id__get security: - HTTPBearer: [] parameters: - name: organization_id in: path required: true schema: type: string format: uuid title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/OrganizationResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v2/batches: post: tags: - V2 - Batches summary: Create Batch description: Create a batch over a source directory and start processing asynchronously. 
operationId: create_batch_api_v2_batches_post security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/BatchCreateRequest' responses: '201': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/BatchResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' get: tags: - V2 - Batches summary: List Batches description: List batches for the current project. operationId: list_batches_api_v2_batches_get security: - HTTPBearer: [] parameters: - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: page_size in: query required: false schema: anyOf: - type: integer - type: 'null' title: Page Size - name: page_token in: query required: false schema: anyOf: - type: string - type: 'null' title: Page Token - name: created_at_on_or_after in: query required: false schema: anyOf: - type: string format: date-time - type: 'null' title: Created At On Or After - name: created_at_on_or_before in: query required: false schema: anyOf: - type: string format: date-time - type: 'null' title: Created At On Or Before - name: status in: query required: false schema: anyOf: - enum: - PENDING - THROTTLED - RUNNING - COMPLETED - FAILED - CANCELLED type: string - type: 'null' title: Status - name: source_directory_id in: query required: false schema: anyOf: - 
type: string - type: 'null' title: Source Directory Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/BatchQueryResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' /api/v2/batches/{batch_id}: get: tags: - V2 - Batches summary: Get Batch description: Get a batch by ID. operationId: get_batch_api_v2_batches__batch_id__get security: - HTTPBearer: [] parameters: - name: batch_id in: path required: true schema: type: string title: Batch Id - name: expand in: query required: false schema: anyOf: - type: array items: type: string - type: 'null' description: 'Fields to expand. Supported value: results.' title: Expand description: 'Fields to expand. Supported value: results.' - name: project_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Project Id - name: organization_id in: query required: false schema: anyOf: - type: string format: uuid - type: 'null' title: Organization Id - name: session in: cookie required: false schema: anyOf: - type: string - type: 'null' title: Session responses: '200': description: Successful Response content: application/json: schema: $ref: '#/components/schemas/BatchResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' components: schemas: AdvancedModeTransformConfig: properties: mode: type: string const: advanced title: Mode default: advanced segmentation_config: anyOf: - $ref: '#/components/schemas/NoneSegmentationConfig' - $ref: '#/components/schemas/PageSegmentationConfig' - $ref: '#/components/schemas/ElementSegmentationConfig' title: Segmentation Config description: Configuration for the segmentation. 
chunking_config: anyOf: - $ref: '#/components/schemas/NoneChunkingConfig' - $ref: '#/components/schemas/CharacterChunkingConfig' - $ref: '#/components/schemas/TokenChunkingConfig' - $ref: '#/components/schemas/SentenceChunkingConfig' - $ref: '#/components/schemas/SemanticChunkingConfig' title: Chunking Config description: Configuration for the chunking. type: object title: AdvancedModeTransformConfig AgentData: properties: id: anyOf: - type: string - type: 'null' title: Id deployment_name: type: string title: Deployment Name project_id: anyOf: - type: string - type: 'null' title: Project Id collection: type: string title: Collection default: default data: additionalProperties: true type: object title: Data created_at: anyOf: - type: string format: date-time - type: 'null' title: Created At updated_at: anyOf: - type: string format: date-time - type: 'null' title: Updated At type: object required: - deployment_name - data title: AgentData description: API Result for a single agent data item AgentDataCreate: properties: deployment_name: type: string title: Deployment Name collection: type: string title: Collection default: default data: additionalProperties: true type: object title: Data type: object required: - deployment_name - data title: AgentDataCreate description: API request model for creating agent data AgentDataUpdate: properties: data: additionalProperties: true type: object title: Data type: object required: - data title: AgentDataUpdate description: API request model for updating agent data AggregateGroup: properties: group_key: additionalProperties: true type: object title: Group Key count: anyOf: - type: integer - type: 'null' title: Count first_item: anyOf: - additionalProperties: true type: object - type: 'null' title: First Item type: object required: - group_key title: AggregateGroup description: API Result for a single group in the aggregate response AggregateRequest: properties: page_size: anyOf: - type: integer - type: 'null' title: Page Size 
description: The maximum number of items to return. The service may return fewer than this value. If unspecified, a default page size will be used. The maximum value is typically 1000; values above this will be coerced to the maximum. page_token: anyOf: - type: string - type: 'null' title: Page Token description: A page token, received from a previous list call. Provide this to retrieve the subsequent page. filter: anyOf: - additionalProperties: $ref: '#/components/schemas/FilterOperation' type: object - type: 'null' title: Filter description: A filter object or expression that filters resources listed in the response. order_by: anyOf: - type: string - type: 'null' title: Order By description: A comma-separated list of fields to order by, sorted in ascending order. Use 'field_name desc' to specify descending order. deployment_name: type: string title: Deployment Name description: The agent deployment's name to aggregate data for collection: type: string title: Collection description: The logical agent data collection to aggregate data for default: default group_by: anyOf: - items: type: string type: array - type: 'null' title: Group By description: The fields to group by. If empty or omitted, the entire dataset is treated as a single group, which is useful for simple count operations count: anyOf: - type: boolean - type: 'null' title: Count description: Whether to count the number of items in each group default: false first: anyOf: - type: boolean - type: 'null' title: First description: Whether to return the first item in each group (Sorted by created_at) default: false offset: anyOf: - type: integer maximum: 1000.0 minimum: 0.0 - type: 'null' title: Offset description: The offset to start from. If not provided, the first page is returned default: 0 type: object required: - deployment_name title: AggregateRequest description: API request body for aggregating agent data AttachmentRef: properties: type: type: string title: Type description: Attachment kind, e.g.
'screenshot', 'items'. attachment_name: type: string title: Attachment Name description: Attachment-relative path, e.g. 'screenshots/page_7.jpg'. source_id: type: string title: Source Id description: File ID to pass as source_id when fetching the attachment. type: object required: - type - attachment_name - source_id title: AttachmentRef description: Reference to a file attachment, retrievable via ``GET /api/v1/beta/attachments/{attachment_name}?source_id=...``. AutoModeConfigurationEntry: properties: parsing_conf: $ref: '#/components/schemas/AutoModeParsingConf' description: Parsing configuration to apply when trigger conditions are met trigger_mode: anyOf: - type: string - type: 'null' title: Trigger Mode description: 'How to combine multiple trigger conditions: ''and'' (all conditions must match, this is the default) or ''or'' (any single condition can trigger)' page_md_error: anyOf: - type: boolean - type: 'null' title: Page Md Error description: Trigger on pages with markdown extraction errors text_in_page: anyOf: - type: string - type: 'null' title: Text In Page description: Trigger if page text/markdown contains this string table_in_page: anyOf: - type: boolean - type: 'null' title: Table In Page description: Trigger if page contains a table image_in_page: anyOf: - type: boolean - type: 'null' title: Image In Page description: Trigger if page contains non-screenshot images full_page_image_in_page: anyOf: - type: boolean - type: 'null' title: Full Page Image In Page description: Trigger if page contains a full-page image (scanned page detection) full_page_image_in_page_threshold: anyOf: - type: number - type: string - type: 'null' title: Full Page Image In Page Threshold description: Threshold for full page image detection (0.0-1.0, default 0.8) filename_regexp: anyOf: - type: string - type: 'null' title: Filename Regexp description: Regex pattern to match against filename filename_regexp_mode: anyOf: - type: string - type: 'null' title: Filename Regexp Mode 
description: Regex mode flags (e.g., 'i' for case-insensitive) filename_match_glob: anyOf: - type: string - type: 'null' title: Filename Match Glob description: Single glob pattern to match against filename filename_match_glob_list: anyOf: - items: type: string type: array - type: 'null' title: Filename Match Glob List description: List of glob patterns to match against filename page_longer_than_n_chars: anyOf: - type: integer - type: string - type: 'null' title: Page Longer Than N Chars description: Trigger if page has more than N characters page_shorter_than_n_chars: anyOf: - type: integer - type: string - type: 'null' title: Page Shorter Than N Chars description: Trigger if page has fewer than N characters page_contains_at_least_n_words: anyOf: - type: integer - type: string - type: 'null' title: Page Contains At Least N Words description: Trigger if page has more than N words page_contains_at_most_n_words: anyOf: - type: integer - type: string - type: 'null' title: Page Contains At Most N Words description: Trigger if page has fewer than N words page_contains_at_least_n_lines: anyOf: - type: integer - type: string - type: 'null' title: Page Contains At Least N Lines description: Trigger if page has more than N lines page_contains_at_most_n_lines: anyOf: - type: integer - type: string - type: 'null' title: Page Contains At Most N Lines description: Trigger if page has fewer than N lines page_contains_at_least_n_images: anyOf: - type: integer - type: string - type: 'null' title: Page Contains At Least N Images description: Trigger if page has more than N images page_contains_at_most_n_images: anyOf: - type: integer - type: string - type: 'null' title: Page Contains At Most N Images description: Trigger if page has fewer than N images page_contains_at_least_n_tables: anyOf: - type: integer - type: string - type: 'null' title: Page Contains At Least N Tables description: Trigger if page has more than N tables page_contains_at_most_n_tables: anyOf: - type: integer - 
type: string - type: 'null' title: Page Contains At Most N Tables description: Trigger if page has fewer than N tables page_contains_at_least_n_links: anyOf: - type: integer - type: string - type: 'null' title: Page Contains At Least N Links description: Trigger if page has more than N links page_contains_at_most_n_links: anyOf: - type: integer - type: string - type: 'null' title: Page Contains At Most N Links description: Trigger if page has fewer than N links page_contains_at_least_n_charts: anyOf: - type: integer - type: string - type: 'null' title: Page Contains At Least N Charts description: Trigger if page has more than N charts page_contains_at_most_n_charts: anyOf: - type: integer - type: string - type: 'null' title: Page Contains At Most N Charts description: Trigger if page has fewer than N charts page_contains_at_least_n_layout_elements: anyOf: - type: integer - type: string - type: 'null' title: Page Contains At Least N Layout Elements description: Trigger if page has more than N layout elements page_contains_at_most_n_layout_elements: anyOf: - type: integer - type: string - type: 'null' title: Page Contains At Most N Layout Elements description: Trigger if page has fewer than N layout elements page_contains_at_least_n_percent_numbers: anyOf: - type: integer - type: string - type: 'null' title: Page Contains At Least N Percent Numbers description: Trigger if page has more than N% numeric words page_contains_at_most_n_percent_numbers: anyOf: - type: integer - type: string - type: 'null' title: Page Contains At Most N Percent Numbers description: Trigger if page has fewer than N% numeric words page_contains_at_least_n_numbers: anyOf: - type: integer - type: string - type: 'null' title: Page Contains At Least N Numbers description: Trigger if page has more than N numeric words page_contains_at_most_n_numbers: anyOf: - type: integer - type: string - type: 'null' title: Page Contains At Most N Numbers description: Trigger if page has fewer than N numeric 
words regexp_in_page: anyOf: - type: string - type: 'null' title: Regexp In Page description: Regex pattern to match in page content regexp_in_page_mode: anyOf: - type: string - type: 'null' title: Regexp In Page Mode description: Regex mode flags for regexp_in_page layout_element_in_page: anyOf: - type: string - type: 'null' title: Layout Element In Page description: Trigger if page contains this layout element type layout_element_in_page_confidence_threshold: anyOf: - type: number - type: string - type: 'null' title: Layout Element In Page Confidence Threshold description: Confidence threshold for layout element detection additionalProperties: false type: object required: - parsing_conf title: AutoModeConfigurationEntry description: 'A single auto mode rule with trigger conditions and parsing configuration. Auto mode allows conditional parsing where different configurations are applied based on page content, structure, or filename. When triggers match, the parsing_conf overrides default settings for that page.' examples: - parsing_conf: tier: agentic version: latest table_in_page: true - image_in_page: true parsing_conf: tier: agentic version: latest - filename_match_glob: '*.txt' parsing_conf: tier: fast version: latest AutoModeCropBox: properties: bottom: anyOf: - type: number maximum: 1.0 minimum: 0.0 - type: 'null' title: Bottom description: Bottom boundary of crop box as ratio (0-1) left: anyOf: - type: number maximum: 1.0 minimum: 0.0 - type: 'null' title: Left description: Left boundary of crop box as ratio (0-1) right: anyOf: - type: number maximum: 1.0 minimum: 0.0 - type: 'null' title: Right description: Right boundary of crop box as ratio (0-1) top: anyOf: - type: number maximum: 1.0 minimum: 0.0 - type: 'null' title: Top description: Top boundary of crop box as ratio (0-1) additionalProperties: false type: object title: AutoModeCropBox description: Crop box options for auto mode parsing configuration. 
AutoModeIgnoreOptions: properties: ignore_diagonal_text: anyOf: - type: boolean - type: 'null' title: Ignore Diagonal Text description: Whether to ignore diagonal text in the document ignore_hidden_text: anyOf: - type: boolean - type: 'null' title: Ignore Hidden Text description: Whether to ignore hidden text in the document additionalProperties: false type: object title: AutoModeIgnoreOptions description: Ignore options for auto mode parsing configuration. AutoModeParsingConf: properties: tier: anyOf: - type: string enum: - fast - cost_effective - agentic - agentic_plus - type: 'null' title: Tier description: Override the parsing tier for matched pages. Must be paired with version version: anyOf: - type: string enum: - latest - '2026-06-05' - '2026-06-04' - '2025-12-11' x-enum-order-preserved: true - type: string - type: 'null' title: Version description: 'Version for the override tier. Required when `tier` is set. Use `latest`, or pin one of that tier''s dated versions. Current `latest` by tier: - `fast`: `2025-12-11` - `cost_effective`: `2026-06-05` - `agentic`: `2026-06-04` - `agentic_plus`: `2026-06-04` Full list: `GET /api/v2/parse/versions`.' custom_prompt: anyOf: - type: string - type: 'null' title: Custom Prompt description: Custom AI instructions for matched pages. 
Overrides the base custom_prompt ignore: anyOf: - $ref: '#/components/schemas/AutoModeIgnoreOptions' - type: 'null' description: Options for ignoring specific text types aggressive_table_extraction: anyOf: - type: boolean - type: 'null' title: Aggressive Table Extraction description: Whether to use aggressive table extraction outlined_table_extraction: anyOf: - type: boolean - type: 'null' title: Outlined Table Extraction description: Whether to use outlined table extraction adaptive_long_table: anyOf: - type: boolean - type: 'null' title: Adaptive Long Table description: Whether to use adaptive long table handling extract_layout: anyOf: - type: boolean - type: 'null' title: Extract Layout description: Whether to extract layout information specialized_chart_parsing: anyOf: - type: string enum: - agentic_plus - agentic - efficient - type: 'null' title: Specialized Chart Parsing description: Enable specialized chart parsing with the specified mode high_res_ocr: anyOf: - type: boolean - type: 'null' title: High Res Ocr description: Whether to use high resolution OCR language: anyOf: - type: string - type: 'null' title: Language description: Primary language of the document crop_box: anyOf: - $ref: '#/components/schemas/AutoModeCropBox' - type: 'null' description: Document crop box boundaries spatial_text: anyOf: - $ref: '#/components/schemas/AutoModeSpatialTextOptions' - type: 'null' description: Spatial text output options presentation: anyOf: - $ref: '#/components/schemas/AutoModePresentationOptions' - type: 'null' description: Presentation-specific parsing options additionalProperties: false type: object title: AutoModeParsingConf description: 'Parsing configuration applied when auto mode triggers match. These settings override the base configuration for pages where trigger conditions are satisfied. Only specify fields you want to override - unset fields inherit from the base configuration.' 
AutoModePresentationOptions: properties: out_of_bounds_content: anyOf: - type: boolean - type: 'null' title: Out Of Bounds Content description: Extract out of bounds content in presentation slides skip_embedded_data: anyOf: - type: boolean - type: 'null' title: Skip Embedded Data description: Skip extraction of embedded data for charts in presentation slides additionalProperties: false type: object title: AutoModePresentationOptions description: Presentation-specific options for auto mode parsing configuration. AutoModeSpatialTextOptions: properties: preserve_layout_alignment_across_pages: anyOf: - type: boolean - type: 'null' title: Preserve Layout Alignment Across Pages description: Preserve text alignment across page boundaries preserve_very_small_text: anyOf: - type: boolean - type: 'null' title: Preserve Very Small Text description: Include very small text in spatial output do_not_unroll_columns: anyOf: - type: boolean - type: 'null' title: Do Not Unroll Columns description: Keep column structure intact without unrolling additionalProperties: false type: object title: AutoModeSpatialTextOptions description: Spatial text options for auto mode parsing configuration. AutoTransformConfig: properties: mode: type: string const: auto title: Mode default: auto chunk_size: type: integer exclusiveMinimum: 0.0 title: Chunk Size description: Chunk size for the transformation. default: 1024 chunk_overlap: type: integer minimum: 0.0 title: Chunk Overlap description: Chunk overlap for the transformation. default: 200 type: object title: AutoTransformConfig AzureOpenAIEmbedding: properties: model_name: type: string title: Model Name description: The name of the OpenAI embedding model. default: text-embedding-ada-002 embed_batch_size: type: integer maximum: 2048.0 exclusiveMinimum: 0.0 title: Embed Batch Size description: The batch size for embedding calls.
default: 10 num_workers: anyOf: - type: integer - type: 'null' title: Num Workers description: The number of workers to use for async embedding calls. additional_kwargs: additionalProperties: true type: object title: Additional Kwargs description: Additional kwargs for the OpenAI API. api_key: anyOf: - type: string - type: 'null' title: Api Key description: The OpenAI API key. api_base: type: string title: Api Base description: The base URL for Azure deployment. default: '' api_version: type: string title: Api Version description: The version for Azure OpenAI API. default: '' max_retries: type: integer minimum: 0.0 title: Max Retries description: Maximum number of retries. default: 10 timeout: type: number minimum: 0.0 title: Timeout description: Timeout for each request. default: 60.0 default_headers: anyOf: - additionalProperties: type: string type: object - type: 'null' title: Default Headers description: The default headers for API requests. reuse_client: type: boolean title: Reuse Client description: Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. default: true dimensions: anyOf: - type: integer - type: 'null' title: Dimensions description: The number of dimensions on the output embedding vectors. Works only with v3 embedding models. azure_endpoint: anyOf: - type: string - type: 'null' title: Azure Endpoint description: The Azure endpoint to use. azure_deployment: anyOf: - type: string - type: 'null' title: Azure Deployment description: The Azure deployment to use. class_name: type: string title: Class Name default: AzureOpenAIEmbedding type: object title: AzureOpenAIEmbedding AzureOpenAIEmbeddingConfig: properties: type: type: string const: AZURE_EMBEDDING title: Type description: Type of the embedding model. default: AZURE_EMBEDDING component: $ref: '#/components/schemas/AzureOpenAIEmbedding' description: Configuration for the Azure OpenAI embedding model. 
type: object title: AzureOpenAIEmbeddingConfig BBox: properties: x: type: number title: X description: X coordinate of the bounding box y: type: number title: Y description: Y coordinate of the bounding box w: type: number title: W description: Width of the bounding box h: type: number title: H description: Height of the bounding box r: anyOf: - type: number - type: 'null' title: R description: Optional visual text rotation angle in degrees. Omitted when unrotated. confidence: anyOf: - type: number - type: 'null' title: Confidence description: Confidence score start_index: anyOf: - type: integer - type: 'null' title: Start Index description: Start index in the text end_index: anyOf: - type: integer - type: 'null' title: End Index description: End index in the text label: anyOf: - type: string - type: 'null' title: Label description: Label for the bounding box type: object required: - x - y - w - h title: BBox description: Bounding box with coordinates and optional metadata. BatchConfiguration: properties: job: $ref: '#/components/schemas/BatchJobConfig' description: Job to create for each file in the source directory. type: object required: - job title: BatchConfiguration description: "Configuration for a batch.\n\nExample:\n {\n \"job\"\ : {\n \"type\": \"parse_v2\",\n \"configuration_id\"\ : \"cfg-PARSE_AGENTIC\"\n }\n }\n\nThis wraps the product configuration\ \ ID to run over the source directory.\nUse a built-in preset ID when available,\ \ or create/reuse a product\nconfiguration through the generic configurations\ \ API before creating the\nbatch." BatchCreateRequest: properties: source_directory_id: type: string title: Source Directory Id description: Directory whose files should be processed. examples: - dir-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee config: $ref: '#/components/schemas/BatchConfiguration' description: Batch configuration snapshot to apply to this source directory. 
type: object required: - source_directory_id - config title: BatchCreateRequest description: "Create a batch over a directory.\n\nExample:\n {\n \"\ source_directory_id\": \"dir-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee\",\n \ \ \"config\": {\n \"job\": {\n \"type\": \"\ parse_v2\",\n \"configuration_id\": \"cfg-PARSE_AGENTIC\"\n\ \ }\n },\n }\n\nThe source is always a directory. Callers\ \ upload or attach files to the\ndirectory first, then this API maps each\ \ source directory file to an\noutput job such as a parse job ID." BatchFileStatus: type: string enum: - pending - processing - completed - failed - skipped - cancelled title: BatchFileStatus description: Status of an individual file in a batch job. BatchItemDetail: properties: status: $ref: '#/components/schemas/BatchFileStatus' description: Processing status of this item effective_at: type: string format: date-time title: Effective At job_record_id: anyOf: - type: string - type: 'null' title: Job Record Id description: The job record ID associated with this status, if any. error_message: anyOf: - type: string - type: 'null' title: Error Message description: Error message for the latest job attempt, if any. 
item_id: type: string title: Item Id description: ID of the item item_name: type: string title: Item Name description: Name of the item job_id: anyOf: - type: string - type: 'null' title: Job Id description: Job ID for the underlying processing job (links to parse/extract job results) skip_reason: anyOf: - type: string - type: 'null' title: Skip Reason description: Reason item was skipped (e.g., 'already_processed', 'size_limit_exceeded') started_at: anyOf: - type: string format: date-time - type: 'null' title: Started At description: When processing started for this item completed_at: anyOf: - type: string format: date-time - type: 'null' title: Completed At description: When processing completed for this item type: object required: - status - item_id - item_name title: BatchItemDetail description: Detailed information about an item in a batch job. BatchItemListResponse: properties: items: items: $ref: '#/components/schemas/BatchItemDetail' type: array title: Items description: List of item details next_page_token: anyOf: - type: string - type: 'null' title: Next Page Token description: A token, which can be sent as page_token to retrieve the next page. If this field is omitted, there are no subsequent pages. total_size: anyOf: - type: integer - type: 'null' title: Total Size description: The total number of items available. This is only populated when specifically requested. The value may be an estimate and can be used for display purposes only. type: object title: BatchItemListResponse description: Paginated response containing batch job item details. BatchJobCancelRequest: properties: reason: anyOf: - type: string - type: 'null' title: Reason description: Optional reason for cancelling the job type: object title: BatchJobCancelRequest description: Request to cancel a running batch job. 
BatchJobCancelResponse: properties: job_id: type: string title: Job Id description: ID of the cancelled job status: $ref: '#/components/schemas/BatchJobStatus' description: New status (should be 'cancelled') processed_items: type: integer title: Processed Items description: Number of items processed before cancellation message: type: string title: Message description: Confirmation message type: object required: - job_id - status - processed_items - message title: BatchJobCancelResponse description: Response after cancelling a batch job. BatchJobConfig: properties: type: anyOf: - type: string const: parse_v2 - type: string const: extract_v2 title: Type description: Product job type to run for each source directory file. examples: - parse_v2 - extract_v2 configuration_id: type: string title: Configuration Id description: Product configuration ID or built-in preset ID matching the job type. examples: - cfg-PARSE_AGENTIC type: object required: - type - configuration_id title: BatchJobConfig description: "Job to run for each file in the source directory.\n\nExample:\n\ \ {\n \"type\": \"parse_v2\",\n \"configuration_id\": \"\ cfg-PARSE_AGENTIC\"\n }\n\nBatch V2 references product configuration IDs\ \ so the underlying\ndirectory-sync flow can resolve a stable config ID for\ \ every file-level job.\nIDs may refer to saved project configurations or\ \ built-in presets for the\nrequested product type." BatchJobCreateRequest: properties: directory_id: anyOf: - type: string - type: 'null' title: Directory Id description: ID of the directory containing files to process examples: - dir-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee item_ids: anyOf: - items: type: string type: array - type: 'null' title: Item Ids description: List of specific item IDs to process. Either this or directory_id must be provided. 
examples: - - dfl-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee - dfl-11111111-2222-3333-4444-555555555555 job_config: anyOf: - $ref: '#/components/schemas/BatchParseJobRecordCreate' - $ref: '#/components/schemas/ClassifyJob' title: Job Config description: Job configuration — either a parse or classify config page_size: type: integer maximum: 1000.0 minimum: 1.0 title: Page Size description: Number of files to process per batch when using directory mode default: 100 continue_as_new_threshold: anyOf: - type: integer - type: 'null' title: Continue As New Threshold description: Maximum files to process per execution cycle in directory mode. Defaults to page_size. type: object required: - job_config title: BatchJobCreateRequest description: 'Request to create a batch processing job. Supports two modes: 1. Directory mode: Process all files in a directory (use directory_id) 2. Item list mode: Process specific items (use item_ids). Project must be provided via validate_project dependency.' BatchJobQueryResponse: properties: items: items: $ref: '#/components/schemas/BatchJobResponse' type: array title: Items description: The list of items. next_page_token: anyOf: - type: string - type: 'null' title: Next Page Token description: A token, which can be sent as page_token to retrieve the next page. If this field is omitted, there are no subsequent pages. total_size: anyOf: - type: integer - type: 'null' title: Total Size description: The total number of items available. This is only populated when specifically requested. The value may be an estimate and can be used for display purposes only. type: object required: - items title: BatchJobQueryResponse description: Response schema for paginated batch job queries. BatchJobReference: properties: type: anyOf: - type: string const: parse_v2 - type: string const: extract_v2 title: Type description: Type of job produced for the file. id: type: string title: Id description: Job ID, such as a parse job ID. 
examples: - pjb-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee type: object required: - type - id title: BatchJobReference description: "Reference to a job produced by a batch.\n\nExample:\n {\n \ \ \"type\": \"parse_v2\",\n \"id\": \"pjb-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee\"\ \n }" BatchJobResponse: properties: status: $ref: '#/components/schemas/BatchJobStatus' description: Current job status effective_at: type: string format: date-time title: Effective At job_record_id: anyOf: - type: string - type: 'null' title: Job Record Id description: The job record ID associated with this status, if any. error_message: anyOf: - type: string - type: 'null' title: Error Message description: Error message for the latest job attempt, if any. id: type: string title: Id description: Unique identifier for the batch job examples: - bjb-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee created_at: anyOf: - type: string format: date-time - type: 'null' title: Created At description: Creation datetime updated_at: anyOf: - type: string format: date-time - type: 'null' title: Updated At description: Update datetime project_id: type: string title: Project Id description: Project this job belongs to examples: - proj-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee directory_id: anyOf: - type: string - type: 'null' title: Directory Id description: Directory being processed examples: - dir-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee job_type: $ref: '#/components/schemas/BatchJobType' description: Type of processing operation (parse or classify) total_items: type: integer title: Total Items description: Total number of items in the job processed_items: type: integer title: Processed Items description: Number of items processed so far default: 0 failed_items: type: integer title: Failed Items description: Number of items that failed processing default: 0 skipped_items: type: integer title: Skipped Items description: Number of items skipped (already processed or size limit) default: 0 workflow_id: anyOf: - type: string - type: 
'null' title: Workflow Id description: Async job tracking ID started_at: anyOf: - type: string format: date-time - type: 'null' title: Started At description: Timestamp when job processing started completed_at: anyOf: - type: string format: date-time - type: 'null' title: Completed At description: Timestamp when job completed type: object required: - status - id - project_id - job_type - total_items title: BatchJobResponse description: Response schema for a batch processing job. BatchJobStatus: type: string enum: - pending - running - dispatched - completed - failed - cancelled title: BatchJobStatus description: Status of a batch processing job. BatchJobStatusResponse: properties: job: $ref: '#/components/schemas/BatchJobResponse' progress_percentage: type: number maximum: 100.0 minimum: 0.0 title: Progress Percentage description: Percentage of items processed (0-100) type: object required: - job - progress_percentage title: BatchJobStatusResponse description: Detailed status response for a batch processing job. BatchJobType: type: string enum: - parse - extract - classify title: BatchJobType description: Type of batch processing operation. BatchParseJobConfig: properties: webhook_configurations: anyOf: - items: $ref: '#/components/schemas/WebhookConfiguration' type: array - type: 'null' title: Webhook Configurations description: Outbound webhook endpoints to notify on job status changes priority: anyOf: - type: string enum: - low - medium - high - critical - type: 'null' title: Priority description: The priority for the request. This field may be ignored or overwritten depending on the organization tier. custom_metadata: anyOf: - additionalProperties: true type: object - type: 'null' title: Custom Metadata description: The custom metadata to attach to the documents. 
resource_info: anyOf: - additionalProperties: true type: object - type: 'null' title: Resource Info description: The resource info about the file languages: items: $ref: '#/components/schemas/ParserLanguages' type: array minItems: 1 title: Languages parsing_instruction: anyOf: - type: string - type: 'null' title: Parsing Instruction default: '' disable_ocr: anyOf: - type: boolean - type: 'null' title: Disable Ocr default: false annotate_links: anyOf: - type: boolean - type: 'null' title: Annotate Links default: false adaptive_long_table: anyOf: - type: boolean - type: 'null' title: Adaptive Long Table default: false compact_markdown_table: anyOf: - type: boolean - type: 'null' title: Compact Markdown Table default: false disable_reconstruction: anyOf: - type: boolean - type: 'null' title: Disable Reconstruction default: false disable_image_extraction: anyOf: - type: boolean - type: 'null' title: Disable Image Extraction default: false invalidate_cache: anyOf: - type: boolean - type: 'null' title: Invalidate Cache default: false outlined_table_extraction: anyOf: - type: boolean - type: 'null' title: Outlined Table Extraction default: false aggressive_table_extraction: anyOf: - type: boolean - type: 'null' title: Aggressive Table Extraction default: false merge_tables_across_pages_in_markdown: anyOf: - type: boolean - type: 'null' title: Merge Tables Across Pages In Markdown default: false output_pdf_of_document: anyOf: - type: boolean - type: 'null' title: Output Pdf Of Document default: false do_not_cache: anyOf: - type: boolean - type: 'null' title: Do Not Cache default: false fast_mode: anyOf: - type: boolean - type: 'null' title: Fast Mode default: false skip_diagonal_text: anyOf: - type: boolean - type: 'null' title: Skip Diagonal Text default: false preserve_layout_alignment_across_pages: anyOf: - type: boolean - type: 'null' title: Preserve Layout Alignment Across Pages default: false preserve_very_small_text: anyOf: - type: boolean - type: 'null' title: 
Preserve Very Small Text default: false gpt4o_mode: anyOf: - type: boolean - type: 'null' title: Gpt4O Mode default: false gpt4o_api_key: anyOf: - type: string - type: 'null' title: Gpt4O Api Key do_not_unroll_columns: anyOf: - type: boolean - type: 'null' title: Do Not Unroll Columns default: false extract_layout: anyOf: - type: boolean - type: 'null' title: Extract Layout default: false high_res_ocr: anyOf: - type: boolean - type: 'null' title: High Res Ocr default: false html_make_all_elements_visible: anyOf: - type: boolean - type: 'null' title: Html Make All Elements Visible default: false layout_aware: anyOf: - type: boolean - type: 'null' title: Layout Aware default: false specialized_chart_parsing_agentic: anyOf: - type: boolean - type: 'null' title: Specialized Chart Parsing Agentic default: false specialized_chart_parsing_plus: anyOf: - type: boolean - type: 'null' title: Specialized Chart Parsing Plus default: false specialized_chart_parsing_efficient: anyOf: - type: boolean - type: 'null' title: Specialized Chart Parsing Efficient default: false specialized_image_parsing: anyOf: - type: boolean - type: 'null' title: Specialized Image Parsing default: false precise_bounding_box: anyOf: - type: boolean - type: 'null' title: Precise Bounding Box default: false line_level_bounding_box: anyOf: - type: boolean - type: 'null' title: Line Level Bounding Box default: false html_remove_navigation_elements: anyOf: - type: boolean - type: 'null' title: Html Remove Navigation Elements default: false html_remove_fixed_elements: anyOf: - type: boolean - type: 'null' title: Html Remove Fixed Elements default: false guess_xlsx_sheet_name: anyOf: - type: boolean - type: 'null' title: Guess Xlsx Sheet Name default: false page_separator: anyOf: - type: string - type: 'null' title: Page Separator bounding_box: anyOf: - type: string - type: 'null' title: Bounding Box bbox_top: anyOf: - type: number - type: 'null' title: Bbox Top bbox_right: anyOf: - type: number - type: 
'null' title: Bbox Right bbox_bottom: anyOf: - type: number - type: 'null' title: Bbox Bottom bbox_left: anyOf: - type: number - type: 'null' title: Bbox Left target_pages: anyOf: - type: string - type: 'null' title: Target Pages use_vendor_multimodal_model: anyOf: - type: boolean - type: 'null' title: Use Vendor Multimodal Model default: false vendor_multimodal_model_name: anyOf: - type: string - type: 'null' title: Vendor Multimodal Model Name model: anyOf: - type: string - type: 'null' title: Model vendor_multimodal_api_key: anyOf: - type: string - type: 'null' title: Vendor Multimodal Api Key page_prefix: anyOf: - type: string - type: 'null' title: Page Prefix page_suffix: anyOf: - type: string - type: 'null' title: Page Suffix webhook_url: anyOf: - type: string - type: 'null' title: Webhook Url preset: anyOf: - type: string - type: 'null' title: Preset take_screenshot: anyOf: - type: boolean - type: 'null' title: Take Screenshot default: false is_formatting_instruction: anyOf: - type: boolean - type: 'null' title: Is Formatting Instruction default: true premium_mode: anyOf: - type: boolean - type: 'null' title: Premium Mode default: false continuous_mode: anyOf: - type: boolean - type: 'null' title: Continuous Mode default: false input_s3_path: anyOf: - type: string - type: 'null' title: Input S3 Path input_s3_region: anyOf: - type: string - type: 'null' title: Input S3 Region description: The region for the input S3 bucket. output_s3_path_prefix: anyOf: - type: string - type: 'null' title: Output S3 Path Prefix description: If specified, LlamaParse will save the output to the specified path. All output files will use this prefix, which should be a valid s3:// URL. output_s3_region: anyOf: - type: string - type: 'null' title: Output S3 Region description: The region for the output S3 bucket.
project_id: anyOf: - type: string - type: 'null' title: Project Id azure_openai_deployment_name: anyOf: - type: string - type: 'null' title: Azure Openai Deployment Name azure_openai_endpoint: anyOf: - type: string - type: 'null' title: Azure Openai Endpoint azure_openai_api_version: anyOf: - type: string - type: 'null' title: Azure Openai Api Version azure_openai_key: anyOf: - type: string - type: 'null' title: Azure Openai Key input_url: anyOf: - type: string - type: 'null' title: Input Url http_proxy: anyOf: - type: string - type: 'null' title: Http Proxy auto_mode: anyOf: - type: boolean - type: 'null' title: Auto Mode default: false auto_mode_trigger_on_regexp_in_page: anyOf: - type: string - type: 'null' title: Auto Mode Trigger On Regexp In Page auto_mode_trigger_on_text_in_page: anyOf: - type: string - type: 'null' title: Auto Mode Trigger On Text In Page auto_mode_trigger_on_table_in_page: anyOf: - type: boolean - type: 'null' title: Auto Mode Trigger On Table In Page default: false auto_mode_trigger_on_image_in_page: anyOf: - type: boolean - type: 'null' title: Auto Mode Trigger On Image In Page default: false auto_mode_configuration_json: anyOf: - type: string - type: 'null' title: Auto Mode Configuration Json structured_output: anyOf: - type: boolean - type: 'null' title: Structured Output default: false structured_output_json_schema: anyOf: - type: string - type: 'null' title: Structured Output Json Schema structured_output_json_schema_name: anyOf: - type: string - type: 'null' title: Structured Output Json Schema Name max_pages: anyOf: - type: integer - type: 'null' title: Max Pages max_pages_enforced: anyOf: - type: integer - type: 'null' title: Max Pages Enforced extract_charts: anyOf: - type: boolean - type: 'null' title: Extract Charts default: false formatting_instruction: anyOf: - type: string - type: 'null' title: Formatting Instruction complemental_formatting_instruction: anyOf: - type: string - type: 'null' title: Complemental Formatting 
Instruction content_guideline_instruction: anyOf: - type: string - type: 'null' title: Content Guideline Instruction spreadsheet_extract_sub_tables: anyOf: - type: boolean - type: 'null' title: Spreadsheet Extract Sub Tables default: false spreadsheet_force_formula_computation: anyOf: - type: boolean - type: 'null' title: Spreadsheet Force Formula Computation default: false spreadsheet_include_hidden_sheets: anyOf: - type: boolean - type: 'null' title: Spreadsheet Include Hidden Sheets default: false inline_images_in_markdown: anyOf: - type: boolean - type: 'null' title: Inline Images In Markdown default: false job_timeout_in_seconds: anyOf: - type: number - type: 'null' title: Job Timeout In Seconds job_timeout_extra_time_per_page_in_seconds: anyOf: - type: number - type: 'null' title: Job Timeout Extra Time Per Page In Seconds strict_mode_image_extraction: anyOf: - type: boolean - type: 'null' title: Strict Mode Image Extraction default: false strict_mode_image_ocr: anyOf: - type: boolean - type: 'null' title: Strict Mode Image Ocr default: false strict_mode_reconstruction: anyOf: - type: boolean - type: 'null' title: Strict Mode Reconstruction default: false strict_mode_buggy_font: anyOf: - type: boolean - type: 'null' title: Strict Mode Buggy Font default: false save_images: anyOf: - type: boolean - type: 'null' title: Save Images default: true images_to_save: anyOf: - items: type: string enum: - screenshot - embedded - layout type: array - type: 'null' title: Images To Save hide_headers: anyOf: - type: boolean - type: 'null' title: Hide Headers default: false hide_footers: anyOf: - type: boolean - type: 'null' title: Hide Footers default: false page_header_prefix: anyOf: - type: string - type: 'null' title: Page Header Prefix page_header_suffix: anyOf: - type: string - type: 'null' title: Page Header Suffix page_footer_prefix: anyOf: - type: string - type: 'null' title: Page Footer Prefix page_footer_suffix: anyOf: - type: string - type: 'null' title: Page 
Footer Suffix remove_hidden_text: anyOf: - type: boolean - type: 'null' title: Remove Hidden Text default: false keep_page_separator_when_merging_tables: anyOf: - type: boolean - type: 'null' title: Keep Page Separator When Merging Tables default: false ignore_document_elements_for_layout_detection: anyOf: - type: boolean - type: 'null' title: Ignore Document Elements For Layout Detection default: false output_tables_as_HTML: anyOf: - type: boolean - type: 'null' title: Output Tables As Html default: false internal_is_screenshot_job: anyOf: - type: boolean - type: 'null' title: Internal Is Screenshot Job default: false parse_mode: anyOf: - $ref: '#/components/schemas/ParsingMode' - type: 'null' system_prompt: anyOf: - type: string - type: 'null' title: System Prompt system_prompt_append: anyOf: - type: string - type: 'null' title: System Prompt Append user_prompt: anyOf: - type: string - type: 'null' title: User Prompt page_error_tolerance: anyOf: - type: number - type: 'null' title: Page Error Tolerance default: 0.05 replace_failed_page_mode: anyOf: - $ref: '#/components/schemas/FailPageMode' - type: 'null' default: raw_text replace_failed_page_with_error_message_prefix: anyOf: - type: string - type: 'null' title: Replace Failed Page With Error Message Prefix replace_failed_page_with_error_message_suffix: anyOf: - type: string - type: 'null' title: Replace Failed Page With Error Message Suffix markdown_table_multiline_header_separator: anyOf: - type: string - type: 'null' title: Markdown Table Multiline Header Separator presentation_out_of_bounds_content: anyOf: - type: boolean - type: 'null' title: Presentation Out Of Bounds Content default: false presentation_skip_embedded_data: anyOf: - type: boolean - type: 'null' title: Presentation Skip Embedded Data default: false tier: anyOf: - type: string - type: 'null' title: Tier version: anyOf: - type: string - type: 'null' title: Version extract_printed_page_number: anyOf: - type: boolean - type: 'null' title: 
Extract Printed Page Number default: false enable_cost_optimizer: anyOf: - type: boolean - type: 'null' title: Enable Cost Optimizer type: type: string const: parse title: Type default: parse lang: type: string title: Lang description: The language. default: en outputBucket: anyOf: - type: string - type: 'null' title: Outputbucket description: The output bucket. pipeline_id: anyOf: - type: string - type: 'null' title: Pipeline Id description: The pipeline ID. type: object title: BatchParseJobConfig description: 'Generic parse job configuration for batch processing. This model contains the parsing configuration that applies to all files in a batch, but excludes file-specific fields like file_name, file_id, etc. Those file-specific fields are populated from DirectoryFile data when creating individual ParseJobRecordCreate instances for each file. The fields in this model should be generic settings that apply uniformly to all files being processed in the batch.' BatchParseJobRecordCreate: properties: job_name: type: string const: parse_raw_file_job title: Job Name default: parse_raw_file_job partitions: additionalProperties: anyOf: - type: string format: uuid - type: string propertyNames: $ref: '#/components/schemas/PartitionNames' type: object title: Partitions description: The partitions for this execution. Used for determining where to save job output. parameters: anyOf: - $ref: '#/components/schemas/BatchParseJobConfig' - type: 'null' description: The generic parse configuration for all files in the batch. session_id: anyOf: - type: string format: uuid - type: 'null' title: Session Id description: The upstream request ID that created this job. Used for tracking the job across services. correlation_id: anyOf: - type: string format: uuid - type: 'null' title: Correlation Id description: The correlation ID for this job. Used for tracking the job across services. 
parent_job_execution_id: anyOf: - type: string format: uuid - type: 'null' title: Parent Job Execution Id description: The ID of the parent job execution. user_id: anyOf: - type: string - type: 'null' title: User Id description: The ID of the user that created this job project_id: anyOf: - type: string format: uuid - type: 'null' title: Project Id description: The ID of the project this job belongs to. webhook_url: anyOf: - type: string - type: 'null' title: Webhook Url description: The URL that needs to be called at the end of the parsing job. type: object title: BatchParseJobRecordCreate description: "Batch-specific parse job record for batch processing.\n\nThis\ \ model contains the metadata and configuration for a batch parse job,\nbut\ \ excludes file-specific information. It's used as input to the batch\nparent\ \ workflow and combined with DirectoryFile data to create full\nParseJobRecordCreate\ \ instances for each file.\n\nAttributes:\n job_name: Must be PARSE_RAW_FILE\n\ \ partitions: Partitions for job output location\n parameters: Generic\ \ parse configuration (BatchParseJobConfig)\n session_id: Upstream request\ \ ID for tracking\n correlation_id: Correlation ID for cross-service tracking\n\ \ parent_job_execution_id: Parent job execution ID if nested\n user_id:\ \ User who created the job\n project_id: Project this job belongs to\n\ \ webhook_url: Optional webhook URL for job completion notifications" BatchQueryResponse: properties: items: items: $ref: '#/components/schemas/BatchResponse' type: array title: Items description: The list of items. next_page_token: anyOf: - type: string - type: 'null' title: Next Page Token description: A token, which can be sent as page_token to retrieve the next page. If this field is omitted, there are no subsequent pages. total_size: anyOf: - type: integer - type: 'null' title: Total Size description: The total number of items available. This is only populated when specifically requested. 
The value may be an estimate and can be used for display purposes only. type: object required: - items title: BatchQueryResponse description: Paginated list of batches. BatchResponse: properties: id: anyOf: - type: string - type: string format: uuid title: Id description: Unique identifier created_at: anyOf: - type: string format: date-time - type: 'null' title: Created At description: Creation datetime updated_at: anyOf: - type: string format: date-time - type: 'null' title: Updated At description: Update datetime project_id: type: string title: Project Id description: Project this batch belongs to. source_directory_id: type: string title: Source Directory Id description: Directory being processed. config: $ref: '#/components/schemas/BatchConfiguration' description: Batch configuration snapshot. status: type: string enum: - PENDING - THROTTLED - RUNNING - COMPLETED - FAILED - CANCELLED title: Status description: Current batch status. results: anyOf: - items: $ref: '#/components/schemas/BatchResult' type: array - type: 'null' title: Results description: Expanded per-file result mappings. Null unless requested with expand=results, or while the batch is still running. 
type: object required: - id - project_id - source_directory_id - config - status title: BatchResponse description: "A top-level batch.\n\nExample:\n {\n \"id\": \"bat-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee\"\ ,\n \"project_id\": \"prj-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee\",\n\ \ \"source_directory_id\": \"dir-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee\"\ ,\n \"config\": {\n \"job\": {\n \"type\"\ : \"parse_v2\",\n \"configuration_id\": \"cfg-PARSE_AGENTIC\"\ \n }\n },\n \"status\": \"COMPLETED\",\n \"\ results\": [\n {\n \"source_directory_file_id\"\ : \"dfl-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee\",\n \"job_reference\"\ : {\n \"type\": \"parse_v2\",\n \"id\"\ : \"pjb-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee\"\n },\n \ \ \"error_message\": null\n }\n ]\n }\n\nBatch-level\ \ ``FAILED`` means the orchestration failed and cannot provide a\nreliable\ \ per-file result set. ``results`` is only populated when explicitly\nrequested\ \ with ``expand=results`` and may be ``null`` while a batch is still\nrunning." BatchResult: properties: source_directory_file_id: type: string title: Source Directory File Id description: Source directory file processed by this batch. examples: - dfl-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee job_reference: anyOf: - $ref: '#/components/schemas/BatchJobReference' - type: 'null' description: Job created for this file, once known. error_message: anyOf: - type: string - type: 'null' title: Error Message description: Batch-level mapping error if the system could not create or associate a job for this source file. 
type: object required: - source_directory_file_id title: BatchResult description: "Result projection for one source directory file in a batch.\n\n\ Example:\n {\n \"source_directory_file_id\": \"dfl-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee\"\ ,\n \"job_reference\": {\n \"type\": \"parse_v2\",\n \ \ \"id\": \"pjb-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee\"\n },\n\ \ \"error_message\": null\n }\n\nThis is a projection of directory-sync\ \ state, not a separate child\nresource that callers need to create. The source\ \ directory file ID is the\nstable correlation key. Underlying job progress\ \ and failures should be\nresolved through the referenced product job endpoint." BedrockEmbedding: properties: model_name: type: string title: Model Name description: The modelId of the Bedrock model to use. default: amazon.titan-embed-text-v1 embed_batch_size: type: integer maximum: 2048.0 exclusiveMinimum: 0.0 title: Embed Batch Size description: The batch size for embedding calls. default: 10 num_workers: anyOf: - type: integer - type: 'null' title: Num Workers description: The number of workers to use for async embedding calls. profile_name: anyOf: - type: string - type: 'null' title: Profile Name description: The name of aws profile to use. If not given, then the default profile is used. aws_access_key_id: anyOf: - type: string - type: 'null' title: Aws Access Key Id description: AWS Access Key ID to use aws_secret_access_key: anyOf: - type: string - type: 'null' title: Aws Secret Access Key description: AWS Secret Access Key to use aws_session_token: anyOf: - type: string - type: 'null' title: Aws Session Token description: AWS Session Token to use region_name: anyOf: - type: string - type: 'null' title: Region Name description: AWS region name to use. Uses region configured in AWS CLI if not passed max_retries: type: integer exclusiveMinimum: 0.0 title: Max Retries description: The maximum number of API retries. 
default: 10 timeout: type: number title: Timeout description: The timeout for the Bedrock API request in seconds. It will be used for both connect and read timeouts. default: 60.0 additional_kwargs: additionalProperties: true type: object title: Additional Kwargs description: Additional kwargs for the bedrock client. class_name: type: string title: Class Name default: BedrockEmbedding type: object title: BedrockEmbedding BedrockEmbeddingConfig: properties: type: type: string const: BEDROCK_EMBEDDING title: Type description: Type of the embedding model. default: BEDROCK_EMBEDDING component: $ref: '#/components/schemas/BedrockEmbedding' description: Configuration for the Bedrock embedding model. type: object title: BedrockEmbeddingConfig Body_import_pipeline_metadata_api_v1_pipelines__pipeline_id__metadata_put: properties: upload_file: type: string format: binary title: Upload File type: object required: - upload_file title: Body_import_pipeline_metadata_api_v1_pipelines__pipeline_id__metadata_put Body_run_job_on_file_api_v1_extraction_jobs_file_post: properties: extraction_agent_id: type: string format: uuid title: Extraction Agent Id description: The id of the extraction agent file: type: string format: binary title: File description: The file to run the job on data_schema_override: anyOf: - type: string - type: 'null' title: Data Schema Override description: The data schema to override the extraction agent's data schema with as a JSON string config_override: anyOf: - type: string - type: 'null' title: Config Override description: The config to override the extraction agent's config with as a JSON string type: object required: - extraction_agent_id - file title: Body_run_job_on_file_api_v1_extraction_jobs_file_post Body_screenshot_api_v1_parsing_screenshot_post: properties: file: anyOf: - type: string format: binary - type: 'null' title: File do_not_cache: type: boolean title: Do Not Cache default: false http_proxy: type: string title: Http Proxy input_s3_path: 
type: string title: Input S3 Path default: '' input_s3_region: type: string title: Input S3 Region default: '' input_url: type: string title: Input Url invalidate_cache: type: boolean title: Invalidate Cache default: false max_pages: anyOf: - type: integer - type: 'null' title: Max Pages output_s3_path_prefix: type: string title: Output S3 Path Prefix default: '' output_s3_region: type: string title: Output S3 Region default: '' target_pages: type: string title: Target Pages default: '' webhook_url: type: string title: Webhook Url default: '' webhook_configurations: type: string title: Webhook Configurations default: '' job_timeout_in_seconds: type: number title: Job Timeout In Seconds job_timeout_extra_time_per_page_in_seconds: type: number title: Job Timeout Extra Time Per Page In Seconds type: object title: Body_screenshot_api_v1_parsing_screenshot_post Body_upload_file_api_v1_beta_files_post: properties: purpose: type: string title: Purpose description: 'The intended purpose of the file. Valid values: ''user_data'', ''parse'', ''extract'', ''split'', ''classify'', ''sheet'', ''agent_app''. This determines the storage and retention policy for the file.' 
file: type: string format: binary title: File description: The file to upload external_file_id: anyOf: - type: string - type: 'null' title: External File Id description: The ID of the file in the external system type: object required: - purpose - file title: Body_upload_file_api_v1_beta_files_post Body_upload_file_api_v1_parsing_upload_post: properties: file: anyOf: - type: string format: binary - type: 'null' title: File adaptive_long_table: type: boolean title: Adaptive Long Table default: false annotate_links: type: boolean title: Annotate Links default: false auto_mode: type: boolean title: Auto Mode default: false auto_mode_trigger_on_image_in_page: type: boolean title: Auto Mode Trigger On Image In Page default: false auto_mode_trigger_on_table_in_page: type: boolean title: Auto Mode Trigger On Table In Page default: false auto_mode_trigger_on_text_in_page: type: string title: Auto Mode Trigger On Text In Page auto_mode_trigger_on_regexp_in_page: type: string title: Auto Mode Trigger On Regexp In Page auto_mode_configuration_json: type: string title: Auto Mode Configuration Json azure_openai_api_version: type: string title: Azure Openai Api Version azure_openai_deployment_name: type: string title: Azure Openai Deployment Name azure_openai_endpoint: type: string title: Azure Openai Endpoint azure_openai_key: type: string title: Azure Openai Key bbox_bottom: type: number title: Bbox Bottom bbox_left: type: number title: Bbox Left bbox_right: type: number title: Bbox Right bbox_top: type: number title: Bbox Top compact_markdown_table: type: boolean title: Compact Markdown Table default: false disable_ocr: type: boolean title: Disable Ocr default: false disable_reconstruction: type: boolean title: Disable Reconstruction default: false disable_image_extraction: type: boolean title: Disable Image Extraction default: false do_not_cache: type: boolean title: Do Not Cache default: false do_not_unroll_columns: type: boolean title: Do Not Unroll Columns default: false 
extract_charts: type: boolean title: Extract Charts default: false guess_xlsx_sheet_name: type: boolean title: Guess Xlsx Sheet Name default: false high_res_ocr: type: boolean title: High Res Ocr default: false html_make_all_elements_visible: type: boolean title: Html Make All Elements Visible default: false layout_aware: type: boolean title: Layout Aware default: false specialized_chart_parsing_agentic: type: boolean title: Specialized Chart Parsing Agentic default: false specialized_chart_parsing_plus: type: boolean title: Specialized Chart Parsing Plus default: false specialized_chart_parsing_efficient: type: boolean title: Specialized Chart Parsing Efficient default: false specialized_image_parsing: type: boolean title: Specialized Image Parsing default: false precise_bounding_box: type: boolean title: Precise Bounding Box default: false line_level_bounding_box: type: boolean title: Line Level Bounding Box default: false html_remove_fixed_elements: type: boolean title: Html Remove Fixed Elements default: false html_remove_navigation_elements: type: boolean title: Html Remove Navigation Elements default: false http_proxy: type: string title: Http Proxy input_s3_path: type: string title: Input S3 Path default: '' input_s3_region: type: string title: Input S3 Region default: '' input_url: type: string title: Input Url invalidate_cache: type: boolean title: Invalidate Cache default: false language: items: $ref: '#/components/schemas/ParserLanguages' type: array title: Language default: - en extract_layout: type: boolean title: Extract Layout default: false max_pages: anyOf: - type: integer - type: 'null' title: Max Pages merge_tables_across_pages_in_markdown: type: boolean title: Merge Tables Across Pages In Markdown default: false outlined_table_extraction: type: boolean title: Outlined Table Extraction default: false aggressive_table_extraction: type: boolean title: Aggressive Table Extraction default: false output_pdf_of_document: type: boolean title: Output Pdf 
Of Document default: false output_s3_path_prefix: type: string title: Output S3 Path Prefix default: '' output_s3_region: type: string title: Output S3 Region default: '' page_prefix: type: string title: Page Prefix default: '' page_separator: type: string title: Page Separator page_suffix: type: string title: Page Suffix default: '' preserve_layout_alignment_across_pages: type: boolean title: Preserve Layout Alignment Across Pages default: false preserve_very_small_text: type: boolean title: Preserve Very Small Text default: false skip_diagonal_text: type: boolean title: Skip Diagonal Text default: false spreadsheet_extract_sub_tables: type: boolean title: Spreadsheet Extract Sub Tables default: true spreadsheet_force_formula_computation: type: boolean title: Spreadsheet Force Formula Computation default: false inline_images_in_markdown: type: boolean title: Inline Images In Markdown default: false structured_output: type: boolean title: Structured Output default: false structured_output_json_schema: type: string title: Structured Output Json Schema structured_output_json_schema_name: type: string title: Structured Output Json Schema Name take_screenshot: type: boolean title: Take Screenshot default: false target_pages: type: string title: Target Pages default: '' vendor_multimodal_api_key: type: string title: Vendor Multimodal Api Key default: '' vendor_multimodal_model_name: type: string title: Vendor Multimodal Model Name model: type: string title: Model webhook_url: type: string title: Webhook Url default: '' webhook_configurations: type: string title: Webhook Configurations default: '' preset: type: string title: Preset default: '' parse_mode: anyOf: - $ref: '#/components/schemas/ParsingMode' - type: 'null' page_error_tolerance: type: number title: Page Error Tolerance default: 0.05 replace_failed_page_mode: anyOf: - $ref: '#/components/schemas/FailPageMode' - type: 'null' replace_failed_page_with_error_message_prefix: type: string title: Replace Failed Page 
With Error Message Prefix default: '' replace_failed_page_with_error_message_suffix: type: string title: Replace Failed Page With Error Message Suffix default: '' system_prompt: type: string title: System Prompt default: '' system_prompt_append: type: string title: System Prompt Append default: '' user_prompt: type: string title: User Prompt default: '' job_timeout_in_seconds: type: number title: Job Timeout In Seconds job_timeout_extra_time_per_page_in_seconds: type: number title: Job Timeout Extra Time Per Page In Seconds strict_mode_image_extraction: type: boolean title: Strict Mode Image Extraction default: false strict_mode_image_ocr: type: boolean title: Strict Mode Image Ocr default: false strict_mode_reconstruction: type: boolean title: Strict Mode Reconstruction default: false strict_mode_buggy_font: type: boolean title: Strict Mode Buggy Font default: false save_images: type: boolean title: Save Images default: true ignore_document_elements_for_layout_detection: type: boolean title: Ignore Document Elements For Layout Detection default: false keep_page_separator_when_merging_tables: type: boolean title: Keep Page Separator When Merging Tables default: false output_tables_as_HTML: type: boolean title: Output Tables As Html default: false markdown_table_multiline_header_separator: type: string title: Markdown Table Multiline Header Separator use_vendor_multimodal_model: type: boolean title: Use Vendor Multimodal Model default: false bounding_box: type: string title: Bounding Box default: '' gpt4o_mode: type: boolean title: Gpt4O Mode default: false gpt4o_api_key: type: string title: Gpt4O Api Key default: '' complemental_formatting_instruction: type: string title: Complemental Formatting Instruction content_guideline_instruction: type: string title: Content Guideline Instruction premium_mode: type: boolean title: Premium Mode default: false is_formatting_instruction: type: boolean title: Is Formatting Instruction default: true continuous_mode: type: boolean 
title: Continuous Mode default: false parsing_instruction: type: string title: Parsing Instruction default: '' fast_mode: type: boolean title: Fast Mode default: false formatting_instruction: type: string title: Formatting Instruction hide_headers: type: boolean title: Hide Headers default: false hide_footers: type: boolean title: Hide Footers default: false page_header_prefix: type: string title: Page Header Prefix page_header_suffix: type: string title: Page Header Suffix page_footer_prefix: type: string title: Page Footer Prefix page_footer_suffix: type: string title: Page Footer Suffix remove_hidden_text: type: boolean title: Remove Hidden Text default: false presentation_out_of_bounds_content: type: boolean title: Presentation Out Of Bounds Content default: false presentation_skip_embedded_data: type: boolean title: Presentation Skip Embedded Data default: false extract_printed_page_number: type: boolean title: Extract Printed Page Number default: false tier: type: string title: Tier version: type: string title: Version type: object title: Body_upload_file_api_v1_parsing_upload_post Body_upload_file_to_directory_api_v1_beta_directories__directory_id__files_upload_post: properties: upload_file: type: string format: binary title: Upload File unique_id: anyOf: - type: string - type: 'null' title: Unique Id display_name: anyOf: - type: string - type: 'null' title: Display Name external_file_id: anyOf: - type: string - type: 'null' title: External File Id metadata: anyOf: - type: string - type: 'null' title: Metadata description: User metadata as a JSON object string. 
examples: - '{"source": "web", "priority": 1}' type: object required: - upload_file title: Body_upload_file_to_directory_api_v1_beta_directories__directory_id__files_upload_post BoxAuthMechanism: type: string enum: - developer_token - ccg title: BoxAuthMechanism CharacterChunkingConfig: properties: chunk_size: type: integer exclusiveMinimum: 0.0 title: Chunk Size default: 1024 chunk_overlap: type: integer title: Chunk Overlap default: 200 minimum: 0 mode: type: string const: character title: Mode default: character type: object title: CharacterChunkingConfig ChatData: properties: retrieval_parameters: $ref: '#/components/schemas/PresetRetrievalParams' llm_parameters: anyOf: - $ref: '#/components/schemas/LLMParameters' - type: 'null' class_name: type: string title: Class Name default: base_component type: object title: ChatData ChatInputParams: properties: messages: items: $ref: '#/components/schemas/InputMessage' type: array minItems: 1 title: Messages data: $ref: '#/components/schemas/ChatData' class_name: type: string title: Class Name default: base_component type: object title: ChatInputParams ChatMessage: properties: id: type: string format: uuid title: Id index: type: integer title: Index description: The index of the message in the chat. annotations: items: $ref: '#/components/schemas/MessageAnnotation' type: array title: Annotations description: Retrieval annotations for the message. role: $ref: '#/components/schemas/MessageRole' description: The role of the message.
content: anyOf: - type: string - type: 'null' title: Content description: Text content of the generation additional_kwargs: additionalProperties: type: string type: object title: Additional Kwargs description: Additional arguments passed to the model class_name: type: string title: Class Name default: base_component type: object required: - id - index - role title: ChatMessage ChatParams: properties: index_ids: items: type: string type: array minItems: 1 title: Index Ids description: Indexes to retrieve data from. examples: - - idx-abc123 - idx-def456 prompt: type: string maxLength: 1000 title: Prompt description: User message for this chat turn. examples: - What were the main findings in Q3? type: object required: - index_ids - prompt title: ChatParams description: Request body for running a chat turn against one or more indexes. ChatSessionSummary: properties: session_id: type: string title: Session Id description: Unique session identifier. examples: - ses-abc123 generated_title: anyOf: - type: string - type: 'null' title: Generated Title description: Auto-generated title derived from the first user message. examples: - What were the main findings in Q3?... last_updated_at: type: string title: Last Updated At description: ISO-format timestamp showing when the session was last updated. examples: - '2026-04-22T12:34:41.342245' job_metadata: anyOf: - $ref: '#/components/schemas/JobMetadata' - type: 'null' description: Token usage and status from the most recent run. Null if the session has not been run yet. index_ids: anyOf: - items: type: string type: array - type: 'null' title: Index Ids description: Indexes this session is bound to. Null on unbound sessions. examples: - - idx-abc123 - idx-def456 type: object required: - session_id - last_updated_at title: ChatSessionSummary description: Summary of a chat session, including its title and last run metadata. 
# Outcome of classifying one file: the reasoning, a confidence in [0.0, 1.0],
# and the matched document type (null when nothing matched).
ClassificationResult:
  title: ClassificationResult
  description: Result of classifying a single file.
  type: object
  required:
    - reasoning
    - confidence
    - type
  properties:
    reasoning:
      type: string
      title: Reasoning
      description: >-
        Step-by-step explanation of why this classification was chosen and the
        confidence score assigned
    confidence:
      type: number
      maximum: 1.0
      minimum: 0.0
      title: Confidence
      description: Confidence score of the classification (0.0-1.0)
    type:
      anyOf:
        - type: string
        - type: 'null'
      title: Type
      description: The document type that best matches, or null if no match.

# One content-based rule: the type label to assign (1-50 chars) and a
# natural-language description of matching documents (10-500 chars).
ClassifierRule:
  title: ClassifierRule
  description: >-
    A rule for classifying documents - v0 simplified version.
    This represents a single classification rule that will be applied to
    documents. All rules are content-based and use natural language
    descriptions.
  type: object
  required:
    - type
    - description
  properties:
    type:
      type: string
      maxLength: 50
      minLength: 1
      title: Type
      description: >-
        The document type to assign when this rule matches (e.g., 'invoice',
        'receipt', 'contract')
      examples:
        - invoice
        - receipt
        - contract
        - report
        - proposal
    description:
      type: string
      maxLength: 500
      minLength: 10
      title: Description
      description: >-
        Natural language description of what to classify. Be specific about
        the content characteristics that identify this document type.
      examples:
        - contains invoice number, line items, and total amount
        - purchase receipt with transaction info and merchant details
        - legal contract with terms, conditions, and signatures

# A classify job record: status fields, identifiers and timestamps, plus the
# rules, mode and parsing configuration attached to the job.
ClassifyJob:
  title: ClassifyJob
  description: A classify job.
  type: object
  required:
    - status
    - id
    - rules
    - user_id
    - project_id
  properties:
    status:
      $ref: '#/components/schemas/StatusEnum'
      description: The status of the classify job
    effective_at:
      type: string
      format: date-time
      title: Effective At
    job_record_id:
      anyOf:
        - type: string
        - type: 'null'
      title: Job Record Id
      description: The job record ID associated with this status, if any.
    error_message:
      anyOf:
        - type: string
        - type: 'null'
      title: Error Message
      description: Error message for the latest job attempt, if any.
    id:
      type: string
      format: uuid
      title: Id
      description: Unique identifier
    created_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Created At
      description: Creation datetime
    updated_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Updated At
      description: Update datetime
    rules:
      items:
        $ref: '#/components/schemas/ClassifierRule'
      type: array
      minItems: 1
      title: Rules
      description: The rules to classify the files
    user_id:
      type: string
      title: User Id
      description: The ID of the user
    project_id:
      type: string
      format: uuid
      title: Project Id
      description: The ID of the project
    mode:
      $ref: '#/components/schemas/ClassifyMode'
      description: The classification mode to use
      default: FAST
    parsing_configuration:
      $ref: '#/components/schemas/ClassifyParsingConfiguration'
      description: The configuration for the parsing job
      default:
        lang: en
        max_pages: 5

# Creation payload for a classify job: rules and 1-500 file IDs are required;
# mode, parsing configuration and webhooks are optional.
ClassifyJobCreate:
  title: ClassifyJobCreate
  # Previously duplicated the ClassifyJob description ("A classify job.").
  description: Request body for creating a classify job.
  type: object
  required:
    - rules
    - file_ids
  properties:
    rules:
      items:
        $ref: '#/components/schemas/ClassifierRule'
      type: array
      minItems: 1
      title: Rules
      description: The rules to classify the files
    mode:
      $ref: '#/components/schemas/ClassifyMode'
      description: The classification mode to use
      default: FAST
    file_ids:
      items:
        type: string
        format: uuid
      type: array
      maxItems: 500
      minItems: 1
      title: File Ids
      description: The IDs of the files to classify
    parsing_configuration:
      $ref: '#/components/schemas/ClassifyParsingConfiguration'
      description: The configuration for the parsing job
      default:
        lang: en
        max_pages: 5
    webhook_configurations:
      items:
        $ref: '#/components/schemas/LlamaParseWebhookConfiguration'
      type: array
      title: Webhook Configurations
      description: List of webhook configurations for notifications
# Paginated list of per-file classifications; only `items` is required.
ClassifyJobResults:
  title: ClassifyJobResults
  description: >-
    Response model for the classify endpoint following AIP-132 pagination
    standard.
  type: object
  required:
    - items
  properties:
    items:
      items:
        $ref: '#/components/schemas/FileClassification'
      type: array
      title: Items
      description: The list of items.
    next_page_token:
      anyOf:
        - type: string
        - type: 'null'
      title: Next Page Token
      description: >-
        A token, which can be sent as page_token to retrieve the next page.
        If this field is omitted, there are no subsequent pages.
    total_size:
      anyOf:
        - type: integer
        - type: 'null'
      title: Total Size
      description: >-
        The total number of items available. This is only populated when
        specifically requested. The value may be an estimate and can be used
        for display purposes only.

# String enum of the classification execution modes.
ClassifyMode:
  title: ClassifyMode
  description: Supported classification execution modes.
  type: string
  enum:
    - FAST
    - MULTIMODAL

# Parsing options for a classify job; every property is optional.
ClassifyParsingConfiguration:
  title: ClassifyParsingConfiguration
  description: Parsing configuration for a classify job.
  type: object
  properties:
    lang:
      $ref: '#/components/schemas/ParserLanguages'
      description: The language to parse the files in
      default: en
    max_pages:
      anyOf:
        - type: integer
        - type: 'null'
      title: Max Pages
      description: The maximum number of pages to parse
      default: 5
    target_pages:
      anyOf:
        - items:
            type: integer
          type: array
          minItems: 1
        - type: 'null'
      title: Target Pages
      description: >-
        The pages to target for parsing (0-indexed, so first page is at 0)
# Inline classify configuration: at least one rule, a mode fixed to FAST, and
# an optional (nullable) parsing configuration.
ClassifyV2Configuration:
  title: ClassifyV2Configuration
  description: Configuration for a classify job.
  type: object
  required:
    - rules
  properties:
    rules:
      items:
        $ref: '#/components/schemas/ClassifyV2Rule'
      type: array
      minItems: 1
      title: Rules
      description: >-
        Classify rules to evaluate against the document (at least one
        required)
    mode:
      type: string
      const: FAST
      title: Mode
      description: Classify execution mode
      default: FAST
    parsing_configuration:
      anyOf:
        - $ref: '#/components/schemas/ClassifyV2ParsingConfiguration'
        - type: 'null'
      description: Parsing configuration for controlling which pages are read

# Creation request for a classify job. No property is required by the schema;
# `file_id` and `parse_job_id` are deprecated in favour of `file_input`.
ClassifyV2JobCreateRequest:
  title: ClassifyV2JobCreateRequest
  description: Request to create a classify job.
  type: object
  properties:
    webhook_configurations:
      anyOf:
        - items:
            $ref: '#/components/schemas/WebhookConfiguration'
          type: array
        - type: 'null'
      title: Webhook Configurations
      description: Outbound webhook endpoints to notify on job status changes
    configuration_id:
      anyOf:
        - type: string
        - type: 'null'
      title: Configuration Id
      description: Saved configuration ID
      examples:
        - cfg-11111111-2222-3333-4444-555555555555
    configuration:
      anyOf:
        - $ref: '#/components/schemas/ClassifyV2Configuration'
        - type: 'null'
      description: >-
        Inline classify configuration (required if configuration_id is not
        provided)
    file_input:
      anyOf:
        - type: string
          maxLength: 200
        - type: 'null'
      title: File Input
      description: File ID or parse job ID to classify
      examples:
        - dfl-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee
    file_id:
      anyOf:
        - type: string
        - type: 'null'
      title: File Id
      description: 'Deprecated: use file_input instead'
      deprecated: true
      examples:
        - dfl-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee
    parse_job_id:
      anyOf:
        - type: string
        - type: 'null'
      title: Parse Job Id
      description: 'Deprecated: use file_input instead'
      deprecated: true
      examples:
        - pjb-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee
    transaction_id:
      anyOf:
        - type: string
        - type: 'null'
      title: Transaction Id
      description: Idempotency key scoped to the project
      examples:
        - tx-unique-idempotency-key

# Paginated list of classify jobs; only `items` is required.
ClassifyV2JobQueryResponse:
  title: ClassifyV2JobQueryResponse
  description: Response schema for paginated classify job queries.
  type: object
  required:
    - items
  properties:
    items:
      items:
        $ref: '#/components/schemas/ClassifyV2JobResponse'
      type: array
      title: Items
      description: The list of items.
    next_page_token:
      anyOf:
        - type: string
        - type: 'null'
      title: Next Page Token
      description: >-
        A token, which can be sent as page_token to retrieve the next page.
        If this field is omitted, there are no subsequent pages.
    total_size:
      anyOf:
        - type: integer
        - type: 'null'
      title: Total Size
      description: >-
        The total number of items available. This is only populated when
        specifically requested. The value may be an estimate and can be used
        for display purposes only.

# A classify job as returned by the API: identifiers, status, the
# configuration used, and the nullable result / error message.
ClassifyV2JobResponse:
  title: ClassifyV2JobResponse
  description: Response for a classify job.
  type: object
  required:
    - id
    - file_input
    - project_id
    - user_id
    - status
    - document_input_type
    - configuration
  properties:
    id:
      anyOf:
        - type: string
        - type: string
          format: uuid
      title: Id
      description: Unique identifier
    created_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Created At
      description: Creation datetime
    updated_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Updated At
      description: Update datetime
    file_input:
      type: string
      title: File Input
      description: ID of the input file or parse job
    project_id:
      type: string
      title: Project Id
      description: Project this job belongs to
    user_id:
      type: string
      title: User Id
      description: User who created this job
    status:
      type: string
      enum:
        - PENDING
        - RUNNING
        - COMPLETED
        - FAILED
      title: Status
      description: 'Current job status: PENDING, RUNNING, COMPLETED, or FAILED'
    document_input_type:
      type: string
      enum:
        - url
        - file_id
        - parse_job_id
      title: Document Input Type
      # The description used to name FILE / PARSE_JOB, which are not values of
      # this enum; it now lists the values the enum actually allows.
      description: 'How the input document was supplied: url, file_id, or parse_job_id'
    configuration:
      $ref: '#/components/schemas/ClassifyV2Configuration'
      description: Classify configuration used for this job
    result:
      anyOf:
        - $ref: '#/components/schemas/ClassifyV2Result'
        - type: 'null'
      description: Classify result — available when status is COMPLETED
    error_message:
      anyOf:
        - type: string
        - type: 'null'
      title: Error Message
      description: Error message if job failed
    configuration_id:
      anyOf:
        - type: string
        - type: 'null'
      title: Configuration Id
      description: Product configuration ID
    transaction_id:
      anyOf:
        - type: string
        - type: 'null'
      title: Transaction Id
      description: Idempotency key
    parse_job_id:
      anyOf:
        - type: string
        - type: 'null'
      title: Parse Job Id
      description: Associated parse job ID

# Same fields as ClassifyV2Configuration plus a required `product_type`
# discriminator fixed to classify_v2.
ClassifyV2Parameters:
  title: ClassifyV2Parameters
  description: Typed parameters for a *classify v2* product configuration.
  type: object
  required:
    - rules
    - product_type
  properties:
    rules:
      items:
        $ref: '#/components/schemas/ClassifyV2Rule'
      type: array
      minItems: 1
      title: Rules
      description: >-
        Classify rules to evaluate against the document (at least one
        required)
    mode:
      type: string
      const: FAST
      title: Mode
      description: Classify execution mode
      default: FAST
    parsing_configuration:
      anyOf:
        - $ref: '#/components/schemas/ClassifyV2ParsingConfiguration'
        - type: 'null'
      description: Parsing configuration for controlling which pages are read
    product_type:
      type: string
      const: classify_v2
      title: Product Type
      description: Product type.

# Page-selection options for classify jobs; every property is optional.
ClassifyV2ParsingConfiguration:
  title: ClassifyV2ParsingConfiguration
  description: Parsing configuration for classify jobs.
  type: object
  properties:
    lang:
      type: string
      title: Lang
      description: ISO 639-1 language code for the document
      default: en
      examples:
        - en
        - es
        - zh
    target_pages:
      anyOf:
        - type: string
        - type: 'null'
      title: Target Pages
      description: >-
        Comma-separated page numbers or ranges to process (1-based). Omit to
        process all pages.
      # Quoted so the digit/comma strings can never be retyped by a parser.
      examples:
        - '1,3,5-7'
        - '1-3,8-10'
    max_pages:
      anyOf:
        - type: integer
          minimum: 1.0
        - type: 'null'
      title: Max Pages
      description: Maximum number of pages to process. Omit for no limit.
      examples:
        - 10
ClassifyV2Result: properties: reasoning: type: string title: Reasoning description: Why the document matched (or didn't match) the returned rule confidence: type: number maximum: 1.0 minimum: 0.0 title: Confidence description: Confidence score between 0.0 and 1.0 type: anyOf: - type: string - type: 'null' title: Type description: Matched rule type, or null if no rule matched type: object required: - reasoning - confidence - type title: ClassifyV2Result description: Result of classifying a document. ClassifyV2Rule: properties: type: type: string maxLength: 50 minLength: 1 title: Type description: Document type to assign when rule matches examples: - invoice - receipt - contract - report - proposal description: type: string maxLength: 500 minLength: 10 title: Description description: Natural language criteria for matching this rule examples: - contains invoice number, line items, and total amount - purchase receipt with transaction info and merchant details - legal contract with terms, conditions, and signatures type: object required: - type - description title: ClassifyV2Rule description: A rule for classifying documents. CloudAstraDBVectorStore: properties: supports_nested_metadata_filters: type: boolean const: true title: Supports Nested Metadata Filters default: true token: type: string format: password title: Token description: The Astra DB Application Token to use writeOnly: true api_endpoint: type: string title: Api Endpoint description: The Astra DB JSON API endpoint for your database collection_name: type: string title: Collection Name description: Collection name to use. If not existing, it will be created embedding_dimension: type: integer title: Embedding Dimension description: Length of the embedding vectors in use keyspace: anyOf: - type: string - type: 'null' title: Keyspace description: The keyspace to use. 
If not provided, 'default_keyspace' class_name: type: string title: Class Name default: CloudAstraDBVectorStore type: object required: - token - api_endpoint - collection_name - embedding_dimension title: CloudAstraDBVectorStore description: "Cloud AstraDB Vector Store.\n\nThis class is used to store the\ \ configuration for an AstraDB vector store, so that it can be\ncreated and\ \ used in LlamaCloud.\n\nArgs:\n token (str): The Astra DB Application\ \ Token to use.\n api_endpoint (str): The Astra DB JSON API endpoint for\ \ your database.\n collection_name (str): Collection name to use. If not\ \ existing, it will be created.\n embedding_dimension (int): Length of\ \ the embedding vectors in use.\n keyspace (optional[str]): The keyspace\ \ to use. If not provided, 'default_keyspace'" CloudAzStorageBlobDataSource: properties: supports_access_control: type: boolean title: Supports Access Control default: false container_name: type: string title: Container Name description: The name of the Azure Storage Blob container to read from. account_url: type: string title: Account Url description: The Azure Storage Blob account URL to use for authentication. blob: anyOf: - type: string - type: 'null' title: Blob description: The blob name to read from. prefix: anyOf: - type: string - type: 'null' title: Prefix description: The prefix of the Azure Storage Blob objects to read from. account_name: anyOf: - type: string - type: 'null' title: Account Name description: The Azure Storage Blob account name to use for authentication. account_key: anyOf: - type: string format: password writeOnly: true - type: 'null' title: Account Key description: The Azure Storage Blob account key to use for authentication. tenant_id: anyOf: - type: string - type: 'null' title: Tenant Id description: The Azure AD tenant ID to use for authentication. client_id: anyOf: - type: string - type: 'null' title: Client Id description: The Azure AD client ID to use for authentication. 
client_secret: anyOf: - type: string format: password writeOnly: true - type: 'null' title: Client Secret description: The Azure AD client secret to use for authentication. class_name: type: string title: Class Name default: CloudAzStorageBlobDataSource type: object required: - container_name - account_url title: CloudAzStorageBlobDataSource CloudAzureAISearchVectorStore: properties: supports_nested_metadata_filters: type: boolean const: true title: Supports Nested Metadata Filters default: true search_service_api_key: type: string format: password title: Search Service Api Key writeOnly: true search_service_endpoint: type: string title: Search Service Endpoint search_service_api_version: anyOf: - type: string - type: 'null' title: Search Service Api Version index_name: anyOf: - type: string - type: 'null' title: Index Name filterable_metadata_field_keys: anyOf: - additionalProperties: true type: object - type: 'null' title: Filterable Metadata Field Keys embedding_dimension: anyOf: - type: integer - type: 'null' title: Embedding Dimension client_id: anyOf: - type: string - type: 'null' title: Client Id client_secret: anyOf: - type: string format: password writeOnly: true - type: 'null' title: Client Secret tenant_id: anyOf: - type: string - type: 'null' title: Tenant Id class_name: type: string title: Class Name default: CloudAzureAISearchVectorStore type: object required: - search_service_api_key - search_service_endpoint title: CloudAzureAISearchVectorStore description: Cloud Azure AI Search Vector Store. CloudBoxDataSource: properties: supports_access_control: type: boolean title: Supports Access Control default: false folder_id: anyOf: - type: string - type: 'null' title: Folder Id description: The ID of the Box folder to read from. 
authentication_mechanism: $ref: '#/components/schemas/BoxAuthMechanism' description: The type of authentication to use (Developer Token or CCG) developer_token: anyOf: - type: string format: password writeOnly: true - type: 'null' title: Developer Token description: Developer token for authentication if authentication_mechanism is 'developer_token'. client_id: anyOf: - type: string - type: 'null' title: Client Id description: Box API key used for identifying the application the user is authenticating with client_secret: anyOf: - type: string format: password writeOnly: true - type: 'null' title: Client Secret description: Box API secret used for making auth requests. user_id: anyOf: - type: string - type: 'null' title: User Id description: Box User ID; if provided, authenticates as the user. enterprise_id: anyOf: - type: string - type: 'null' title: Enterprise Id description: Box Enterprise ID; if provided, authenticates as the service. class_name: type: string title: Class Name default: CloudBoxDataSource type: object required: - authentication_mechanism title: CloudBoxDataSource CloudConfluenceDataSource: properties: supports_access_control: type: boolean title: Supports Access Control default: false server_url: type: string title: Server Url description: The server URL of the Confluence instance. authentication_mechanism: type: string title: Authentication Mechanism description: Type of Authentication for connecting to Confluence APIs. user_name: anyOf: - type: string - type: 'null' title: User Name description: The username to use for authentication. api_token: anyOf: - type: string format: password writeOnly: true - type: 'null' title: Api Token description: The API token to use for authentication. space_key: anyOf: - type: string - type: 'null' title: Space Key description: The space key to read from. page_ids: anyOf: - type: string - type: 'null' title: Page Ids description: The IDs of the Confluence pages to read from. 
cql: anyOf: - type: string - type: 'null' title: Cql description: The CQL query to use for fetching pages. label: anyOf: - type: string - type: 'null' title: Label description: The label to use for fetching pages. index_restricted_pages: type: boolean title: Index Restricted Pages description: Whether to index restricted pages. default: false keep_markdown_format: type: boolean title: Keep Markdown Format description: Whether to keep the markdown format. failure_handling: $ref: '#/components/schemas/FailureHandlingConfig' description: "Configuration for handling failures during processing. Key-value\ \ object controlling failure handling behaviors.\n\nExample:\n{\n \"\ skip_list_failures\": true\n}\n\nCurrently supports:\n- skip_list_failures:\ \ Skip failed batches/lists and continue processing" class_name: type: string title: Class Name default: CloudConfluenceDataSource type: object required: - authentication_mechanism - server_url title: CloudConfluenceDataSource CloudDocument: properties: text: type: string title: Text metadata: additionalProperties: true type: object title: Metadata excluded_embed_metadata_keys: items: type: string type: array title: Excluded Embed Metadata Keys default: [] excluded_llm_metadata_keys: items: type: string type: array title: Excluded Llm Metadata Keys default: [] page_positions: anyOf: - items: type: integer type: array - type: 'null' title: Page Positions description: indices in the CloudDocument.text where a new page begins. e.g. Second page starts at index specified by page_positions[1]. id: type: string title: Id status_metadata: anyOf: - additionalProperties: true type: object - type: 'null' title: Status Metadata type: object required: - text - metadata - id title: CloudDocument description: Cloud document stored in S3. 
CloudDocumentCreate: properties: text: type: string title: Text metadata: additionalProperties: true type: object title: Metadata excluded_embed_metadata_keys: items: type: string type: array title: Excluded Embed Metadata Keys default: [] excluded_llm_metadata_keys: items: type: string type: array title: Excluded Llm Metadata Keys default: [] page_positions: anyOf: - items: type: integer type: array - type: 'null' title: Page Positions description: indices in the CloudDocument.text where a new page begins. e.g. Second page starts at index specified by page_positions[1]. id: anyOf: - type: string - type: 'null' title: Id type: object required: - text - metadata title: CloudDocumentCreate description: Create a new cloud document. CloudGoogleDriveDataSource: properties: supports_access_control: type: boolean title: Supports Access Control default: false folder_id: type: string title: Folder Id description: The ID of the Google Drive folder to read from. service_account_key: anyOf: - additionalProperties: type: string type: object description: A dictionary containing secret values - type: 'null' title: Service Account Key description: The service account key JSON to use for authentication. class_name: type: string title: Class Name default: CloudGoogleDriveDataSource type: object required: - folder_id title: CloudGoogleDriveDataSource CloudJiraDataSource: properties: supports_access_control: type: boolean title: Supports Access Control default: false email: anyOf: - type: string - type: 'null' title: Email description: The email address to use for authentication. api_token: anyOf: - type: string format: password writeOnly: true - type: 'null' title: Api Token description: The API/Access Token used for Basic, PAT and OAuth2 authentication. server_url: anyOf: - type: string - type: 'null' title: Server Url description: The server url for Jira Cloud. cloud_id: anyOf: - type: string - type: 'null' title: Cloud Id description: The cloud ID, used in case of OAuth2. 
authentication_mechanism: type: string title: Authentication Mechanism description: Type of Authentication for connecting to Jira APIs. query: type: string title: Query description: JQL (Jira Query Language) query to search. class_name: type: string title: Class Name default: CloudJiraDataSource type: object required: - authentication_mechanism - query title: CloudJiraDataSource description: Cloud Jira Data Source integrating JiraReader. CloudJiraDataSourceV2: properties: supports_access_control: type: boolean title: Supports Access Control default: false email: anyOf: - type: string - type: 'null' title: Email description: The email address to use for authentication. api_token: anyOf: - type: string format: password writeOnly: true - type: 'null' title: Api Token description: The API Access Token used for Basic, PAT and OAuth2 authentication. server_url: type: string title: Server Url description: The server url for Jira Cloud. cloud_id: anyOf: - type: string - type: 'null' title: Cloud Id description: The cloud ID, used in case of OAuth2. authentication_mechanism: type: string title: Authentication Mechanism description: Type of Authentication for connecting to Jira APIs. api_version: type: string enum: - '2' - '3' title: Api Version description: Jira REST API version to use (2 or 3). 3 supports Atlassian Document Format (ADF). default: '2' query: type: string title: Query description: JQL (Jira Query Language) query to search. fields: anyOf: - items: type: string type: array - type: 'null' title: Fields description: List of fields to retrieve from Jira. If None, retrieves all fields. expand: anyOf: - type: string - type: 'null' title: Expand description: Fields to expand in the response. requests_per_minute: anyOf: - type: integer - type: 'null' title: Requests Per Minute description: Rate limit for Jira API requests per minute. 
get_permissions: type: boolean title: Get Permissions description: Whether to fetch project role permissions and issue-level security default: true class_name: type: string title: Class Name default: CloudJiraDataSourceV2 type: object required: - server_url - authentication_mechanism - query title: CloudJiraDataSourceV2 description: Cloud Jira Data Source integrating JiraReaderV2. CloudMilvusVectorStore: properties: supports_nested_metadata_filters: type: boolean title: Supports Nested Metadata Filters default: false uri: type: string title: Uri collection_name: anyOf: - type: string - type: 'null' title: Collection Name token: anyOf: - type: string format: password writeOnly: true - type: 'null' title: Token embedding_dimension: anyOf: - type: integer - type: 'null' title: Embedding Dimension class_name: type: string title: Class Name default: CloudMilvusVectorStore type: object required: - uri title: CloudMilvusVectorStore description: Cloud Milvus Vector Store. CloudMongoDBAtlasVectorSearch: properties: supports_nested_metadata_filters: type: boolean title: Supports Nested Metadata Filters default: false mongodb_uri: type: string format: password title: Mongodb Uri writeOnly: true db_name: type: string title: Db Name collection_name: type: string title: Collection Name vector_index_name: anyOf: - type: string - type: 'null' title: Vector Index Name fulltext_index_name: anyOf: - type: string - type: 'null' title: Fulltext Index Name embedding_dimension: anyOf: - type: integer - type: 'null' title: Embedding Dimension class_name: type: string title: Class Name default: CloudMongoDBAtlasVectorSearch type: object required: - mongodb_uri - db_name - collection_name title: CloudMongoDBAtlasVectorSearch description: "Cloud MongoDB Atlas Vector Store.\n\nThis class is used to store\ \ the configuration for a MongoDB Atlas vector store,\nso that it can be created\ \ and used in LlamaCloud.\n\nArgs:\n mongodb_uri (str): URI for connecting\ \ to MongoDB Atlas\n db_name 
(str): name of the MongoDB database\n collection_name\ \ (str): name of the MongoDB collection\n vector_index_name (str): name\ \ of the MongoDB Atlas vector index\n fulltext_index_name (str): name of\ \ the MongoDB Atlas full-text index" CloudNotionPageDataSource: properties: supports_access_control: type: boolean title: Supports Access Control default: false integration_token: type: string format: password title: Integration Token description: The integration token to use for authentication. writeOnly: true database_ids: anyOf: - type: string - type: 'null' title: Database Ids description: The Notion Database Id to read content from. page_ids: anyOf: - type: string - type: 'null' title: Page Ids description: The Page ID's of the Notion to read from. class_name: type: string title: Class Name default: CloudNotionPageDataSource type: object required: - integration_token title: CloudNotionPageDataSource CloudOneDriveDataSource: properties: supports_access_control: type: boolean const: true title: Supports Access Control default: true user_principal_name: type: string title: User Principal Name description: The user principal name to use for authentication. folder_path: anyOf: - type: string - type: 'null' title: Folder Path description: The path of the OneDrive folder to read from. folder_id: anyOf: - type: string - type: 'null' title: Folder Id description: The ID of the OneDrive folder to read from. client_id: type: string title: Client Id description: The client ID to use for authentication. client_secret: type: string format: password title: Client Secret description: The client secret to use for authentication. writeOnly: true tenant_id: type: string title: Tenant Id description: The tenant ID to use for authentication. required_exts: anyOf: - items: type: string type: array - type: 'null' title: Required Exts description: The list of required file extensions. 
class_name: type: string title: Class Name default: CloudOneDriveDataSource type: object required: - user_principal_name - client_id - client_secret - tenant_id title: CloudOneDriveDataSource CloudPineconeVectorStore: properties: supports_nested_metadata_filters: type: boolean const: true title: Supports Nested Metadata Filters default: true api_key: type: string format: password title: Api Key description: The API key for authenticating with Pinecone writeOnly: true index_name: type: string title: Index Name namespace: anyOf: - type: string - type: 'null' title: Namespace insert_kwargs: anyOf: - additionalProperties: true type: object - type: 'null' title: Insert Kwargs class_name: type: string title: Class Name default: CloudPineconeVectorStore type: object required: - api_key - index_name title: CloudPineconeVectorStore description: "Cloud Pinecone Vector Store.\n\nThis class is used to store the\ \ configuration for a Pinecone vector store, so that it can be\ncreated and\ \ used in LlamaCloud.\n\nArgs:\n api_key (str): API key for authenticating\ \ with Pinecone\n index_name (str): name of the Pinecone index\n namespace\ \ (optional[str]): namespace to use in the Pinecone index\n insert_kwargs\ \ (optional[dict]): additional kwargs to pass during insertion" CloudPostgresVectorStore: properties: supports_nested_metadata_filters: type: boolean title: Supports Nested Metadata Filters default: true database: type: string title: Database host: type: string title: Host password: type: string format: password title: Password writeOnly: true port: type: integer title: Port user: type: string title: User table_name: type: string title: Table Name schema_name: type: string title: Schema Name embed_dim: type: integer title: Embed Dim hybrid_search: anyOf: - type: boolean - type: 'null' title: Hybrid Search default: true perform_setup: type: boolean title: Perform Setup default: true hnsw_settings: anyOf: - $ref: '#/components/schemas/PGVectorHNSWSettings' - type: 'null' 
description: HNSW settings for PGVector index. Set to null to disable HNSW indexing in favor of a brute force indexing/exact search strategy instead. class_name: type: string title: Class Name default: CloudPostgresVectorStore type: object required: - database - host - password - port - user - table_name - schema_name - embed_dim title: CloudPostgresVectorStore CloudQdrantVectorStore: properties: supports_nested_metadata_filters: type: boolean const: true title: Supports Nested Metadata Filters default: true collection_name: type: string title: Collection Name url: type: string title: Url api_key: type: string format: password title: Api Key writeOnly: true max_retries: type: integer title: Max Retries default: 3 client_kwargs: additionalProperties: true type: object title: Client Kwargs class_name: type: string title: Class Name default: CloudQdrantVectorStore type: object required: - collection_name - url - api_key title: CloudQdrantVectorStore description: "Cloud Qdrant Vector Store.\n\nThis class is used to store the\ \ configuration for a Qdrant vector store, so that it can be\ncreated and\ \ used in LlamaCloud.\n\nArgs:\n collection_name (str): name of the Qdrant\ \ collection\n url (str): url of the Qdrant instance\n api_key (str):\ \ API key for authenticating with Qdrant\n max_retries (int): maximum number\ \ of retries in case of a failure. Defaults to 3\n client_kwargs (dict):\ \ additional kwargs to pass to the Qdrant client" CloudS3DataSource: properties: supports_access_control: type: boolean title: Supports Access Control default: false bucket: type: string title: Bucket description: The name of the S3 bucket to read from. prefix: anyOf: - type: string - type: 'null' title: Prefix description: The prefix of the S3 objects to read from. regex_pattern: anyOf: - type: string - type: 'null' title: Regex Pattern description: The regex pattern to filter S3 objects. Must be a valid regex pattern. 
aws_access_id: anyOf: - type: string - type: 'null' title: Aws Access Id description: The AWS access ID to use for authentication. aws_access_secret: anyOf: - type: string format: password writeOnly: true - type: 'null' title: Aws Access Secret description: The AWS access secret to use for authentication. s3_endpoint_url: anyOf: - type: string - type: 'null' title: S3 Endpoint Url description: The S3 endpoint URL to use for authentication. class_name: type: string title: Class Name default: CloudS3DataSource type: object required: - bucket title: CloudS3DataSource CloudSharepointDataSource: properties: supports_access_control: type: boolean const: true title: Supports Access Control default: true site_name: anyOf: - type: string - type: 'null' title: Site Name description: The name of the SharePoint site to download from. site_id: anyOf: - type: string - type: 'null' title: Site Id description: The ID of the SharePoint site to download from. folder_path: anyOf: - type: string - type: 'null' title: Folder Path description: The path of the SharePoint folder to read from. folder_id: anyOf: - type: string - type: 'null' title: Folder Id description: The ID of the SharePoint folder to read from. drive_name: anyOf: - type: string - type: 'null' title: Drive Name description: The name of the SharePoint drive to read from. client_id: type: string title: Client Id description: The client ID to use for authentication. client_secret: type: string format: password title: Client Secret description: The client secret to use for authentication. writeOnly: true tenant_id: type: string title: Tenant Id description: The tenant ID to use for authentication. required_exts: anyOf: - items: type: string type: array - type: 'null' title: Required Exts description: The list of required file extensions. get_permissions: type: boolean title: Get Permissions description: Whether to get permissions for the SharePoint site. 
default: true include_path_patterns: anyOf: - items: type: string type: array - type: 'null' title: Include Path Patterns description: 'List of regex patterns for file paths to include. Full paths (including filename) must match at least one pattern to be included. Example: [''/reports/'', ''/docs/.*\.pdf$'', ''^Report.*\.pdf$'']' exclude_path_patterns: anyOf: - items: type: string type: array - type: 'null' title: Exclude Path Patterns description: 'List of regex patterns for file paths to exclude. Files whose paths (including filename) match any pattern will be excluded. Example: [''/temp/'', ''/backup/'', ''\.git/'', ''\.tmp$'', ''^~'']' class_name: type: string title: Class Name default: CloudSharepointDataSource type: object required: - client_id - client_secret - tenant_id title: CloudSharepointDataSource CloudSlackDataSource: properties: supports_access_control: type: boolean title: Supports Access Control default: false slack_token: type: string format: password title: Slack Token description: Slack Bot Token. writeOnly: true channel_ids: anyOf: - type: string - type: 'null' title: Channel Ids description: Slack Channel. latest_date: anyOf: - type: string - type: 'null' title: Latest Date description: Latest date. earliest_date: anyOf: - type: string - type: 'null' title: Earliest Date description: Earliest date. earliest_date_timestamp: anyOf: - type: number - type: 'null' title: Earliest Date Timestamp description: Earliest date timestamp. latest_date_timestamp: anyOf: - type: number - type: 'null' title: Latest Date Timestamp description: Latest date timestamp. channel_patterns: anyOf: - type: string - type: 'null' title: Channel Patterns description: Slack Channel name pattern. 
class_name: type: string title: Class Name default: CloudSlackDataSource type: object required: - slack_token title: CloudSlackDataSource CodeItem: properties: type: type: string const: code title: Type description: Code block item type default: code md: type: string title: Md description: Markdown representation preserving formatting bbox: anyOf: - items: $ref: '#/components/schemas/BBox' type: array - type: 'null' title: Bbox description: List of bounding boxes value: type: string title: Value description: Code content language: anyOf: - type: string - type: 'null' title: Language description: Programming language identifier type: object required: - md - value title: CodeItem CohereEmbedding: properties: model_name: type: string title: Model Name description: The modelId of the Cohere model to use. default: embed-english-v3.0 embed_batch_size: type: integer maximum: 2048.0 exclusiveMinimum: 0.0 title: Embed Batch Size description: The batch size for embedding calls. default: 10 num_workers: anyOf: - type: integer - type: 'null' title: Num Workers description: The number of workers to use for async embedding calls. api_key: anyOf: - type: string - type: 'null' title: Api Key description: The Cohere API key. truncate: type: string title: Truncate description: Truncation type - START/END/NONE default: END input_type: anyOf: - type: string - type: 'null' title: Input Type description: Model Input type. If not provided, search_document and search_query are used when needed. embedding_type: type: string title: Embedding Type description: Embedding type. If not provided, float embedding_type is used when needed. default: float class_name: type: string title: Class Name default: CohereEmbedding type: object required: - api_key title: CohereEmbedding CohereEmbeddingConfig: properties: type: type: string const: COHERE_EMBEDDING title: Type description: Type of the embedding model. 
default: COHERE_EMBEDDING component: $ref: '#/components/schemas/CohereEmbedding' description: Configuration for the Cohere embedding model. type: object title: CohereEmbeddingConfig CompositeRetrievalMode: type: string enum: - routing - full title: CompositeRetrievalMode description: Enum for the mode of composite retrieval. CompositeRetrievalParams: properties: mode: $ref: '#/components/schemas/CompositeRetrievalMode' description: The mode of composite retrieval. default: full rerank_top_n: anyOf: - type: integer - type: 'null' title: Rerank Top N description: (use rerank_config.top_n instead) The number of nodes to retrieve after reranking over retrieved nodes from all retrieval tools. deprecated: true rerank_config: $ref: '#/components/schemas/ReRankConfig' description: The rerank configuration for composite retrieval. query: type: string minLength: 1 title: Query description: The query to retrieve against. type: object required: - query title: CompositeRetrievalParams CompositeRetrievalResult: properties: nodes: items: $ref: '#/components/schemas/CompositeRetrievedTextNodeWithScore' type: array title: Nodes description: The retrieved nodes from the composite retrieval. image_nodes: items: $ref: '#/components/schemas/PageScreenshotNodeWithScore' type: array title: Image Nodes description: The image nodes retrieved by the pipeline for the given query. Deprecated - will soon be replaced with 'page_screenshot_nodes'. deprecated: true page_figure_nodes: items: $ref: '#/components/schemas/PageFigureNodeWithScore' type: array title: Page Figure Nodes description: The page figure nodes retrieved by the pipeline for the given query. type: object title: CompositeRetrievalResult CompositeRetrievedTextNode: properties: id: type: string format: uuid title: Id description: The ID of the retrieved node. retriever_id: type: string format: uuid title: Retriever Id description: The ID of the retriever this node was retrieved from. 
retriever_pipeline_name: type: string title: Retriever Pipeline Name description: The name of the retrieval pipeline this node was retrieved from. pipeline_id: type: string format: uuid title: Pipeline Id description: The ID of the pipeline this node was retrieved from. metadata: additionalProperties: true type: object title: Metadata description: Metadata associated with the retrieved node. text: type: string title: Text description: The text of the retrieved node. start_char_idx: anyOf: - type: integer - type: 'null' title: Start Char Idx description: The start character index of the retrieved node in the document end_char_idx: anyOf: - type: integer - type: 'null' title: End Char Idx description: The end character index of the retrieved node in the document type: object required: - id - retriever_id - retriever_pipeline_name - pipeline_id - text - start_char_idx - end_char_idx title: CompositeRetrievedTextNode CompositeRetrievedTextNodeWithScore: properties: node: $ref: '#/components/schemas/CompositeRetrievedTextNode' score: anyOf: - type: number - type: 'null' title: Score class_name: type: string title: Class Name default: CompositeRetrievedTextNodeWithScore type: object required: - node title: CompositeRetrievedTextNodeWithScore ConfigurableDataSinkNames: type: string enum: - PINECONE - POSTGRES - QDRANT - AZUREAI_SEARCH - MONGODB_ATLAS - MILVUS - ASTRA_DB title: ConfigurableDataSinkNames ConfigurableDataSourceNames: type: string enum: - S3 - AZURE_STORAGE_BLOB - GOOGLE_DRIVE - MICROSOFT_ONEDRIVE - MICROSOFT_SHAREPOINT - SLACK - NOTION_PAGE - CONFLUENCE - JIRA - JIRA_V2 - BOX title: ConfigurableDataSourceNames ConfigurationCreateRequest: properties: name: type: string maxLength: 255 minLength: 1 title: Name description: Human-readable name for this configuration. 
parameters: oneOf: - $ref: '#/components/schemas/SplitV1Parameters' - $ref: '#/components/schemas/ExtractV2Parameters' - $ref: '#/components/schemas/ClassifyV2Parameters' - $ref: '#/components/schemas/ParseV2Parameters' - $ref: '#/components/schemas/SpreadsheetV1Parameters' - $ref: '#/components/schemas/UntypedParameters' title: Parameters description: Product-specific configuration parameters. discriminator: propertyName: product_type mapping: classify_v2: '#/components/schemas/ClassifyV2Parameters' extract_v2: '#/components/schemas/ExtractV2Parameters' parse_v2: '#/components/schemas/ParseV2Parameters' split_v1: '#/components/schemas/SplitV1Parameters' spreadsheet_v1: '#/components/schemas/SpreadsheetV1Parameters' unknown: '#/components/schemas/UntypedParameters' type: object required: - name - parameters title: ConfigurationCreateRequest description: Request body for creating a product configuration. ConfigurationQueryResponse: properties: items: items: $ref: '#/components/schemas/ConfigurationResponse' type: array title: Items description: The list of items. next_page_token: anyOf: - type: string - type: 'null' title: Next Page Token description: A token, which can be sent as page_token to retrieve the next page. If this field is omitted, there are no subsequent pages. total_size: anyOf: - type: integer - type: 'null' title: Total Size description: The total number of items available. This is only populated when specifically requested. The value may be an estimate and can be used for display purposes only. type: object required: - items title: ConfigurationQueryResponse description: Paginated list of product configurations. ConfigurationResponse: properties: id: type: string title: Id description: Unique configuration ID. created_at: anyOf: - type: string format: date-time - type: 'null' title: Created At description: Creation timestamp. updated_at: anyOf: - type: string format: date-time - type: 'null' title: Updated At description: Last update timestamp. 
name: type: string title: Name description: Configuration name. product_type: type: string enum: - split_v1 - extract_v2 - classify_v2 - parse_v2 - spreadsheet_v1 - unknown title: Product Type description: Product type. version: type: string title: Version description: Version identifier (datetime string). parameters: oneOf: - $ref: '#/components/schemas/SplitV1Parameters' - $ref: '#/components/schemas/ExtractV2Parameters' - $ref: '#/components/schemas/ClassifyV2Parameters' - $ref: '#/components/schemas/ParseV2Parameters' - $ref: '#/components/schemas/SpreadsheetV1Parameters' - $ref: '#/components/schemas/UntypedParameters' title: Parameters description: Product-specific configuration parameters. discriminator: propertyName: product_type mapping: classify_v2: '#/components/schemas/ClassifyV2Parameters' extract_v2: '#/components/schemas/ExtractV2Parameters' parse_v2: '#/components/schemas/ParseV2Parameters' split_v1: '#/components/schemas/SplitV1Parameters' spreadsheet_v1: '#/components/schemas/SpreadsheetV1Parameters' unknown: '#/components/schemas/UntypedParameters' type: object required: - id - name - product_type - version - parameters title: ConfigurationResponse description: Response schema for a single product configuration. ConfigurationUpdateRequest: properties: name: anyOf: - type: string maxLength: 255 minLength: 1 - type: 'null' title: Name description: Updated name (omit to leave unchanged). 
parameters: anyOf: - oneOf: - $ref: '#/components/schemas/SplitV1Parameters' - $ref: '#/components/schemas/ExtractV2Parameters' - $ref: '#/components/schemas/ClassifyV2Parameters' - $ref: '#/components/schemas/ParseV2Parameters' - $ref: '#/components/schemas/SpreadsheetV1Parameters' - $ref: '#/components/schemas/UntypedParameters' discriminator: propertyName: product_type mapping: classify_v2: '#/components/schemas/ClassifyV2Parameters' extract_v2: '#/components/schemas/ExtractV2Parameters' parse_v2: '#/components/schemas/ParseV2Parameters' split_v1: '#/components/schemas/SplitV1Parameters' spreadsheet_v1: '#/components/schemas/SpreadsheetV1Parameters' unknown: '#/components/schemas/UntypedParameters' - type: 'null' title: Parameters description: Updated parameters (omit to leave unchanged). type: object title: ConfigurationUpdateRequest description: Request body for updating a product configuration. DataSink: properties: id: type: string format: uuid title: Id description: Unique identifier created_at: anyOf: - type: string format: date-time - type: 'null' title: Created At description: Creation datetime updated_at: anyOf: - type: string format: date-time - type: 'null' title: Updated At description: Update datetime name: type: string title: Name description: The name of the data sink. 
sink_type: $ref: '#/components/schemas/ConfigurableDataSinkNames' component: anyOf: - additionalProperties: true type: object - $ref: '#/components/schemas/CloudPineconeVectorStore' - $ref: '#/components/schemas/CloudPostgresVectorStore' - $ref: '#/components/schemas/CloudQdrantVectorStore' - $ref: '#/components/schemas/CloudAzureAISearchVectorStore' - $ref: '#/components/schemas/CloudMongoDBAtlasVectorSearch' - $ref: '#/components/schemas/CloudMilvusVectorStore' - $ref: '#/components/schemas/CloudAstraDBVectorStore' title: DataSinkCreateComponent description: Component that implements the data sink project_id: type: string format: uuid title: Project Id type: object required: - id - name - sink_type - component - project_id title: DataSink description: Schema for a data sink. DataSinkCreate: properties: name: type: string title: Name description: The name of the data sink. sink_type: $ref: '#/components/schemas/ConfigurableDataSinkNames' component: anyOf: - additionalProperties: true type: object - $ref: '#/components/schemas/CloudPineconeVectorStore' - $ref: '#/components/schemas/CloudPostgresVectorStore' - $ref: '#/components/schemas/CloudQdrantVectorStore' - $ref: '#/components/schemas/CloudAzureAISearchVectorStore' - $ref: '#/components/schemas/CloudMongoDBAtlasVectorSearch' - $ref: '#/components/schemas/CloudMilvusVectorStore' - $ref: '#/components/schemas/CloudAstraDBVectorStore' title: DataSinkCreateComponent description: Component that implements the data sink type: object required: - name - sink_type - component title: DataSinkCreate description: Schema for creating a data sink. DataSinkUpdate: properties: name: anyOf: - type: string - type: 'null' title: Name description: The name of the data sink. 
sink_type: $ref: '#/components/schemas/ConfigurableDataSinkNames' component: anyOf: - additionalProperties: true type: object - $ref: '#/components/schemas/CloudPineconeVectorStore' - $ref: '#/components/schemas/CloudPostgresVectorStore' - $ref: '#/components/schemas/CloudQdrantVectorStore' - $ref: '#/components/schemas/CloudAzureAISearchVectorStore' - $ref: '#/components/schemas/CloudMongoDBAtlasVectorSearch' - $ref: '#/components/schemas/CloudMilvusVectorStore' - $ref: '#/components/schemas/CloudAstraDBVectorStore' - type: 'null' title: DataSinkUpdateComponent description: Component that implements the data sink type: object required: - sink_type title: DataSinkUpdate description: Schema for updating a data sink. DataSource: properties: id: type: string format: uuid title: Id description: Unique identifier created_at: anyOf: - type: string format: date-time - type: 'null' title: Created At description: Creation datetime updated_at: anyOf: - type: string format: date-time - type: 'null' title: Updated At description: Update datetime name: type: string title: Name description: The name of the data source. 
source_type: $ref: '#/components/schemas/ConfigurableDataSourceNames' custom_metadata: anyOf: - additionalProperties: anyOf: - additionalProperties: true type: object - items: {} type: array - type: string - type: integer - type: number - type: boolean - type: 'null' type: object - type: 'null' title: Custom Metadata description: Custom metadata that will be present on all data loaded from the data source component: anyOf: - additionalProperties: true type: object - $ref: '#/components/schemas/CloudS3DataSource' - $ref: '#/components/schemas/CloudAzStorageBlobDataSource' - $ref: '#/components/schemas/CloudGoogleDriveDataSource' - $ref: '#/components/schemas/CloudOneDriveDataSource' - $ref: '#/components/schemas/CloudSharepointDataSource' - $ref: '#/components/schemas/CloudSlackDataSource' - $ref: '#/components/schemas/CloudNotionPageDataSource' - $ref: '#/components/schemas/CloudConfluenceDataSource' - $ref: '#/components/schemas/CloudJiraDataSource' - $ref: '#/components/schemas/CloudJiraDataSourceV2' - $ref: '#/components/schemas/CloudBoxDataSource' title: DataSourceCreateComponent description: Component that implements the data source version_metadata: anyOf: - $ref: '#/components/schemas/DataSourceReaderVersionMetadata' - type: 'null' description: Version metadata for the data source project_id: type: string format: uuid title: Project Id type: object required: - id - name - source_type - component - project_id title: DataSource description: Schema for a data source. DataSourceCreate: properties: name: type: string title: Name description: The name of the data source. 
source_type: $ref: '#/components/schemas/ConfigurableDataSourceNames' custom_metadata: anyOf: - additionalProperties: anyOf: - additionalProperties: true type: object - items: {} type: array - type: string - type: integer - type: number - type: boolean - type: 'null' type: object - type: 'null' title: Custom Metadata description: Custom metadata that will be present on all data loaded from the data source component: anyOf: - additionalProperties: true type: object - $ref: '#/components/schemas/CloudS3DataSource' - $ref: '#/components/schemas/CloudAzStorageBlobDataSource' - $ref: '#/components/schemas/CloudGoogleDriveDataSource' - $ref: '#/components/schemas/CloudOneDriveDataSource' - $ref: '#/components/schemas/CloudSharepointDataSource' - $ref: '#/components/schemas/CloudSlackDataSource' - $ref: '#/components/schemas/CloudNotionPageDataSource' - $ref: '#/components/schemas/CloudConfluenceDataSource' - $ref: '#/components/schemas/CloudJiraDataSource' - $ref: '#/components/schemas/CloudJiraDataSourceV2' - $ref: '#/components/schemas/CloudBoxDataSource' title: DataSourceCreateComponent description: Component that implements the data source type: object required: - name - source_type - component title: DataSourceCreate description: Schema for creating a data source. DataSourceReaderVersionMetadata: properties: reader_version: anyOf: - type: string enum: - '1.0' - '2.0' - '2.1' - type: 'null' title: Reader Version description: The version of the reader to use for this data source. type: object title: DataSourceReaderVersionMetadata DataSourceSyncRequest: properties: pipeline_file_ids: anyOf: - items: type: string format: uuid type: array - type: 'null' title: Pipeline File Ids type: object title: DataSourceSyncRequest description: "Request model for syncing pipeline data sources.\n\nNotes:\n-\ \ This endpoint pulls from the 3p data source and parses.\n- We accept pipeline_file_ids\ \ for now because callers (e.g. add_files_to_pipeline)\n often have these\ \ IDs handy. 
Internally we map them to external IDs to filter\n the reader,\ \ which enumerates by external ID.\n- Likely to evolve to take external IDs\ \ directly; keeping this for convenience." DataSourceUpdate: properties: name: anyOf: - type: string - type: 'null' title: Name description: The name of the data source. source_type: $ref: '#/components/schemas/ConfigurableDataSourceNames' custom_metadata: anyOf: - additionalProperties: anyOf: - additionalProperties: true type: object - items: {} type: array - type: string - type: integer - type: number - type: boolean - type: 'null' type: object - type: 'null' title: Custom Metadata description: Custom metadata that will be present on all data loaded from the data source component: anyOf: - additionalProperties: true type: object - $ref: '#/components/schemas/CloudS3DataSource' - $ref: '#/components/schemas/CloudAzStorageBlobDataSource' - $ref: '#/components/schemas/CloudGoogleDriveDataSource' - $ref: '#/components/schemas/CloudOneDriveDataSource' - $ref: '#/components/schemas/CloudSharepointDataSource' - $ref: '#/components/schemas/CloudSlackDataSource' - $ref: '#/components/schemas/CloudNotionPageDataSource' - $ref: '#/components/schemas/CloudConfluenceDataSource' - $ref: '#/components/schemas/CloudJiraDataSource' - $ref: '#/components/schemas/CloudJiraDataSourceV2' - $ref: '#/components/schemas/CloudBoxDataSource' - type: 'null' title: DataSourceUpdateComponent description: Component that implements the data source type: object required: - source_type title: DataSourceUpdate description: Schema for updating a data source. 
# Bulk delete of agent data: request selects items by deployment, collection
# and optional per-field filters; response reports how many were deleted.
DeleteRequest:
  properties:
    deployment_name:
      type: string
      title: Deployment Name
      description: The agent deployment's name to delete data for
    collection:
      type: string
      title: Collection
      description: The logical agent data collection to delete from
      default: default
    filter:
      anyOf:
      - additionalProperties:
          $ref: '#/components/schemas/FilterOperation'
        type: object
      - type: 'null'
      title: Filter
      description: Optional filters to select which items to delete
  type: object
  required:
  - deployment_name
  title: DeleteRequest
  description: API request body for bulk deleting agent data by query
DeleteResponse:
  properties:
    deleted_count:
      type: integer
      title: Deleted Count
  type: object
  required:
  - deleted_count
  title: DeleteResponse
  description: API response for bulk delete operation
# Composite retrieval parameters; `query` is the only required field.
DirectRetrievalParams:
  properties:
    mode:
      $ref: '#/components/schemas/CompositeRetrievalMode'
      description: The mode of composite retrieval.
      default: full
    rerank_top_n:
      anyOf:
      - type: integer
      - type: 'null'
      title: Rerank Top N
      description: (use rerank_config.top_n instead) The number of nodes to retrieve
        after reranking over retrieved nodes from all retrieval tools.
      deprecated: true
    rerank_config:
      $ref: '#/components/schemas/ReRankConfig'
      description: The rerank configuration for composite retrieval.
    query:
      type: string
      minLength: 1
      title: Query
      description: The query to retrieve against.
    pipelines:
      items:
        $ref: '#/components/schemas/RetrieverPipeline'
      type: array
      title: Pipelines
      description: The pipelines to use for retrieval.
  type: object
  required:
  - query
  title: DirectRetrievalParams
DirectoryCreateRequest:
  properties:
    name:
      type: string
      minLength: 1
      title: Name
      description: Human-readable name for the directory.
    description:
      anyOf:
      - type: string
      - type: 'null'
      title: Description
      description: Optional description shown to users.
    type:
      type: string
      enum:
      - user
      - ephemeral
      title: Type
      description: Directory type. Use 'ephemeral' for batch processing with automatic
        cleanup.
      default: user
      examples:
      - user
      - ephemeral
    expires_at:
      anyOf:
      - type: string
        format: date-time
      - type: 'null'
      title: Expires At
      description: When this directory expires. Required for ephemeral directories.
      examples:
      - '2026-05-10T00:00:00Z'
    system_metadata:
      anyOf:
      - additionalProperties: true
        type: object
      - type: 'null'
      title: System Metadata
      description: Reserved system-managed metadata.
  type: object
  required:
  - name
  title: DirectoryCreateRequest
  description: API request schema for creating a directory.
DirectoryFileBulkDeleteRequest:
  properties:
    directory_file_ids:
      items:
        type: string
      type: array
      maxItems: 100
      minItems: 1
      title: Directory File Ids
      description: List of directory file IDs to delete (max 100).
  type: object
  required:
  - directory_file_ids
  title: DirectoryFileBulkDeleteRequest
  description: API request schema for bulk deleting directory files.
DirectoryFileCreateRequest:
  properties:
    file_id:
      type: string
      title: File Id
      description: File ID for the storage location (required).
    unique_id:
      anyOf:
      - type: string
      - type: 'null'
      title: Unique Id
      description: Unique identifier for the file in the directory. If not provided,
        will use the file's external_file_id or name.
    display_name:
      anyOf:
      - type: string
      - type: 'null'
      title: Display Name
      description: Display name for the file. If not provided, will use the file's
        name.
    metadata:
      anyOf:
      - $ref: '#/components/schemas/MetadataDict'
      - type: 'null'
      description: User-defined metadata key-value pairs to associate with the file.
  type: object
  required:
  - file_id
  title: DirectoryFileCreateRequest
  description: API request schema for creating a directory file.
# Paginated listing: `items` is required, the paging fields are nullable.
DirectoryFileQueryResponse:
  properties:
    items:
      items:
        $ref: '#/components/schemas/DirectoryFileResponse'
      type: array
      title: Items
      description: The list of items.
    next_page_token:
      anyOf:
      - type: string
      - type: 'null'
      title: Next Page Token
      description: A token, which can be sent as page_token to retrieve the next
        page. If this field is omitted, there are no subsequent pages.
    total_size:
      anyOf:
      - type: integer
      - type: 'null'
      title: Total Size
      description: The total number of items available. This is only populated when
        specifically requested. The value may be an estimate and can be used for
        display purposes only.
  type: object
  required:
  - items
  title: DirectoryFileQueryResponse
  description: API query response schema for directory files.
DirectoryFileResponse:
  properties:
    id:
      type: string
      title: Id
      description: Unique identifier for the directory file.
    created_at:
      anyOf:
      - type: string
        format: date-time
      - type: 'null'
      title: Created At
      description: Creation datetime
    updated_at:
      anyOf:
      - type: string
        format: date-time
      - type: 'null'
      title: Updated At
      description: Update datetime
    project_id:
      type: string
      title: Project Id
      description: Project the directory file belongs to.
    directory_id:
      type: string
      title: Directory Id
      description: Directory the file belongs to.
    unique_id:
      type: string
      minLength: 1
      title: Unique Id
      description: Unique identifier for the file in the directory
    display_name:
      type: string
      minLength: 1
      title: Display Name
      description: Display name for the file.
    file_id:
      anyOf:
      - type: string
      - type: 'null'
      title: File Id
      description: File ID for the storage location.
    metadata:
      $ref: '#/components/schemas/MetadataDict'
      description: Merged metadata from all sources. Higher-priority sources override
        lower.
    deleted_at:
      anyOf:
      - type: string
        format: date-time
      - type: 'null'
      title: Deleted At
      description: Soft delete marker when the file is removed upstream or by user
        action.
    download_url:
      anyOf:
      - $ref: '#/components/schemas/PresignedUrl'
      - type: 'null'
      description: Presigned URL to download the underlying file content.
  type: object
  required:
  - id
  - project_id
  - directory_id
  - unique_id
  - display_name
  title: DirectoryFileResponse
  description: API response schema for a directory file.
# Partial update: no field is required and every field accepts null.
DirectoryFileUpdateRequest:
  properties:
    unique_id:
      anyOf:
      - type: string
        minLength: 1
      - type: 'null'
      title: Unique Id
      description: Updated unique identifier.
    display_name:
      anyOf:
      - type: string
      - type: 'null'
      title: Display Name
      description: Updated display name.
    target_directory_id:
      anyOf:
      - type: string
      - type: 'null'
      title: Target Directory Id
      description: Move file to a different directory.
    metadata:
      anyOf:
      - $ref: '#/components/schemas/MetadataDict'
      - type: 'null'
      description: User-defined metadata key-value pairs. Replaces the user metadata
        layer.
  type: object
  title: DirectoryFileUpdateRequest
  description: API request schema for updating a directory file.
DirectoryQueryResponse:
  properties:
    items:
      items:
        $ref: '#/components/schemas/DirectoryResponse'
      type: array
      title: Items
      description: The list of items.
    next_page_token:
      anyOf:
      - type: string
      - type: 'null'
      title: Next Page Token
      description: A token, which can be sent as page_token to retrieve the next
        page. If this field is omitted, there are no subsequent pages.
    total_size:
      anyOf:
      - type: integer
      - type: 'null'
      title: Total Size
      description: The total number of items available. This is only populated when
        specifically requested. The value may be an estimate and can be used for
        display purposes only.
  type: object
  required:
  - items
  title: DirectoryQueryResponse
  description: API query response schema for directories.
DirectoryResponse:
  properties:
    id:
      type: string
      title: Id
      description: Unique identifier for the directory.
    created_at:
      anyOf:
      - type: string
        format: date-time
      - type: 'null'
      title: Created At
      description: Creation datetime
    updated_at:
      anyOf:
      - type: string
        format: date-time
      - type: 'null'
      title: Updated At
      description: Update datetime
    project_id:
      type: string
      title: Project Id
      description: Project the directory belongs to.
    name:
      type: string
      minLength: 1
      title: Name
      description: Human-readable name for the directory.
    description:
      anyOf:
      - type: string
      - type: 'null'
      title: Description
      description: Optional description shown to users.
    type:
      anyOf:
      - type: string
        enum:
        - user
        - index
        - ephemeral
        - system_ephemeral
      - type: 'null'
      title: Type
      description: 'Directory type: ''user'', ''index'', ''ephemeral'', or ''system_ephemeral''.'
    expires_at:
      anyOf:
      - type: string
        format: date-time
      - type: 'null'
      title: Expires At
      description: When this directory expires and is eligible for cleanup.
    system_metadata:
      anyOf:
      - additionalProperties: true
        type: object
      - type: 'null'
      title: System Metadata
      description: Reserved system-managed metadata.
    deleted_at:
      anyOf:
      - type: string
        format: date-time
      - type: 'null'
      title: Deleted At
      description: Optional timestamp of when the directory was deleted. Null if
        not deleted.
  type: object
  required:
  - id
  - project_id
  - name
  title: DirectoryResponse
  description: API response schema for a directory.
DirectoryUpdateRequest:
  properties:
    name:
      anyOf:
      - type: string
        minLength: 1
      - type: 'null'
      title: Name
      description: Updated name for the directory.
    description:
      anyOf:
      - type: string
      - type: 'null'
      title: Description
      description: Updated description for the directory.
  type: object
  title: DirectoryUpdateRequest
  description: API request schema for updating a directory.
DocumentChunkMode:
  type: string
  enum:
  - PAGE
  - SECTION
  title: DocumentChunkMode
  description: How to chunk documents.
# Segmentation config whose `mode` is pinned to the constant `element`.
ElementSegmentationConfig:
  properties:
    mode:
      type: string
      const: element
      title: Mode
      default: element
  type: object
  title: ElementSegmentationConfig
EmbeddingModelConfig:
  properties:
    id:
      type: string
      format: uuid
      title: Id
      description: Unique identifier
    created_at:
      anyOf:
      - type: string
        format: date-time
      - type: 'null'
      title: Created At
      description: Creation datetime
    updated_at:
      anyOf:
      - type: string
        format: date-time
      - type: 'null'
      title: Updated At
      description: Update datetime
    name:
      type: string
      title: Name
      description: The name of the embedding model config.
    # Exactly one provider config; the provider is selected by its `type` value.
    embedding_config:
      oneOf:
      - $ref: '#/components/schemas/AzureOpenAIEmbeddingConfig'
      - $ref: '#/components/schemas/CohereEmbeddingConfig'
      - $ref: '#/components/schemas/GeminiEmbeddingConfig'
      - $ref: '#/components/schemas/HuggingFaceInferenceAPIEmbeddingConfig'
      - $ref: '#/components/schemas/OpenAIEmbeddingConfig'
      - $ref: '#/components/schemas/VertexAIEmbeddingConfig'
      - $ref: '#/components/schemas/BedrockEmbeddingConfig'
      title: Embedding Config
      description: The embedding configuration for the embedding model config.
      discriminator:
        propertyName: type
        mapping:
          AZURE_EMBEDDING: '#/components/schemas/AzureOpenAIEmbeddingConfig'
          BEDROCK_EMBEDDING: '#/components/schemas/BedrockEmbeddingConfig'
          COHERE_EMBEDDING: '#/components/schemas/CohereEmbeddingConfig'
          GEMINI_EMBEDDING: '#/components/schemas/GeminiEmbeddingConfig'
          HUGGINGFACE_API_EMBEDDING: '#/components/schemas/HuggingFaceInferenceAPIEmbeddingConfig'
          OPENAI_EMBEDDING: '#/components/schemas/OpenAIEmbeddingConfig'
          VERTEXAI_EMBEDDING: '#/components/schemas/VertexAIEmbeddingConfig'
    project_id:
      type: string
      format: uuid
      title: Project Id
  type: object
  required:
  - id
  - name
  - embedding_config
  - project_id
  title: EmbeddingModelConfig
  description: Schema for an embedding model config.
# Creation payload: `name` and `embedding_config` are both required.
EmbeddingModelConfigCreate:
  properties:
    name:
      type: string
      title: Name
      description: The name of the embedding model config.
    embedding_config:
      oneOf:
      - $ref: '#/components/schemas/AzureOpenAIEmbeddingConfig'
      - $ref: '#/components/schemas/CohereEmbeddingConfig'
      - $ref: '#/components/schemas/GeminiEmbeddingConfig'
      - $ref: '#/components/schemas/HuggingFaceInferenceAPIEmbeddingConfig'
      - $ref: '#/components/schemas/OpenAIEmbeddingConfig'
      - $ref: '#/components/schemas/VertexAIEmbeddingConfig'
      - $ref: '#/components/schemas/BedrockEmbeddingConfig'
      title: Embedding Config
      description: The embedding configuration for the embedding model config.
      discriminator:
        propertyName: type
        mapping:
          AZURE_EMBEDDING: '#/components/schemas/AzureOpenAIEmbeddingConfig'
          BEDROCK_EMBEDDING: '#/components/schemas/BedrockEmbeddingConfig'
          COHERE_EMBEDDING: '#/components/schemas/CohereEmbeddingConfig'
          GEMINI_EMBEDDING: '#/components/schemas/GeminiEmbeddingConfig'
          HUGGINGFACE_API_EMBEDDING: '#/components/schemas/HuggingFaceInferenceAPIEmbeddingConfig'
          OPENAI_EMBEDDING: '#/components/schemas/OpenAIEmbeddingConfig'
          VERTEXAI_EMBEDDING: '#/components/schemas/VertexAIEmbeddingConfig'
  type: object
  required:
  - name
  - embedding_config
  title: EmbeddingModelConfigCreate
# Update payload: both fields are optional and nullable.
EmbeddingModelConfigUpdate:
  properties:
    name:
      anyOf:
      - type: string
      - type: 'null'
      title: Name
      description: The name of the embedding model config.
    embedding_config:
      anyOf:
      - oneOf:
        - $ref: '#/components/schemas/AzureOpenAIEmbeddingConfig'
        - $ref: '#/components/schemas/CohereEmbeddingConfig'
        - $ref: '#/components/schemas/GeminiEmbeddingConfig'
        - $ref: '#/components/schemas/HuggingFaceInferenceAPIEmbeddingConfig'
        - $ref: '#/components/schemas/OpenAIEmbeddingConfig'
        - $ref: '#/components/schemas/VertexAIEmbeddingConfig'
        - $ref: '#/components/schemas/BedrockEmbeddingConfig'
        discriminator:
          propertyName: type
          mapping:
            AZURE_EMBEDDING: '#/components/schemas/AzureOpenAIEmbeddingConfig'
            BEDROCK_EMBEDDING: '#/components/schemas/BedrockEmbeddingConfig'
            COHERE_EMBEDDING: '#/components/schemas/CohereEmbeddingConfig'
            GEMINI_EMBEDDING: '#/components/schemas/GeminiEmbeddingConfig'
            HUGGINGFACE_API_EMBEDDING: '#/components/schemas/HuggingFaceInferenceAPIEmbeddingConfig'
            OPENAI_EMBEDDING: '#/components/schemas/OpenAIEmbeddingConfig'
            VERTEXAI_EMBEDDING: '#/components/schemas/VertexAIEmbeddingConfig'
      - type: 'null'
      title: Embedding Config
      description: The embedding configuration for the embedding model config.
  type: object
  title: EmbeddingModelConfigUpdate
ExtractAgent:
  properties:
    id:
      type: string
      format: uuid
      title: Id
      description: The id of the extraction agent.
    name:
      type: string
      title: Name
      description: The name of the extraction agent.
    project_id:
      type: string
      format: uuid
      title: Project Id
      description: The ID of the project that the extraction agent belongs to.
    data_schema:
      additionalProperties:
        anyOf:
        - additionalProperties: true
          type: object
        - items: {}
          type: array
        - type: string
        - type: integer
        - type: number
        - type: boolean
        - type: 'null'
      type: object
      title: Data Schema
      description: The schema of the data.
    config:
      $ref: '#/components/schemas/ExtractConfig'
      description: The configuration parameters for the extraction agent.
    custom_configuration:
      anyOf:
      - type: string
        const: default
      - type: 'null'
      title: Custom Configuration
      description: Custom configuration type for the extraction agent. Currently
        supports 'default'.
    created_at:
      anyOf:
      - type: string
        format: date-time
      - type: 'null'
      title: Created At
      description: The creation time of the extraction agent.
    updated_at:
      anyOf:
      - type: string
        format: date-time
      - type: 'null'
      title: Updated At
      description: The last update time of the extraction agent.
  type: object
  required:
  - id
  - name
  - project_id
  - data_schema
  - config
  title: ExtractAgent
  description: Schema and configuration for an extraction agent.
ExtractAgentCreate:
  properties:
    name:
      type: string
      maxLength: 3000
      minLength: 1
      title: Name
      # Wording aligned with ExtractAgent.name: this schema creates an agent.
      description: The name of the extraction agent.
    # Accepts either a JSON-object schema or a plain string.
    data_schema:
      anyOf:
      - additionalProperties:
          anyOf:
          - additionalProperties: true
            type: object
          - items: {}
            type: array
          - type: string
          - type: integer
          - type: number
          - type: boolean
          - type: 'null'
        type: object
      - type: string
      title: Data Schema
      description: The schema of the data.
    config:
      $ref: '#/components/schemas/ExtractConfig'
      description: The configuration parameters for the extraction agent.
  type: object
  required:
  - name
  - data_schema
  - config
  title: ExtractAgentCreate
  description: Settings for creating an extraction agent.
ExtractAgentUpdate:
  properties:
    # Accepts either a JSON-object schema or a plain string.
    data_schema:
      anyOf:
      - additionalProperties:
          anyOf:
          - additionalProperties: true
            type: object
          - items: {}
            type: array
          - type: string
          - type: integer
          - type: number
          - type: boolean
          - type: 'null'
        type: object
      - type: string
      title: Data Schema
      description: The schema of the data
    config:
      $ref: '#/components/schemas/ExtractConfig'
      description: The configuration parameters for the extraction agent.
  type: object
  required:
  - data_schema
  - config
  title: ExtractAgentUpdate
  # Wording aligned with ExtractAgentCreate ("extraction agent").
  description: Settings for updating an extraction agent.
# Extraction agent configuration; no field is required.
ExtractConfig:
  properties:
    priority:
      anyOf:
      - type: string
        enum:
        - low
        - medium
        - high
        - critical
      - type: 'null'
      title: Priority
      description: The priority for the request. This field may be ignored or overwritten
        depending on the organization tier.
    extraction_target:
      $ref: '#/components/schemas/ExtractTarget'
      description: The extraction target specified.
      default: PER_DOC
    extraction_mode:
      $ref: '#/components/schemas/ExtractMode'
      description: The extraction mode specified (FAST, BALANCED, MULTIMODAL, PREMIUM).
      default: PREMIUM
    parse_model:
      anyOf:
      - $ref: '#/components/schemas/PublicModelName'
      - type: 'null'
      description: The parse model to use for document parsing. If not provided,
        uses the default for the extraction mode.
    extract_model:
      anyOf:
      - $ref: '#/components/schemas/ExtractModels'
      - type: string
      - type: 'null'
      title: Extract Model
      description: The extract model to use for data extraction. If not provided,
        uses the default for the extraction mode.
    multimodal_fast_mode:
      type: boolean
      title: Multimodal Fast Mode
      description: 'DEPRECATED: Whether to use fast mode for multimodal extraction.'
      default: false
      # Machine-readable counterpart of the DEPRECATED note in the description,
      # matching how citation_bbox is flagged below.
      deprecated: true
    system_prompt:
      anyOf:
      - type: string
      - type: 'null'
      title: System Prompt
      description: The system prompt to use for the extraction.
    use_reasoning:
      type: boolean
      title: Use Reasoning
      description: Whether to use reasoning for the extraction.
      default: false
    cite_sources:
      type: boolean
      title: Cite Sources
      description: Whether to cite sources for the extraction.
      default: false
    citation_bbox:
      type: boolean
      title: Citation Bbox
      description: 'Whether to fetch citation bounding boxes for the extraction.
        Only available in PREMIUM mode. Deprecated: this is now synonymous with
        cite_sources.'
      default: false
      deprecated: true
    confidence_scores:
      type: boolean
      title: Confidence Scores
      description: Whether to fetch confidence scores for the extraction.
      default: false
    chunk_mode:
      $ref: '#/components/schemas/DocumentChunkMode'
      description: The mode to use for chunking the document.
      default: PAGE
    high_resolution_mode:
      type: boolean
      title: High Resolution Mode
      description: Whether to use high resolution mode for the extraction.
      default: false
    invalidate_cache:
      type: boolean
      title: Invalidate Cache
      description: Whether to invalidate the cache for the extraction.
      default: false
    num_pages_context:
      anyOf:
      - type: integer
        minimum: 1.0
      - type: 'null'
      title: Num Pages Context
      description: Number of pages to pass as context on long document extraction.
    page_range:
      anyOf:
      - type: string
      - type: 'null'
      title: Page Range
      description: Comma-separated list of page numbers or ranges to extract from
        (1-based, e.g., '1,3,5-7,9' or '1-3,8-10').
  type: object
  title: ExtractConfig
  description: Configuration parameters for the extraction agent.
ExtractConfiguration:
  properties:
    target_pages:
      anyOf:
      - type: string
      - type: 'null'
      title: Target Pages
      description: Comma-separated page numbers or ranges to process (1-based). Omit
        to process all pages.
      examples:
      - 1,3,5-7
      - 1-3,8-10
    max_pages:
      anyOf:
      - type: integer
        minimum: 1.0
      - type: 'null'
      title: Max Pages
      description: Maximum number of pages to process. Omit for no limit.
      examples:
      - 10
    tier:
      type: string
      enum:
      - cost_effective
      - agentic
      title: Tier
      description: 'Extract tier: cost_effective (5 credits/page) or agentic (15
        credits/page)'
      default: cost_effective
      examples:
      - cost_effective
      - agentic
    version:
      type: string
      title: Version
      description: Use 'latest' for the latest release for the selected tier or a
        date string (YYYY-MM-DD format) to pin to the nearest release at or before
        that date.
      default: latest
      examples:
      - latest
    data_schema:
      additionalProperties:
        anyOf:
        - additionalProperties: true
          type: object
        - items: {}
          type: array
        - type: string
        - type: integer
        - type: number
        - type: boolean
        - type: 'null'
      type: object
      title: Data Schema
      description: JSON Schema defining the fields to extract. Validate with the
        /schema/validate endpoint first.
    extraction_target:
      type: string
      enum:
      - per_doc
      - per_page
      - per_table_row
      title: Extraction Target
      description: 'Granularity of extraction: per_doc returns one object per document,
        per_page returns one object per page, per_table_row returns one object per
        table row'
      default: per_doc
      examples:
      - per_doc
      - per_page
      - per_table_row
    system_prompt:
      anyOf:
      - type: string
      - type: 'null'
      title: System Prompt
      description: Custom system prompt to guide extraction behavior
      examples:
      - Extract all monetary values in USD. If a currency is not specified, assume
        USD.
    cite_sources:
      type: boolean
      title: Cite Sources
      description: Include citations in results
      default: false
    confidence_scores:
      type: boolean
      title: Confidence Scores
      description: Include confidence scores in results
      default: false
    parse_tier:
      anyOf:
      - type: string
      - type: 'null'
      title: Parse Tier
      description: Parse tier to use before extraction. Defaults to the extract tier
        if not specified.
      examples:
      - fast
      - cost_effective
    parse_config_id:
      anyOf:
      - type: string
      - type: 'null'
      title: Parse Config Id
      description: Saved parse configuration ID to control how the document is parsed
        before extraction
      examples:
      - cfg-11111111-2222-3333-4444-555555555555
  type: object
  required:
  - data_schema
  title: ExtractConfiguration
  description: Extract configuration combining parse and extract settings.
ExtractJob:
  properties:
    id:
      type: string
      format: uuid
      title: Id
      description: The id of the extraction job
    extraction_agent:
      $ref: '#/components/schemas/ExtractAgent'
      description: The agent that the job was run on.
    status:
      type: string
      enum:
      - PENDING
      - SUCCESS
      - ERROR
      - PARTIAL_SUCCESS
      - CANCELLED
      title: Status
      description: The status of the extraction job
    error:
      anyOf:
      - type: string
      - type: 'null'
      title: Error
      description: The error that occurred during extraction
    file_id:
      anyOf:
      - type: string
        format: uuid
      - type: 'null'
      title: File Id
      description: The id of the file that the extract was extracted from
    file:
      anyOf:
      - $ref: '#/components/schemas/File'
      - type: 'null'
      description: '[DEPRECATED] The file that the extract was extracted from'
      deprecated: true
  type: object
  required:
  - id
  - extraction_agent
  - status
  title: ExtractJob
  description: Schema for an extraction job.
ExtractJobCreate:
  properties:
    priority:
      anyOf:
      - type: string
        enum:
        - low
        - medium
        - high
        - critical
      - type: 'null'
      title: Priority
      description: The priority for the request. This field may be ignored or overwritten
        depending on the organization tier.
    webhook_configurations:
      anyOf:
      - items:
          $ref: '#/components/schemas/WebhookConfiguration'
        type: array
      - type: 'null'
      title: Webhook Configurations
      description: Outbound webhook endpoints to notify on job status changes
    extraction_agent_id:
      type: string
      format: uuid
      title: Extraction Agent Id
      description: The id of the extraction agent
    file_id:
      type: string
      format: uuid
      title: File Id
      description: The id of the file
    # Optional per-job override: a JSON-object schema, a plain string, or null.
    data_schema_override:
      anyOf:
      - additionalProperties:
          anyOf:
          - additionalProperties: true
            type: object
          - items: {}
            type: array
          - type: string
          - type: integer
          - type: number
          - type: boolean
          - type: 'null'
        type: object
      - type: string
      - type: 'null'
      title: Data Schema Override
      description: The data schema to override the extraction agent's data schema
        with
    config_override:
      anyOf:
      - $ref: '#/components/schemas/ExtractConfig'
      - type: 'null'
      description: The config to override the extraction agent's config with
  type: object
  required:
  - extraction_agent_id
  - file_id
  title: ExtractJobCreate
  description: Schema for creating an extraction job.
# Batch job creation: one agent, at least one file id (minItems: 1).
ExtractJobCreateBatch:
  properties:
    webhook_configurations:
      anyOf:
      - items:
          $ref: '#/components/schemas/WebhookConfiguration'
        type: array
      - type: 'null'
      title: Webhook Configurations
      description: Outbound webhook endpoints to notify on job status changes
    extraction_agent_id:
      type: string
      format: uuid
      title: Extraction Agent Id
      description: The id of the extraction agent
    file_ids:
      items:
        type: string
        format: uuid
      type: array
      minItems: 1
      title: File Ids
      description: The ids of the files
    data_schema_override:
      anyOf:
      - additionalProperties:
          anyOf:
          - additionalProperties: true
            type: object
          - items: {}
            type: array
          - type: string
          - type: integer
          - type: number
          - type: boolean
          - type: 'null'
        type: object
      - type: string
      - type: 'null'
      title: Data Schema Override
      description: The data schema to override the extraction agent's data schema
        with
    config_override:
      anyOf:
      - $ref: '#/components/schemas/ExtractConfig'
      - type: 'null'
      description: The config to override the extraction agent's config with
  type: object
  required:
  - extraction_agent_id
  - file_ids
  title: ExtractJobCreateBatch
  description: Schema for creating extraction jobs in batch.
# All metadata fields are optional and nullable.
ExtractJobMetadata:
  properties:
    field_metadata:
      anyOf:
      - $ref: '#/components/schemas/ExtractedFieldMetadata'
      - type: 'null'
      description: Per-field metadata (citations, confidence, reasoning)
    parse_job_id:
      anyOf:
      - type: string
      - type: 'null'
      title: Parse Job Id
      description: Reference to the ParseJob ID used for parsing
    parse_tier:
      anyOf:
      - type: string
      - type: 'null'
      title: Parse Tier
      description: Parse tier used for parsing the document
  type: object
  title: ExtractJobMetadata
  description: Extraction metadata.
ExtractJobUsage:
  properties:
    num_pages_extracted:
      anyOf:
      - type: integer
      - type: 'null'
      title: Num Pages Extracted
      description: Number of pages extracted
  type: object
  title: ExtractJobUsage
  description: Extraction usage metrics.
# String enums used by the extraction configuration schemas.
ExtractMode:
  type: string
  enum:
  - FAST
  - BALANCED
  - PREMIUM
  - MULTIMODAL
  title: ExtractMode
  description: Extraction mode options.
ExtractModels:
  type: string
  enum:
  - openai-gpt-4-1
  - openai-gpt-4-1-mini
  - openai-gpt-4-1-nano
  - openai-gpt-5
  - openai-gpt-5-mini
  - gemini-2.0-flash
  - gemini-2.5-flash
  - gemini-2.5-pro
  - gemini-2.5-flash-lite
  - openai-gpt-4o
  - openai-gpt-4o-mini
  title: ExtractModels
  description: Extract model options.
ExtractResultset:
  properties:
    run_id:
      type: string
      format: uuid
      title: Run Id
      description: The id of the extraction run
    extraction_agent_id:
      type: string
      format: uuid
      title: Extraction Agent Id
      description: The id of the extraction agent
    # Either a single JSON object, a list of JSON objects, or null.
    data:
      anyOf:
      - additionalProperties:
          anyOf:
          - additionalProperties: true
            type: object
          - items: {}
            type: array
          - type: string
          - type: integer
          - type: number
          - type: boolean
          - type: 'null'
        type: object
      - items:
          additionalProperties:
            anyOf:
            - additionalProperties: true
              type: object
            - items: {}
              type: array
            - type: string
            - type: integer
            - type: number
            - type: boolean
            - type: 'null'
          type: object
        type: array
      - type: 'null'
      title: Data
      description: The data extracted from the file
    extraction_metadata:
      additionalProperties:
        anyOf:
        - additionalProperties: true
          type: object
        - items: {}
          type: array
        - type: string
        - type: integer
        - type: number
        - type: boolean
        - type: 'null'
      type: object
      title: Extraction Metadata
      description: The metadata extracted from the file
  type: object
  required:
  - run_id
  - extraction_agent_id
  - data
  - extraction_metadata
  title: ExtractResultset
  description: Schema for an extraction resultset.
# Record of a single extraction run.
ExtractRun:
  type: object
  title: ExtractRun
  description: Schema for an extraction run.
  required:
    - id
    - project_id
    - extraction_agent_id
    - data_schema
    - config
    - status
    - from_ui
  properties:
    id:
      type: string
      format: uuid
      title: Id
      description: The id of the extraction run
    created_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Created At
      description: Creation datetime
    updated_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Updated At
      description: Update datetime
    project_id:
      type: string
      format: uuid
      title: Project Id
      description: The id of the project that the extraction run belongs to
    extraction_agent_id:
      type: string
      format: uuid
      title: Extraction Agent Id
      description: The id of the extraction agent
    data_schema:
      additionalProperties:
        anyOf:
          - additionalProperties: true
            type: object
          - items: {}
            type: array
          - type: string
          - type: integer
          - type: number
          - type: boolean
          - type: 'null'
      type: object
      title: Data Schema
      description: The schema used for extraction
    config:
      # Wrapped in allOf because OpenAPI 3.0.x ignores keywords placed next
      # to a bare $ref, which would drop this description.
      allOf:
        - $ref: '#/components/schemas/ExtractConfig'
      description: The config used for extraction
    file_id:
      anyOf:
        - type: string
          format: uuid
        - type: 'null'
      title: File Id
      description: The id of the file that the extract was extracted from
    file:
      anyOf:
        - $ref: '#/components/schemas/File'
        - type: 'null'
      description: '[DEPRECATED] The file that the extract was extracted from'
      deprecated: true
    status:
      # Same allOf wrapper as `config`, so the description survives 3.0.x tooling.
      allOf:
        - $ref: '#/components/schemas/ExtractState'
      description: The status of the extraction run
    error:
      anyOf:
        - type: string
        - type: 'null'
      title: Error
      description: The error that occurred during extraction
    job_id:
      anyOf:
        - type: string
          format: uuid
        - type: 'null'
      title: Job Id
      description: The id of the job that the extraction run belongs to
    data:
      # Either one free-form JSON object, a list of such objects, or null.
      anyOf:
        - additionalProperties:
            anyOf:
              - additionalProperties: true
                type: object
              - items: {}
                type: array
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
          type: object
        - items:
            additionalProperties:
              anyOf:
                - additionalProperties: true
                  type: object
                - items: {}
                  type: array
                - type: string
                - type: integer
                - type: number
                - type: boolean
                - type: 'null'
            type: object
          type: array
        - type: 'null'
      title: Data
      description: The data extracted from the file
    extraction_metadata:
      anyOf:
        - additionalProperties:
            anyOf:
              - additionalProperties: true
                type: object
              - items: {}
                type: array
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
          type: object
        - type: 'null'
      title: Extraction Metadata
      description: The metadata extracted from the file
    from_ui:
      type: boolean
      title: From Ui
      description: Whether this extraction run was triggered from the UI

# Every property is optional and nullable.
ExtractSchemaGenerateRequest:
  type: object
  title: ExtractSchemaGenerateRequest
  description: Request schema for generating an extraction schema.
  properties:
    prompt:
      anyOf:
        - type: string
        - type: 'null'
      title: Prompt
      description: Natural language description of the data structure to extract
    file_id:
      anyOf:
        - type: string
          format: uuid
        - type: 'null'
      title: File Id
      description: Optional file ID to analyze for schema generation
    data_schema:
      # Accepts a free-form JSON object, a plain string, or null.
      anyOf:
        - additionalProperties:
            anyOf:
              - additionalProperties: true
                type: object
              - items: {}
                type: array
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
          type: object
        - type: string
        - type: 'null'
      title: Data Schema
      description: Optional schema to validate, refine, or extend during generation

ExtractSchemaGenerateResponse:
  type: object
  title: ExtractSchemaGenerateResponse
  description: Response schema for schema generation.
  required:
    - data_schema
  properties:
    data_schema:
      additionalProperties:
        anyOf:
          - additionalProperties: true
            type: object
          - items: {}
            type: array
          - type: string
          - type: integer
          - type: number
          - type: boolean
          - type: 'null'
      type: object
      title: Data Schema
      description: The generated JSON schema
# data_schema may be a free-form JSON object or a plain string.
ExtractSchemaValidateRequest:
  type: object
  title: ExtractSchemaValidateRequest
  description: Request schema for validating an extraction schema.
  required:
    - data_schema
  properties:
    data_schema:
      anyOf:
        - additionalProperties:
            anyOf:
              - additionalProperties: true
                type: object
              - items: {}
                type: array
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
          type: object
        - type: string
      title: Data Schema

ExtractSchemaValidateResponse:
  type: object
  title: ExtractSchemaValidateResponse
  description: Response schema for schema validation.
  required:
    - data_schema
  properties:
    data_schema:
      additionalProperties:
        anyOf:
          - additionalProperties: true
            type: object
          - items: {}
            type: array
          - type: string
          - type: integer
          - type: number
          - type: boolean
          - type: 'null'
      type: object
      title: Data Schema

# String enum; referenced by ExtractRun.status.
ExtractState:
  type: string
  title: ExtractState
  enum:
    - CREATED
    - PENDING
    - SUCCESS
    - ERROR

# Stateless extraction request: data_schema and config are required; the
# file_id, text and file inputs are each optional and nullable.
ExtractStatelessRequest:
  type: object
  title: ExtractStatelessRequest
  description: Schema for stateless extraction requests.
  required:
    - data_schema
    - config
  properties:
    webhook_configurations:
      anyOf:
        - items:
            $ref: '#/components/schemas/WebhookConfiguration'
          type: array
        - type: 'null'
      title: Webhook Configurations
      description: Outbound webhook endpoints to notify on job status changes
    data_schema:
      anyOf:
        - additionalProperties:
            anyOf:
              - additionalProperties: true
                type: object
              - items: {}
                type: array
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
          type: object
        - type: string
      title: Data Schema
      description: The schema of the data to extract
    config:
      # Wrapped in allOf because OpenAPI 3.0.x ignores keywords placed next
      # to a bare $ref, which would drop this description.
      allOf:
        - $ref: '#/components/schemas/ExtractConfig'
      description: The configuration parameters for the extraction
    file_id:
      anyOf:
        - type: string
          format: uuid
        - type: 'null'
      title: File Id
      description: The ID of the file to extract from
    text:
      anyOf:
        - type: string
        - type: 'null'
      title: Text
      description: The text content to extract from
    file:
      anyOf:
        - $ref: '#/components/schemas/FileData'
        - type: 'null'
      description: The file data with base64 content and MIME type

ExtractTarget:
  type: string
  title: ExtractTarget
  description: Defines the extraction target scope.
  enum:
    - PER_DOC
    - PER_PAGE
    - PER_TABLE_ROW

# Extraction job resource. Identifiers here are plain strings (no uuid
# format); the examples show prefixed ids.
ExtractV2Job:
  type: object
  title: ExtractV2Job
  description: An extraction job.
  required:
    - file_input
    - id
    - project_id
    - status
    - created_at
    - updated_at
  properties:
    file_input:
      type: string
      title: File Input
      description: File ID or parse job ID that was extracted
      examples:
        - dfl-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee
    id:
      type: string
      title: Id
      description: Unique job identifier (job_id)
      examples:
        - ext-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee
    project_id:
      type: string
      title: Project Id
      description: Project this job belongs to
      examples:
        - prj-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee
    configuration_id:
      anyOf:
        - type: string
        - type: 'null'
      title: Configuration Id
      description: Saved extract configuration ID used for this job, if any
      examples:
        - cfg-11111111-2222-3333-4444-555555555555
    configuration:
      anyOf:
        - $ref: '#/components/schemas/ExtractConfiguration'
        - type: 'null'
      description: Configuration used for this job
    status:
      # Declared as a free string; the allowed values are listed only in
      # the description text, not as an enum.
      type: string
      title: Status
      description: 'Current job status. - `PENDING` — queued, not yet started - `RUNNING` — actively processing - `COMPLETED` — finished successfully - `FAILED` — terminated with an error - `CANCELLED` — cancelled by user'
      examples:
        - COMPLETED
    error_message:
      anyOf:
        - type: string
        - type: 'null'
      title: Error Message
      description: Error details when status is FAILED
    extract_result:
      anyOf:
        - additionalProperties:
            anyOf:
              - additionalProperties: true
                type: object
              - items: {}
                type: array
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
          type: object
        - items:
            additionalProperties:
              anyOf:
                - additionalProperties: true
                  type: object
                - items: {}
                  type: array
                - type: string
                - type: integer
                - type: number
                - type: boolean
                - type: 'null'
            type: object
          type: array
        - type: 'null'
      title: Extract Result
      description: Extracted data conforming to the data_schema. Returns a single object for per_doc, or an array for per_page / per_table_row.
    extract_metadata:
      anyOf:
        - $ref: '#/components/schemas/ExtractJobMetadata'
        - type: 'null'
      description: Extraction metadata including per-field info
    metadata:
      anyOf:
        - $ref: '#/components/schemas/ExtractV2JobMetadata'
        - type: 'null'
      description: Custom metadata - limited to enterprise plans.
    created_at:
      type: string
      format: date-time
      title: Created At
      description: Creation timestamp
    updated_at:
      type: string
      format: date-time
      title: Updated At
      description: Last update timestamp

# Only file_input is required by the schema.
ExtractV2JobCreate:
  type: object
  title: ExtractV2JobCreate
  description: Request to create an extraction job. Provide configuration_id or inline configuration.
  required:
    - file_input
  properties:
    webhook_configurations:
      anyOf:
        - items:
            $ref: '#/components/schemas/WebhookConfiguration'
          type: array
        - type: 'null'
      title: Webhook Configurations
      description: Outbound webhook endpoints to notify on job status changes
    configuration_id:
      anyOf:
        - type: string
        - type: 'null'
      title: Configuration Id
      description: Saved configuration ID
      examples:
        - cfg-11111111-2222-3333-4444-555555555555
    configuration:
      anyOf:
        - $ref: '#/components/schemas/ExtractConfiguration'
        - type: 'null'
      description: Inline configuration with extract options and optional parse settings
    file_input:
      type: string
      maxLength: 200
      title: File Input
      description: File ID or parse job ID to extract from
      examples:
        - dfl-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee
  examples:
    - configuration_id: cfg-11111111-2222-3333-4444-555555555555
      file_input: dfl-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee
    - configuration:
        data_schema:
          properties:
            vendor_name:
              description: Name of the vendor
              type: string
            total_amount:
              description: Total amount in dollars
              type: number
          required:
            - vendor_name
            - total_amount
          type: object
        # NOTE(review): placed beside data_schema (as in ExtractV2Parameters);
        # nesting was reconstructed from whitespace-collapsed text - confirm.
        target_pages: '1,3,5-7'
      file_input: dfl-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee
    - configuration:
        data_schema:
          properties:
            name:
              type: string
          type: object
      file_input: pjb-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee

# Open object: extra keys are allowed in addition to `usage`.
ExtractV2JobMetadata:
  type: object
  title: ExtractV2JobMetadata
  description: Job-level metadata.
  additionalProperties: true
  properties:
    usage:
      anyOf:
        - $ref: '#/components/schemas/ExtractJobUsage'
        - type: 'null'
      description: Usage metrics

ExtractV2JobQueryResponse:
  type: object
  title: ExtractV2JobQueryResponse
  description: Paginated list of extraction jobs.
  required:
    - items
  properties:
    items:
      items:
        $ref: '#/components/schemas/ExtractV2Job'
      type: array
      title: Items
      description: The list of items.
    next_page_token:
      anyOf:
        - type: string
        - type: 'null'
      title: Next Page Token
      description: A token, which can be sent as page_token to retrieve the next page. If this field is omitted, there are no subsequent pages.
    total_size:
      anyOf:
        - type: integer
        - type: 'null'
      title: Total Size
      description: The total number of items available. This is only populated when specifically requested. The value may be an estimate and can be used for display purposes only.

# product_type is pinned to the constant `extract_v2`.
ExtractV2Parameters:
  type: object
  title: ExtractV2Parameters
  description: Typed parameters for an *extract v2* product configuration.
  required:
    - data_schema
    - product_type
  properties:
    target_pages:
      anyOf:
        - type: string
        - type: 'null'
      title: Target Pages
      description: Comma-separated page numbers or ranges to process (1-based). Omit to process all pages.
      # Quoted so no YAML loader can read the page lists as anything but strings.
      examples:
        - '1,3,5-7'
        - '1-3,8-10'
    max_pages:
      anyOf:
        - type: integer
          minimum: 1.0
        - type: 'null'
      title: Max Pages
      description: Maximum number of pages to process. Omit for no limit.
      examples:
        - 10
    tier:
      type: string
      enum:
        - cost_effective
        - agentic
      title: Tier
      description: 'Extract tier: cost_effective (5 credits/page) or agentic (15 credits/page)'
      default: cost_effective
      examples:
        - cost_effective
        - agentic
    version:
      type: string
      title: Version
      description: Use 'latest' for the latest release for the selected tier or a date string (YYYY-MM-DD format) to pin to the nearest release at or before that date.
      default: latest
      examples:
        - latest
    data_schema:
      additionalProperties:
        anyOf:
          - additionalProperties: true
            type: object
          - items: {}
            type: array
          - type: string
          - type: integer
          - type: number
          - type: boolean
          - type: 'null'
      type: object
      title: Data Schema
      description: JSON Schema defining the fields to extract. Validate with the /schema/validate endpoint first.
    extraction_target:
      type: string
      enum:
        - per_doc
        - per_page
        - per_table_row
      title: Extraction Target
      description: 'Granularity of extraction: per_doc returns one object per document, per_page returns one object per page, per_table_row returns one object per table row'
      default: per_doc
      examples:
        - per_doc
        - per_page
        - per_table_row
    system_prompt:
      anyOf:
        - type: string
        - type: 'null'
      title: System Prompt
      description: Custom system prompt to guide extraction behavior
      examples:
        - Extract all monetary values in USD. If a currency is not specified, assume USD.
    cite_sources:
      type: boolean
      title: Cite Sources
      description: Include citations in results
      default: false
    confidence_scores:
      type: boolean
      title: Confidence Scores
      description: Include confidence scores in results
      default: false
    parse_tier:
      anyOf:
        - type: string
        - type: 'null'
      title: Parse Tier
      description: Parse tier to use before extraction. Defaults to the extract tier if not specified.
      examples:
        - fast
        - cost_effective
    parse_config_id:
      anyOf:
        - type: string
        - type: 'null'
      title: Parse Config Id
      description: Saved parse configuration ID to control how the document is parsed before extraction
      examples:
        - cfg-11111111-2222-3333-4444-555555555555
    product_type:
      type: string
      const: extract_v2
      title: Product Type
      description: Product type.

# Every property is optional and nullable.
ExtractV2SchemaGenerateRequest:
  type: object
  title: ExtractV2SchemaGenerateRequest
  description: Request schema for generating an extraction schema.
  properties:
    name:
      anyOf:
        - type: string
          maxLength: 255
        - type: 'null'
      title: Name
      description: Name for the generated configuration (auto-generated if omitted)
      examples:
        - invoice_extraction
    prompt:
      anyOf:
        - type: string
        - type: 'null'
      title: Prompt
      description: Natural language description of the data structure to extract
      examples:
        - Extract vendor name, invoice number, line items, and total amount
    file_id:
      anyOf:
        - type: string
        - type: 'null'
      title: File Id
      description: Optional file ID to analyze for schema generation
      examples:
        - dfl-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee
    data_schema:
      anyOf:
        - additionalProperties:
            anyOf:
              - additionalProperties: true
                type: object
              - items: {}
                type: array
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
          type: object
        - type: 'null'
      title: Data Schema
      description: Optional schema to validate, refine, or extend
  examples:
    - name: invoice_extraction
      prompt: Extract vendor name, invoice number, date, line items with descriptions and amounts, and total amount from invoices.
ExtractV2SchemaValidateRequest:
  type: object
  title: ExtractV2SchemaValidateRequest
  description: Request schema for validating an extraction schema.
  required:
    - data_schema
  properties:
    data_schema:
      additionalProperties:
        anyOf:
          - additionalProperties: true
            type: object
          - items: {}
            type: array
          - type: string
          - type: integer
          - type: number
          - type: boolean
          - type: 'null'
      type: object
      title: Data Schema
      description: JSON Schema to validate for use with extract jobs
  # Sample request: an invoice schema with a nested array of line items.
  examples:
    - data_schema:
        properties:
          vendor_name:
            description: Name of the vendor or supplier
            type: string
          invoice_number:
            description: Unique invoice identifier
            type: string
          total_amount:
            description: Total invoice amount in dollars
            type: number
          line_items:
            description: List of invoice line items
            items:
              properties:
                description:
                  type: string
                quantity:
                  type: integer
                unit_price:
                  type: number
              required:
                - description
                - quantity
                - unit_price
              type: object
            type: array
        required:
          - vendor_name
          - invoice_number
          - total_amount
        type: object

ExtractV2SchemaValidateResponse:
  type: object
  title: ExtractV2SchemaValidateResponse
  description: Response schema for schema validation.
  required:
    - data_schema
  properties:
    data_schema:
      additionalProperties:
        anyOf:
          - additionalProperties: true
            type: object
          - items: {}
            type: array
          - type: string
          - type: integer
          - type: number
          - type: boolean
          - type: 'null'
      type: object
      title: Data Schema
      description: Validated JSON Schema, ready for use in extract jobs

# Three optional, nullable buckets: document-, page- and row-level metadata.
ExtractedFieldMetadata:
  type: object
  title: ExtractedFieldMetadata
  description: Metadata for extracted fields including document, page, and row level info.
  properties:
    document_metadata:
      anyOf:
        - additionalProperties:
            anyOf:
              - additionalProperties: true
                type: object
              - items: {}
                type: array
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
          type: object
        - type: 'null'
      title: Document Metadata
      description: Per-field metadata keyed by field name from your schema. Scalar fields (e.g. `vendor`) map to a FieldMetadataEntry with citation and confidence. Array fields (e.g. `items`) map to a list where each element contains per-sub-field FieldMetadataEntry objects, indexed by array position. Nested objects contain sub-field entries recursively.
      # NOTE(review): the nesting below was reconstructed from
      # whitespace-collapsed text. extraction_confidence and
      # parsing_confidence are placed under `vendor` - confirm against the
      # generator output.
      examples:
        - items:
            - amount:
                citation:
                  - matching_text: $10.00
                    page: 1
                confidence: 1.0
              description:
                citation:
                  - matching_text: $10/month
                    page: 1
                confidence: 0.998
          total:
            citation:
              - matching_text: $10.00
                page: 1
            confidence: 1.0
          vendor:
            citation:
              - matching_text: Noisebridge
                page: 1
            confidence: 1.0
            extraction_confidence: 1.0
            parsing_confidence: 1.0
    page_metadata:
      anyOf:
        - items:
            additionalProperties:
              anyOf:
                - additionalProperties: true
                  type: object
                - items: {}
                  type: array
                - type: string
                - type: integer
                - type: number
                - type: boolean
                - type: 'null'
            type: object
          type: array
        - type: 'null'
      title: Page Metadata
      description: Per-page metadata when extraction_target is per_page
    row_metadata:
      anyOf:
        - items:
            additionalProperties:
              anyOf:
                - additionalProperties: true
                  type: object
                - items: {}
                  type: array
                - type: string
                - type: integer
                - type: number
                - type: boolean
                - type: 'null'
            type: object
          type: array
        - type: 'null'
      title: Row Metadata
      description: Per-row metadata when extraction_target is per_table_row
# region_id is declared but is not in the required list.
ExtractedRegionSummary:
  type: object
  title: ExtractedRegionSummary
  description: A summary of a single extracted region from a spreadsheet
  required:
    - sheet_name
    - location
    - region_type
  properties:
    region_id:
      type: string
      title: Region Id
      description: Unique identifier for this region within the file
    sheet_name:
      type: string
      title: Sheet Name
      description: Worksheet name where region was found
    location:
      type: string
      title: Location
      description: Location of the region in the spreadsheet
    region_type:
      type: string
      title: Region Type
      description: Type of the extracted region
    title:
      anyOf:
        - type: string
        - type: 'null'
      title: Title
      description: Generated title for the region
    description:
      anyOf:
        - type: string
        - type: 'null'
      title: Description
      description: Generated description for the region

FailPageMode:
  type: string
  title: FailPageMode
  description: Enum for representing the different available page error handling modes.
  enum:
    - raw_text
    - blank_page
    - error_message

# Failure record for a page; `success` is pinned to false.
FailedMarkdownPage:
  type: object
  title: FailedMarkdownPage
  required:
    - page_number
    - error
    - success
  properties:
    page_number:
      type: integer
      title: Page Number
      description: Page number of the document
    error:
      type: string
      title: Error
      description: Error message describing the failure
    success:
      type: boolean
      const: false
      title: Success
      description: Failure indicator

# Same shape as FailedMarkdownPage.
FailedStructuredPage:
  type: object
  title: FailedStructuredPage
  required:
    - page_number
    - error
    - success
  properties:
    page_number:
      type: integer
      title: Page Number
      description: Page number of the document
    error:
      type: string
      title: Error
      description: Error message describing the failure
    success:
      type: boolean
      const: false
      title: Success
      description: Failure indicator

FailureHandlingConfig:
  type: object
  title: FailureHandlingConfig
  description: Configuration for handling different types of failures during data source processing.
  properties:
    skip_list_failures:
      type: boolean
      title: Skip List Failures
      description: Whether to skip failed batches/lists and continue processing
      default: false

# File record; only id, name and project_id are required.
File:
  type: object
  title: File
  description: Schema for a file.
  required:
    - id
    - name
    - project_id
  properties:
    id:
      type: string
      format: uuid
      title: Id
      description: Unique identifier
    created_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Created At
      description: Creation datetime
    updated_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Updated At
      description: Update datetime
    name:
      type: string
      maxLength: 3000
      minLength: 1
      title: Name
    external_file_id:
      anyOf:
        - type: string
        - type: 'null'
      title: External File Id
      description: The ID of the file in the external system
    file_size:
      anyOf:
        - type: integer
          minimum: 0.0
        - type: 'null'
      title: File Size
      description: Size of the file in bytes
    file_type:
      anyOf:
        - type: string
          maxLength: 3000
          minLength: 1
        - type: 'null'
      title: File Type
      description: File type (e.g. pdf, docx, etc.)
    project_id:
      type: string
      format: uuid
      title: Project Id
      description: The ID of the project that the file belongs to
    last_modified_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Last Modified At
      description: The last modified time of the file
    resource_info:
      anyOf:
        - additionalProperties:
            anyOf:
              - additionalProperties: true
                type: object
              - items: {}
                type: array
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
          type: object
        - type: 'null'
      title: Resource Info
      description: Resource information for the file
    permission_info:
      anyOf:
        - additionalProperties:
            anyOf:
              - additionalProperties: true
                type: object
              - items: {}
                type: array
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
          type: object
        - type: 'null'
      title: Permission Info
      description: Permission information for the file
    data_source_id:
      anyOf:
        - type: string
          format: uuid
        - type: 'null'
      title: Data Source Id
      description: The ID of the data source that the file belongs to
    expires_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Expires At
      description: The expiration date for the file. Files past this date can be deleted.
    purpose:
      anyOf:
        - type: string
        - type: 'null'
      title: Purpose
      description: The intended purpose of the file (e.g., 'user_data', 'parse', 'extract', 'split', 'classify')

FileClassification:
  type: object
  title: FileClassification
  description: A file classification.
  required:
    - id
    - classify_job_id
  properties:
    id:
      type: string
      format: uuid
      title: Id
      description: Unique identifier
    created_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Created At
      description: Creation datetime
    updated_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Updated At
      description: Update datetime
    classify_job_id:
      type: string
      format: uuid
      title: Classify Job Id
      description: The ID of the classify job
    file_id:
      anyOf:
        - type: string
          format: uuid
        - type: 'null'
      title: File Id
      description: The ID of the classified file
    result:
      anyOf:
        - $ref: '#/components/schemas/ClassificationResult'
        - type: 'null'
      description: The classification result
# `counts` is a map with integer values; `total_count` is a single integer.
FileCountByStatusResponse:
  type: object
  title: FileCountByStatusResponse
  required:
    - counts
    - total_count
  properties:
    counts:
      additionalProperties:
        type: integer
      type: object
      title: Counts
      description: The counts of files by status
    total_count:
      type: integer
      title: Total Count
      description: The total number of files
    pipeline_id:
      anyOf:
        - type: string
          format: uuid
        - type: 'null'
      title: Pipeline Id
      description: The ID of the pipeline that the files belong to
    data_source_id:
      anyOf:
        - type: string
          format: uuid
        - type: 'null'
      title: Data Source Id
      description: The ID of the data source that the files belong to
    only_manually_uploaded:
      type: boolean
      title: Only Manually Uploaded
      description: Whether to only count manually uploaded files
      default: false

# Inline file payload; both properties are required.
FileData:
  type: object
  title: FileData
  description: Schema for file data with base64 content and MIME type.
  required:
    - data
    - mime_type
  properties:
    data:
      type: string
      title: Data
      description: The file content as base64-encoded string
    mime_type:
      type: string
      title: Mime Type
      description: The MIME type of the file (e.g., 'application/pdf', 'text/plain')

# Every filter field is optional and nullable.
FileFilter:
  type: object
  title: FileFilter
  description: Filter parameters for file queries.
  properties:
    project_id:
      anyOf:
        - type: string
          format: uuid
        - type: 'null'
      title: Project Id
      description: Filter by project ID
    file_ids:
      anyOf:
        - items:
            type: string
            format: uuid
          type: array
        - type: 'null'
      title: File Ids
      description: Filter by specific file IDs
    file_name:
      anyOf:
        - type: string
        - type: 'null'
      title: File Name
      description: Filter by file name
    data_source_id:
      anyOf:
        - type: string
          format: uuid
        - type: 'null'
      title: Data Source Id
      description: Filter by data source ID
    external_file_id:
      anyOf:
        - type: string
        - type: 'null'
      title: External File Id
      description: Filter by external file ID
    only_manually_uploaded:
      anyOf:
        - type: boolean
        - type: 'null'
      title: Only Manually Uploaded
      description: Filter only manually uploaded files (data_source_id is null)
# One hit in a find result: file id plus display name, both required.
FileFindEntry:
  type: object
  title: FileFindEntry
  description: A file returned by find.
  required:
    - file_id
    - file_name
  properties:
    file_id:
      type: string
      title: File Id
      description: ID of the file.
    file_name:
      type: string
      title: File Name
      description: Display name of the file.

# Only index_id is required; paging and name filters are optional.
FileFindParams:
  type: object
  title: FileFindParams
  description: Search for files by name.
  required:
    - index_id
  properties:
    page_size:
      anyOf:
        - type: integer
        - type: 'null'
      title: Page Size
      description: The maximum number of items to return. The service may return fewer than this value. If unspecified, a default page size will be used. The maximum value is typically 1000; values above this will be coerced to the maximum.
    page_token:
      anyOf:
        - type: string
        - type: 'null'
      title: Page Token
      description: A page token, received from a previous list call. Provide this to retrieve the subsequent page.
    index_id:
      type: string
      title: Index Id
      description: ID of the index to search within.
      examples:
        - idx-abc123
    file_name:
      anyOf:
        - type: string
        - type: 'null'
      title: File Name
      description: Exact file name to match.
    file_name_contains:
      anyOf:
        - type: string
        - type: 'null'
      title: File Name Contains
      description: Substring match on file name (case-insensitive).

FileFindResult:
  type: object
  title: FileFindResult
  description: Paginated file find results.
  required:
    - items
  properties:
    items:
      items:
        $ref: '#/components/schemas/FileFindEntry'
      type: array
      title: Items
      description: The list of items.
    next_page_token:
      anyOf:
        - type: string
        - type: 'null'
      title: Next Page Token
      description: A token, which can be sent as page_token to retrieve the next page. If this field is omitted, there are no subsequent pages.
    total_size:
      anyOf:
        - type: integer
        - type: 'null'
      title: Total Size
      description: The total number of items available. This is only populated when specifically requested. The value may be an estimate and can be used for display purposes only.
# One match: character offsets plus the matched text, all required.
FileGrepMatch:
  type: object
  title: FileGrepMatch
  description: A single grep match within a file.
  required:
    - start_char
    - end_char
    - content
  properties:
    start_char:
      type: integer
      title: Start Char
      description: Start character offset of the match.
    end_char:
      type: integer
      title: End Char
      description: End character offset of the match.
    content:
      type: string
      title: Content
      description: Matched text content.

FileGrepParams:
  type: object
  title: FileGrepParams
  description: Grep within a specific file's parsed content.
  required:
    - index_id
    - file_id
    - pattern
  properties:
    page_size:
      anyOf:
        - type: integer
        - type: 'null'
      title: Page Size
      description: The maximum number of items to return. The service may return fewer than this value. If unspecified, a default page size will be used. The maximum value is typically 1000; values above this will be coerced to the maximum.
    page_token:
      anyOf:
        - type: string
        - type: 'null'
      title: Page Token
      description: A page token, received from a previous list call. Provide this to retrieve the subsequent page.
    index_id:
      type: string
      title: Index Id
      description: ID of the index the file belongs to.
      examples:
        - idx-abc123
    file_id:
      type: string
      title: File Id
      description: ID of the file to grep.
    pattern:
      type: string
      title: Pattern
      description: Regex pattern to search for.
      examples:
        - revenue|profit
    context_chars:
      anyOf:
        - type: integer
        - type: 'null'
      title: Context Chars
      description: Number of characters of context to include before and after the matched pattern in the content field of the response

FileGrepResult:
  type: object
  title: FileGrepResult
  description: Paginated grep results for a file.
  required:
    - items
  properties:
    items:
      items:
        $ref: '#/components/schemas/FileGrepMatch'
      type: array
      title: Items
      description: The list of items.
    next_page_token:
      anyOf:
        - type: string
        - type: 'null'
      title: Next Page Token
      description: A token, which can be sent as page_token to retrieve the next page. If this field is omitted, there are no subsequent pages.
    total_size:
      anyOf:
        - type: integer
        - type: 'null'
      title: Total Size
      description: The total number of items available. This is only populated when specifically requested. The value may be an estimate and can be used for display purposes only.

# Every property is optional and nullable.
FileQueryRequest:
  type: object
  title: FileQueryRequest
  description: Request schema for querying files with pagination and filtering.
  properties:
    page_size:
      anyOf:
        - type: integer
        - type: 'null'
      title: Page Size
      description: The maximum number of items to return. The service may return fewer than this value. If unspecified, a default page size will be used. The maximum value is typically 1000; values above this will be coerced to the maximum.
    page_token:
      anyOf:
        - type: string
        - type: 'null'
      title: Page Token
      description: A page token, received from a previous list call. Provide this to retrieve the subsequent page.
    filter:
      anyOf:
        - $ref: '#/components/schemas/FileFilter'
        - type: 'null'
      description: A filter object or expression that filters resources listed in the response.
    order_by:
      anyOf:
        - type: string
        - type: 'null'
      title: Order By
      description: A comma-separated list of fields to order by, sorted in ascending order. Use 'field_name desc' to specify descending order.

FileQueryResponseV2:
  type: object
  title: FileQueryResponseV2
  description: Paginated list of files.
  required:
    - items
  properties:
    items:
      items:
        $ref: '#/components/schemas/FileV2'
      type: array
      title: Items
      description: The list of items.
    next_page_token:
      anyOf:
        - type: string
        - type: 'null'
      title: Next Page Token
      description: A token, which can be sent as page_token to retrieve the next page. If this field is omitted, there are no subsequent pages.
    total_size:
      anyOf:
        - type: integer
        - type: 'null'
      title: Total Size
      description: The total number of items available. This is only populated when specifically requested. The value may be an estimate and can be used for display purposes only.

FileReadParams:
  type: object
  title: FileReadParams
  description: Read parsed content of a specific file.
  required:
    - index_id
    - file_id
  properties:
    index_id:
      type: string
      title: Index Id
      description: ID of the index the file belongs to.
      examples:
        - idx-abc123
    file_id:
      type: string
      title: File Id
      description: ID of the file to read.
    offset:
      type: integer
      title: Offset
      description: Starting character offset.
      default: 0
    max_length:
      anyOf:
        - type: integer
        - type: 'null'
      title: Max Length
      description: Maximum number of characters to read from the offset.

FileReadResult:
  type: object
  title: FileReadResult
  description: File read result.
  required:
    - content
  properties:
    content:
      type: string
      title: Content
      description: Parsed text content of the file.

# `id` is a plain string here, while `project_id` carries format: uuid.
FileV2:
  type: object
  title: FileV2
  description: An uploaded file.
  required:
    - id
    - name
    - project_id
  properties:
    id:
      type: string
      title: Id
      description: Unique file identifier
      examples:
        - dfl-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee
    name:
      type: string
      maxLength: 3000
      minLength: 1
      title: Name
      description: File name including extension
      examples:
        - invoice.pdf
    external_file_id:
      anyOf:
        - type: string
        - type: 'null'
      title: External File Id
      description: Optional ID for correlating with an external system
      examples:
        - ext-12345
    file_type:
      anyOf:
        - type: string
          maxLength: 3000
          minLength: 1
        - type: 'null'
      title: File Type
      description: File extension (pdf, docx, png, etc.)
      examples:
        - pdf
    project_id:
      type: string
      format: uuid
      title: Project Id
      description: Project this file belongs to
      examples:
        - '123e4567-e89b-12d3-a456-426614174000'
    last_modified_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Last Modified At
      description: When the file was last modified (ISO 8601)
    expires_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Expires At
      description: When the file expires and may be automatically removed. Null means no expiration.
    purpose:
      anyOf:
        - type: string
        - type: 'null'
      title: Purpose
      description: 'How the file will be used: user_data, parse, extract, classify, split, sheet, or agent_app'
      examples:
        - parse
    download_url:
      anyOf:
        - $ref: '#/components/schemas/PresignedUrl'
        - type: 'null'
      description: Presigned URL to download the file content.

FilterCondition:
  type: string
  title: FilterCondition
  description: Vector store filter conditions to combine different filters.
  enum:
    - and
    - or
    - not

# Six scalar comparisons share one operand union; includes/excludes take
# arrays of that same union. Nothing is required.
FilterOperation:
  type: object
  title: FilterOperation
  description: API request model for a filter comparison operation.
  properties:
    eq:
      anyOf:
        - type: number
        - type: integer
        - type: string
        - type: string
          format: date-time
        - type: 'null'
      title: Eq
    ne:
      anyOf:
        - type: number
        - type: integer
        - type: string
        - type: string
          format: date-time
        - type: 'null'
      title: Ne
    gt:
      anyOf:
        - type: number
        - type: integer
        - type: string
        - type: string
          format: date-time
        - type: 'null'
      title: Gt
    gte:
      anyOf:
        - type: number
        - type: integer
        - type: string
        - type: string
          format: date-time
        - type: 'null'
      title: Gte
    lt:
      anyOf:
        - type: number
        - type: integer
        - type: string
        - type: string
          format: date-time
        - type: 'null'
      title: Lt
    lte:
      anyOf:
        - type: number
        - type: integer
        - type: string
        - type: string
          format: date-time
        - type: 'null'
      title: Lte
    includes:
      items:
        anyOf:
          - type: number
          - type: integer
          - type: string
          - type: string
            format: date-time
          - type: 'null'
      type: array
      title: Includes
    excludes:
      items:
        anyOf:
          - type: number
          - type: integer
          - type: string
          - type: string
            format: date-time
          - type: 'null'
      type: array
      title: Excludes

# Symbolic operators are quoted uniformly so no loader can misread them.
FilterOperator:
  type: string
  title: FilterOperator
  description: Vector store filter operator.
  enum:
    - '=='
    - '>'
    - '<'
    - '!='
    - '>='
    - '<='
    - in
    - nin
    - any
    - all
    - text_match
    - text_match_insensitive
    - contains
    - is_empty
# Comparison filter whose value is an integer, a number, or a list of either.
FilterType_Union_int__float__:
  properties:
    operator:
      type: string
      enum: [eq, ne, gt, lt, gte, lte, in, nin]
      title: Operator
    value:
      anyOf:
        - type: integer
        - type: number
        - items:
            anyOf:
              - type: integer
              - type: number
          type: array
      title: Value
  type: object
  required: [operator, value]
  title: FilterType[Union[int, float]]

# Comparison filter whose value is a string, integer, boolean, number, or a
# list of those.
FilterType_Union_str__int__bool__float__:
  properties:
    operator:
      type: string
      enum: [eq, ne, gt, lt, gte, lte, in, nin]
      title: Operator
    value:
      anyOf:
        - type: string
        - type: integer
        - type: boolean
        - type: number
        - items:
            anyOf:
              - type: string
              - type: integer
              - type: boolean
              - type: number
          type: array
      title: Value
  type: object
  required: [operator, value]
  title: FilterType[Union[str, int, bool, float]]

# Comparison filter whose value is a string or a list of strings.
FilterType_str_:
  properties:
    operator:
      type: string
      enum: [eq, ne, gt, lt, gte, lte, in, nin]
      title: Operator
    value:
      anyOf:
        - type: string
        - items:
            type: string
          type: array
      title: Value
  type: object
  required: [operator, value]
  title: FilterType[str]

# Page footer container: markdown text plus the nested items it holds.
# Nested items are told apart by their `type` property (see discriminator).
FooterItem:
  properties:
    type:
      type: string
      const: footer
      title: Type
      description: Page footer container
      default: footer
    md:
      type: string
      title: Md
      description: Markdown representation preserving formatting
    bbox:
      anyOf:
        - items:
            $ref: '#/components/schemas/BBox'
          type: array
        - type: 'null'
      title: Bbox
      description: List of bounding boxes
    items:
      items:
        oneOf:
          - $ref: '#/components/schemas/TextItem'
          - $ref: '#/components/schemas/HeadingItem'
          - $ref: '#/components/schemas/ListItem'
          - $ref: '#/components/schemas/CodeItem'
          - $ref: '#/components/schemas/TableItem'
          - $ref: '#/components/schemas/ImageItem'
          - $ref: '#/components/schemas/LinkItem'
        discriminator:
          propertyName: type
          mapping:
            code: '#/components/schemas/CodeItem'
            heading: '#/components/schemas/HeadingItem'
            image: '#/components/schemas/ImageItem'
            link: '#/components/schemas/LinkItem'
            list: '#/components/schemas/ListItem'
            table: '#/components/schemas/TableItem'
            text: '#/components/schemas/TextItem'
      type: array
      title: Items
      description: List of items within the footer
  type: object
  required: [md, items]
  title: FooterItem

# Settings for a Gemini embedding model: model id, batching, task type,
# credentials, transport and optional output dimensionality.
GeminiEmbedding:
  properties:
    model_name:
      type: string
      title: Model Name
      description: The modelId of the Gemini model to use.
      default: models/embedding-001
    embed_batch_size:
      type: integer
      maximum: 2048.0
      exclusiveMinimum: 0.0
      title: Embed Batch Size
      description: The batch size for embedding calls.
      default: 10
    num_workers:
      anyOf:
        - type: integer
        - type: 'null'
      title: Num Workers
      description: The number of workers to use for async embedding calls.
    title:
      anyOf:
        - type: string
        - type: 'null'
      title: Title
      description: Title is only applicable for retrieval_document tasks, and is used to
        represent a document title. For other tasks, title is invalid.
      default: ''
    task_type:
      anyOf:
        - type: string
        - type: 'null'
      title: Task Type
      description: The task for embedding model.
      default: retrieval_document
    api_key:
      anyOf:
        - type: string
        - type: 'null'
      title: Api Key
      description: API key to access the model. Defaults to None.
    api_base:
      anyOf:
        - type: string
        - type: 'null'
      title: Api Base
      description: API base to access the model. Defaults to None.
    transport:
      anyOf:
        - type: string
        - type: 'null'
      title: Transport
      description: Transport to access the model. Defaults to None.
    output_dimensionality:
      anyOf:
        - type: integer
        - type: 'null'
      title: Output Dimensionality
      description: Optional reduced dimension for output embeddings. Supported by
        models/text-embedding-004 and newer (e.g. gemini-embedding-001). Not supported
        by models/embedding-001.
    class_name:
      type: string
      title: Class Name
      default: GeminiEmbedding
  type: object
  title: GeminiEmbedding

# Pairs the GEMINI_EMBEDDING type tag with its GeminiEmbedding component.
GeminiEmbeddingConfig:
  properties:
    type:
      type: string
      const: GEMINI_EMBEDDING
      title: Type
      description: Type of the embedding model.
      default: GEMINI_EMBEDDING
    component:
      $ref: '#/components/schemas/GeminiEmbedding'
      description: Configuration for the Gemini embedding model.
  type: object
  title: GeminiEmbeddingConfig

# Error body carrying a list of ValidationError entries under `detail`.
HTTPValidationError:
  properties:
    detail:
      items:
        $ref: '#/components/schemas/ValidationError'
      type: array
      title: Detail
  type: object
  title: HTTPValidationError

# Page header container: markdown text plus the nested items it holds.
# Nested items are told apart by their `type` property (see discriminator).
HeaderItem:
  properties:
    type:
      type: string
      const: header
      title: Type
      description: Page header container
      default: header
    md:
      type: string
      title: Md
      description: Markdown representation preserving formatting
    bbox:
      anyOf:
        - items:
            $ref: '#/components/schemas/BBox'
          type: array
        - type: 'null'
      title: Bbox
      description: List of bounding boxes
    items:
      items:
        oneOf:
          - $ref: '#/components/schemas/TextItem'
          - $ref: '#/components/schemas/HeadingItem'
          - $ref: '#/components/schemas/ListItem'
          - $ref: '#/components/schemas/CodeItem'
          - $ref: '#/components/schemas/TableItem'
          - $ref: '#/components/schemas/ImageItem'
          - $ref: '#/components/schemas/LinkItem'
        discriminator:
          propertyName: type
          mapping:
            code: '#/components/schemas/CodeItem'
            heading: '#/components/schemas/HeadingItem'
            image: '#/components/schemas/ImageItem'
            link: '#/components/schemas/LinkItem'
            list: '#/components/schemas/ListItem'
            table: '#/components/schemas/TableItem'
            text: '#/components/schemas/TextItem'
      type: array
      title: Items
      description: List of items within the header
  type: object
  required: [md, items]
  title: HeaderItem

# Heading element: markdown, optional bounding boxes, level and text.
HeadingItem:
  properties:
    type:
      type: string
      const: heading
      title: Type
      description: Heading item type
      default: heading
    md:
      type: string
      title: Md
      description: Markdown representation preserving formatting
    bbox:
      anyOf:
        - items:
            $ref: '#/components/schemas/BBox'
          type: array
        - type: 'null'
      title: Bbox
      description: List of bounding boxes
    level:
      type: integer
      title: Level
      description: Heading level (1-6)
    value:
      type: string
      title: Value
      description: Heading text content
  type: object
  required: [md, level, value]
  title: HeadingItem

# Settings for a Hugging Face Inference API embedding model: model/task
# selection, batching, pooling, instructions and HTTP options.
HuggingFaceInferenceAPIEmbedding:
  properties:
    model_name:
      anyOf:
        - type: string
        - type: 'null'
      title: Model Name
      description: Hugging Face model name. If None, the task will be used.
    embed_batch_size:
      type: integer
      maximum: 2048.0
      exclusiveMinimum: 0.0
      title: Embed Batch Size
      description: The batch size for embedding calls.
      default: 10
    num_workers:
      anyOf:
        - type: integer
        - type: 'null'
      title: Num Workers
      description: The number of workers to use for async embedding calls.
    pooling:
      anyOf:
        - $ref: '#/components/schemas/Pooling'
        - type: 'null'
      description: Pooling strategy. If None, the model's default pooling is used.
      default: cls
    query_instruction:
      anyOf:
        - type: string
        - type: 'null'
      title: Query Instruction
      description: Instruction to prepend during query embedding.
    text_instruction:
      anyOf:
        - type: string
        - type: 'null'
      title: Text Instruction
      description: Instruction to prepend during text embedding.
    token:
      anyOf:
        - type: string
        - type: boolean
        - type: 'null'
      title: Token
      description: Hugging Face token. Will default to the locally saved token. Pass
        token=False if you don’t want to send your token to the server.
    timeout:
      anyOf:
        - type: number
        - type: 'null'
      title: Timeout
      description: The maximum number of seconds to wait for a response from the server.
        Loading a new model in Inference API can take up to several minutes. Defaults
        to None, meaning it will loop until the server is available.
    headers:
      anyOf:
        - additionalProperties:
            type: string
          type: object
        - type: 'null'
      title: Headers
      description: Additional headers to send to the server. By default only the
        authorization and user-agent headers are sent. Values in this dictionary will
        override the default values.
    cookies:
      anyOf:
        - additionalProperties:
            type: string
          type: object
        - type: 'null'
      title: Cookies
      description: Additional cookies to send to the server.
    task:
      anyOf:
        - type: string
        - type: 'null'
      title: Task
      description: Optional task to pick Hugging Face's recommended model, used when
        model_name is left as default of None.
    class_name:
      type: string
      title: Class Name
      default: HuggingFaceInferenceAPIEmbedding
  type: object
  title: HuggingFaceInferenceAPIEmbedding

# Pairs the HUGGINGFACE_API_EMBEDDING type tag with its
# HuggingFaceInferenceAPIEmbedding component.
HuggingFaceInferenceAPIEmbeddingConfig:
  properties:
    type:
      type: string
      const: HUGGINGFACE_API_EMBEDDING
      title: Type
      description: Type of the embedding model.
      default: HUGGINGFACE_API_EMBEDDING
    component:
      $ref: '#/components/schemas/HuggingFaceInferenceAPIEmbedding'
      description: Configuration for the HuggingFace Inference API embedding model.
  type: object
  title: HuggingFaceInferenceAPIEmbeddingConfig

ImageAttachmentRef:
  properties:
    source_id:
      type: string
      title: Source Id
    attachment_name:
      type: string
      title: Attachment Name
  type: object
  required: [source_id, attachment_name]
  title: ImageAttachmentRef
  description: Coordinates for lazily resolving a page screenshot presigned URL.

# Image element: markdown, optional bounding boxes, caption and URL.
ImageItem:
  properties:
    type:
      type: string
      const: image
      title: Type
      description: Image item type
      default: image
    md:
      type: string
      title: Md
      description: Markdown representation preserving formatting
    bbox:
      anyOf:
        - items:
            $ref: '#/components/schemas/BBox'
          type: array
        - type: 'null'
      title: Bbox
      description: List of bounding boxes
    caption:
      type: string
      title: Caption
      description: Image caption
    url:
      type: string
      title: Url
      description: URL to the image
  type: object
  required: [md, caption, url]
  title: ImageItem

ImageMetadata:
  properties:
    index:
      type: integer
      title: Index
      description: Index of the image in the extraction order
    filename:
      type: string
      title: Filename
      description: Image filename (e.g., 'image_0.png')
    content_type:
      anyOf:
        - type: string
        - type: 'null'
      title: Content Type
      description: MIME type of the image
    size_bytes:
      anyOf:
        - type: integer
        - type: 'null'
      title: Size Bytes
      description: 'Deprecated: always returns None. Will be removed in a future release.'
      deprecated: true
    presigned_url:
      anyOf:
        - type: string
        - type: 'null'
      title: Presigned Url
      description: Presigned URL to download the image
    category:
      anyOf:
        - type: string
          enum: [screenshot, embedded, layout]
        - type: 'null'
      title: Category
      description: 'Image category: ''screenshot'' (full page), ''embedded'' (images in
        document), or ''layout'' (cropped from layout detection)'
    bbox:
      anyOf:
        - $ref: '#/components/schemas/ImageMetadataBBox'
        - type: 'null'
      description: Bounding box of the image on its page
  type: object
  required: [index, filename]
  title: ImageMetadata
  description: Metadata for a single extracted image.

ImageMetadataBBox:
  properties:
    x:
      type: integer
      title: X
      description: X coordinate of the bounding box
    # `y` / `Y` are boolean literals in YAML 1.1. Left bare, parsers that
    # honour them read this key, its title and the `required` entry as `true`,
    # so all three are quoted to keep them strings.
    'y':
      type: integer
      title: 'Y'
      description: Y coordinate of the bounding box
    w:
      type: integer
      title: W
      description: Width of the bounding box
    h:
      type: integer
      title: H
      description: Height of the bounding box
  type: object
  required: [x, 'y', w, h]
  title: ImageMetadataBBox
  description: Bounding box for an image on its page.

ImagesContentMetadata:
  properties:
    total_count:
      type: integer
      title: Total Count
      description: Total number of extracted images
    images:
      items:
        $ref: '#/components/schemas/ImageMetadata'
      type: array
      title: Images
      description: List of image metadata with presigned URLs
  type: object
  required: [total_count, images]
  title: ImagesContentMetadata
  description: Metadata for all extracted images.

IndexCreateRequest:
  properties:
    source_directory_id:
      type: string
      title: Source Directory Id
      description: ID of the source directory containing your documents.
      examples:
        - dir-abc123
    name:
      anyOf:
        - type: string
        - type: 'null'
      title: Name
      description: Optional display name for the index. If omitted, the index is named
        after the source directory.
    products:
      anyOf:
        - items:
            $ref: '#/components/schemas/IndexProductEntry'
          type: array
        - type: 'null'
      title: Products
      description: Product configurations for syncing. Omit to use a default parse
        configuration. Include an explicit entry per product type (e.g. parse, extract)
        to override the default.
      # One example value: a list holding a single product entry.
      examples:
        - - product_config_id: cfg-abc123
            product_type: parse
    description:
      anyOf:
        - type: string
        - type: 'null'
      title: Description
      description: Optional description of the index.
    sync_frequency:
      type: string
      title: Sync Frequency
      description: 'How often to re-run the sync. One of: manual, daily, on_source_change.
        Defaults to manual.'
      default: manual
      examples:
        - manual
    store_attachments:
      anyOf:
        - items:
            type: string
          type: array
        - type: 'null'
      title: Store Attachments
      description: 'Attachment kinds to store alongside parsed output. Each entry must be
        one of: screenshots, items. For example, [''screenshots''] renders and stores
        per-page screenshots; [''items''] stores structured items with bounding boxes.
        Omit or pass an empty list to skip attachments.'
      # One example value: a list holding a single attachment kind.
      examples:
        - - screenshots
    vector_target:
      type: string
      enum: [DEFAULT, DISABLED]
      title: Vector Target
      description: Vector export destination for the index. 'DEFAULT' exports to the
        managed vector DB destination resolved from configuration. 'DISABLED' skips
        vector export — the export destination falls back to 'Download'.
      default: DEFAULT
      examples:
        - DEFAULT
        - DISABLED
  type: object
  required: [source_directory_id]
  title: IndexCreateRequest
  description: 'Create a searchable index over a source directory. Sets up the full
    pipeline — parsing, chunking, embedding, and vector store indexing — in a single
    call. An initial sync is triggered automatically. Omit ``products`` to use a default
    parse configuration (cost-effective tier). Pass it explicitly to control parsing or
    to include extraction.'

IndexMetadata:
  properties: {}
  additionalProperties: true
  type: object
  title: IndexMetadata
  description: Build state and diagnostic info about an index. Schema is open; only
    `status` of `ready` or `failed` is stable.

IndexProductEntry:
  properties:
    product_type:
      type: string
      title: Product Type
      description: 'Product type. One of: parse, extract.'
      examples:
        - parse
    product_config_id:
      type: string
      title: Product Config Id
      description: ID of the product configuration.
  type: object
  required: [product_type, product_config_id]
  title: IndexProductEntry
  description: 'A product configuration to include in an index''s sync. Structurally
    mirrors ``directory_sync.SyncProductEntryRequest`` but is a distinct class so the
    Index API surface stays SDK-gen-isolated from directory-sync internals. Translation
    between the two happens in ``index/api_utils.py``.'

IndexQueryResponse:
  properties:
    items:
      items:
        $ref: '#/components/schemas/IndexResponse'
      type: array
      title: Items
      description: The list of items.
    next_page_token:
      anyOf:
        - type: string
        - type: 'null'
      title: Next Page Token
      description: A token, which can be sent as page_token to retrieve the next page.
        If this field is omitted, there are no subsequent pages.
    total_size:
      anyOf:
        - type: integer
        - type: 'null'
      title: Total Size
      description: The total number of items available. This is only populated when
        specifically requested. The value may be an estimate and can be used for display
        purposes only.
  type: object
  required: [items]
  title: IndexQueryResponse
  description: Paginated list of indexes.

IndexResponse:
  properties:
    id:
      anyOf:
        - type: string
        - type: string
          format: uuid
      title: Id
      description: Unique identifier
    created_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Created At
      description: Creation datetime
    updated_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Updated At
      description: Update datetime
    name:
      type: string
      title: Name
      description: Index name.
    description:
      anyOf:
        - type: string
        - type: 'null'
      title: Description
      description: Index description.
    project_id:
      type: string
      title: Project Id
      description: Project this index belongs to.
    source_directory_id:
      type: string
      title: Source Directory Id
      description: ID of the source directory.
    sync_config_id:
      type: string
      title: Sync Config Id
      description: ID of the sync configuration.
    export_config_id:
      type: string
      title: Export Config Id
      description: ID of the export configuration.
    last_synced_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Last Synced At
      description: Last sync time.
    last_exported_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Last Exported At
      description: Last export time.
    metadata:
      $ref: '#/components/schemas/IndexMetadata'
      description: Build state and diagnostic info.
  type: object
  required:
    - id
    - name
    - project_id
    - source_directory_id
    - sync_config_id
    - export_config_id
  title: IndexResponse
  description: A searchable index over a directory of documents.

# Failure report for an ingestion job: job id, message and failing step.
IngestionErrorResponse:
  properties:
    job_id:
      type: string
      format: uuid
      title: Job Id
      description: ID of the job that failed.
    message:
      type: string
      title: Message
      description: List of errors that occurred during ingestion.
    step:
      $ref: '#/components/schemas/JobNameMapping'
      description: Name of the job that failed.
  type: object
  required: [job_id, message, step]
  title: IngestionErrorResponse

InputMessage:
  properties:
    id:
      type: string
      format: uuid
      title: Id
      description: ID of the message, if any. a UUID.
    role:
      $ref: '#/components/schemas/MessageRole'
    content:
      type: string
      title: Content
    data:
      anyOf:
        - additionalProperties: true
          type: object
        - type: 'null'
      title: Data
      description: Additional data to be stored with the message.
    class_name:
      type: string
      title: Class Name
      default: base_component
  type: object
  required: [role, content]
  title: InputMessage
  description: This is distinct from a ChatMessage because this schema is enforced by
    the AI Chat library used in the frontend

ItemProcessingResultsResponse:
  properties:
    item_id:
      type: string
      title: Item Id
      description: ID of the source item
    item_name:
      type: string
      title: Item Name
      description: Name of the source item
    processing_results:
      items:
        $ref: '#/components/schemas/ProcessingResult'
      type: array
      title: Processing Results
      description: List of all processing operations performed on this item
  type: object
  required: [item_id, item_name]
  title: ItemProcessingResultsResponse
  description: Response containing all processing results for an item.

JobDataPoint:
  properties:
    id:
      type: string
      title: Id
      description: Job ID.
      examples:
        - pjb-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee
    project_id:
      type: string
      title: Project Id
      description: Project ID.
      examples:
        - 11111111-1111-1111-1111-111111111111
    created_at:
      type: string
      format: date-time
      title: Created At
      description: Created timestamp.
    updated_at:
      type: string
      format: date-time
      title: Updated At
      description: Updated timestamp.
    status:
      type: string
      title: Status
      description: Job status.
      examples:
        - completed
    custom_tag:
      type: string
      title: Custom Tag
      description: Custom tag.
      examples:
        - premium
        - default
    error_message:
      anyOf:
        - type: string
        - type: 'null'
      title: Error Message
      description: Error message, if any.
      examples:
        - Failed to process file.
    state_transitions:
      $ref: '#/components/schemas/JobStateTransitions'
      description: Job state transition timestamps.
  type: object
  required:
    - id
    - project_id
    - created_at
    - updated_at
    - status
    - custom_tag
  title: JobDataPoint
  description: A job data point.
  # Whole-object example; timestamps stay quoted so they remain strings.
  examples:
    - created_at: '2026-04-29T18:00:00Z'
      custom_tag: premium
      id: pjb-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee
      project_id: 11111111-1111-1111-1111-111111111111
      state_transitions:
        completed_at: '2026-04-29T18:01:00Z'
        pending_at: '2026-04-29T18:00:00Z'
        running_at: '2026-04-29T18:00:05Z'
      status: completed
      updated_at: '2026-04-29T18:01:00Z'

JobDataPointResponse:
  properties:
    items:
      items:
        $ref: '#/components/schemas/JobDataPoint'
      type: array
      title: Items
      description: The list of items.
    next_page_token:
      anyOf:
        - type: string
        - type: 'null'
      title: Next Page Token
      description: A token, which can be sent as page_token to retrieve the next page.
        If this field is omitted, there are no subsequent pages.
    total_size:
      anyOf:
        - type: integer
        - type: 'null'
      title: Total Size
      description: The total number of items available. This is only populated when
        specifically requested. The value may be an estimate and can be used for display
        purposes only.
  type: object
  required: [items]
  title: JobDataPointResponse
  description: Paginated list of job data points.

# Per-job counters and flags: token totals, turns, duration, error state and
# export config ids.
JobMetadata:
  properties:
    total_input_tokens:
      anyOf:
        - type: integer
        - type: 'null'
      title: Total Input Tokens
    total_output_tokens:
      anyOf:
        - type: integer
        - type: 'null'
      title: Total Output Tokens
    turns:
      type: integer
      title: Turns
      default: 1
    duration_ms:
      type: number
      title: Duration Ms
      default: 0.0
    is_error:
      type: boolean
      title: Is Error
      default: false
    error:
      anyOf:
        - type: string
        - type: 'null'
      title: Error
    export_config_ids:
      anyOf:
        - items:
            type: string
          type: array
        - type: 'null'
      title: Export Config Ids
  type: object
  title: JobMetadata

JobNameMapping:
  type: string
  enum:
    - MANAGED_INGESTION
    - DATA_SOURCE
    - FILE_UPDATER
    - PARSE
    - TRANSFORM
    - INGESTION
    - METADATA_UPDATE
  title: JobNameMapping
  description: Enum for mapping original job names to readable names.
# Every timestamp below is optional: an ISO date-time string or null.
JobStateTransitions:
  properties:
    pending_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Pending At
      examples:
        - '2026-04-29T18:00:00Z'
    throttled_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Throttled At
      examples:
        - '2026-04-29T18:00:02Z'
    running_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Running At
      examples:
        - '2026-04-29T18:00:05Z'
    completed_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Completed At
      examples:
        - '2026-04-29T18:01:00Z'
    failed_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Failed At
      examples:
        - '2026-04-29T18:01:00Z'
    cancelled_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Cancelled At
      examples:
        - '2026-04-29T18:01:00Z'
  type: object
  title: JobStateTransitions
  description: 'State transition timestamps extracted from job_metadata. Matches the
    shape of StateTransitionTimestamps from job_latency/base.py with the addition of
    pending_at. The frontend derives queue latency, processing time, etc. from these
    timestamps.'

# LLM completion settings: model, system prompt, temperature and the
# chain-of-thought / citation switches.
LLMParameters:
  properties:
    model_name:
      $ref: '#/components/schemas/SupportedLLMModelNames'
      description: The name of the model to use for LLM completions.
      default: GPT_4_1_MINI
    system_prompt:
      anyOf:
        - type: string
          maxLength: 3000
        - type: 'null'
      title: System Prompt
      description: The system prompt to use for the completion.
    temperature:
      anyOf:
        - type: number
        - type: 'null'
      title: Temperature
      description: The temperature value for the model.
      default: 0.1
    use_chain_of_thought_reasoning:
      anyOf:
        - type: boolean
        - type: 'null'
      title: Use Chain Of Thought Reasoning
      description: Whether to use chain of thought reasoning.
    use_citation:
      anyOf:
        - type: boolean
        - type: 'null'
      title: Use Citation
      description: Whether to show citations in the response.
      default: true
    class_name:
      type: string
      title: Class Name
      default: base_component
  type: object
  title: LLMParameters

# Link element: markdown, optional bounding boxes, target URL and display text.
LinkItem:
  properties:
    type:
      type: string
      const: link
      title: Type
      description: Link item type
      default: link
    md:
      type: string
      title: Md
      description: Markdown representation preserving formatting
    bbox:
      anyOf:
        - items:
            $ref: '#/components/schemas/BBox'
          type: array
        - type: 'null'
      title: Bbox
      description: List of bounding boxes
    url:
      type: string
      title: Url
      description: URL of the link
    text:
      type: string
      title: Text
      description: Display text of the link
  type: object
  required: [md, url, text]
  title: LinkItem

# List element. `items` is recursive: each entry is a TextItem or another
# ListItem.
ListItem:
  properties:
    type:
      type: string
      const: list
      title: Type
      description: List item type
      default: list
    md:
      type: string
      title: Md
      description: Markdown representation preserving formatting
    bbox:
      anyOf:
        - items:
            $ref: '#/components/schemas/BBox'
          type: array
        - type: 'null'
      title: Bbox
      description: List of bounding boxes
    items:
      items:
        anyOf:
          - $ref: '#/components/schemas/TextItem'
          - $ref: '#/components/schemas/ListItem'
      type: array
      title: Items
      description: List of nested text or list items
    ordered:
      type: boolean
      title: Ordered
      description: Whether the list is ordered or unordered
  type: object
  required: [md, items, ordered]
  title: ListItem

# Closed object: `custom_prompt` is the only key accepted.
LlamaParseAgenticOptions:
  properties:
    custom_prompt:
      anyOf:
        - type: string
        - type: 'null'
      title: Custom Prompt
      description: 'Custom instructions for the AI parser. Use to guide extraction
        behavior, specify output formatting, or provide domain-specific context.
        Example: ''Extract financial tables with currency symbols. Format dates as
        YYYY-MM-DD.'''
  additionalProperties: false
  type: object
  title: LlamaParseAgenticOptions
  description: 'Options for AI-powered parsing tiers (cost_effective, agentic,
    agentic_plus). These options customize how the AI processes and interprets document
    content. Only applicable when using non-fast tiers.'
# Closed object: `enable` is the only key accepted.
LlamaParseCostOptimizerParameters:
  properties:
    enable:
      anyOf:
        - type: boolean
        - type: 'null'
      title: Enable
      description: 'Enable cost-optimized parsing. Routes simpler pages to faster
        processing while complex pages use full AI analysis. May reduce speed on some
        documents. IMPORTANT: Only available with ''agentic'' or ''agentic_plus'' tiers'
  additionalProperties: false
  type: object
  title: LlamaParseCostOptimizerParameters
  description: 'Cost optimizer configuration for reducing parsing costs on simpler
    pages. When enabled, the parser analyzes each page and routes simpler pages to
    faster, cheaper processing while preserving quality for complex pages. Only works
    with ''agentic'' or ''agentic_plus'' tiers.'

# Closed object. Each edge is optional: a number within [0, 1] or null.
LlamaParseCropBox:
  properties:
    bottom:
      anyOf:
        - type: number
          maximum: 1.0
          minimum: 0.0
        - type: 'null'
      title: Bottom
      description: Bottom boundary as ratio (0-1). 0=top edge, 1=bottom edge. Content
        below this line is excluded
    left:
      anyOf:
        - type: number
          maximum: 1.0
          minimum: 0.0
        - type: 'null'
      title: Left
      description: Left boundary as ratio (0-1). 0=left edge, 1=right edge. Content
        left of this line is excluded
    right:
      anyOf:
        - type: number
          maximum: 1.0
          minimum: 0.0
        - type: 'null'
      title: Right
      description: Right boundary as ratio (0-1). 0=left edge, 1=right edge. Content
        right of this line is excluded
    top:
      anyOf:
        - type: number
          maximum: 1.0
          minimum: 0.0
        - type: 'null'
      title: Top
      description: Top boundary as ratio (0-1). 0=top edge, 1=bottom edge. Content
        above this line is excluded
  additionalProperties: false
  type: object
  title: LlamaParseCropBox
  description: 'Crop box boundaries for processing only a portion of each page. All
    values are ratios from 0 to 1, where (0,0) is the top-left corner and (1,1) is the
    bottom-right corner. For example, to process only the top half of each page, set
    bottom=0.5 (keeping top=0, left=0, right=1).'
LlamaParseFastOptions: properties: {} additionalProperties: false type: object title: LlamaParseFastOptions description: 'Options for fast tier parsing (rule-based, no AI). Fast tier uses deterministic algorithms for text extraction without AI enhancement. It''s the fastest and most cost-effective option, best suited for simple documents with standard layouts. Currently has no configurable options but reserved for future expansion.' LlamaParseHtmlOptions: properties: make_all_elements_visible: anyOf: - type: boolean - type: 'null' title: Make All Elements Visible description: Force all HTML elements to be visible by overriding CSS display/visibility properties. Useful for parsing pages with hidden content or collapsed sections remove_fixed_elements: anyOf: - type: boolean - type: 'null' title: Remove Fixed Elements description: Remove fixed-position elements (headers, footers, floating buttons) that appear on every page render remove_navigation_elements: anyOf: - type: boolean - type: 'null' title: Remove Navigation Elements description: Remove navigation elements (nav bars, sidebars, menus) to focus on main content additionalProperties: false type: object title: LlamaParseHtmlOptions description: HTML/web page parsing options. LlamaParseIgnoreOptions: properties: ignore_diagonal_text: anyOf: - type: boolean - type: 'null' title: Ignore Diagonal Text description: Skip text rotated at an angle (not horizontal/vertical). Useful for ignoring watermarks or decorative angled text ignore_text_in_image: anyOf: - type: boolean - type: 'null' title: Ignore Text In Image description: Skip OCR text extraction from embedded images. Use when images contain irrelevant text (watermarks, logos) that shouldn't be in the output ignore_hidden_text: anyOf: - type: boolean - type: 'null' title: Ignore Hidden Text description: Skip text marked as hidden in the document structure. 
Some PDFs contain invisible text layers used for accessibility or search indexing additionalProperties: false type: object title: LlamaParseIgnoreOptions description: Options for ignoring specific types of text during extraction. LlamaParseInputOptions: properties: html: $ref: '#/components/schemas/LlamaParseHtmlOptions' description: HTML/web page parsing options (applies to .html, .htm files) pdf: $ref: '#/components/schemas/LlamaParsePdfOptions' description: PDF-specific parsing options (applies to .pdf files) spreadsheet: $ref: '#/components/schemas/LlamaParseSpreadsheetOptions' description: Spreadsheet parsing options (applies to .xlsx, .xls, .csv, .ods files) presentation: $ref: '#/components/schemas/LlamaParsePresentationOptions' description: Presentation parsing options (applies to .pptx, .ppt, .odp, .key files) additionalProperties: false type: object title: LlamaParseInputOptions description: 'Input format-specific parsing options. These options only apply when parsing documents of the corresponding format. LlamaParse automatically detects the input format based on file extension and content.' LlamaParseJobFailureConditions: properties: allowed_page_failure_ratio: anyOf: - type: number maximum: 1.0 exclusiveMinimum: 0.0 - type: 'null' title: Allowed Page Failure Ratio description: 'Maximum ratio of pages allowed to fail before the job fails (0-1). Example: 0.1 means job fails if more than 10% of pages fail. Default is 0.05 (5%)' fail_on_image_extraction_error: anyOf: - type: boolean - type: 'null' title: Fail On Image Extraction Error description: Fail the entire job if any embedded image cannot be extracted. By default, image extraction errors are logged but don't fail the job fail_on_image_ocr_error: anyOf: - type: boolean - type: 'null' title: Fail On Image Ocr Error description: Fail the entire job if OCR fails on any image. 
By default, OCR errors result in empty text for that image fail_on_markdown_reconstruction_error: anyOf: - type: boolean - type: 'null' title: Fail On Markdown Reconstruction Error description: Fail the entire job if markdown cannot be reconstructed for any page. By default, failed pages use fallback text extraction fail_on_buggy_font: anyOf: - type: boolean - type: 'null' title: Fail On Buggy Font description: Fail the job if a problematic font is detected that may cause incorrect text extraction. Buggy fonts can produce garbled or missing characters additionalProperties: false type: object title: LlamaParseJobFailureConditions description: 'Conditions that determine when a parsing job should fail vs complete with partial results. By default, jobs complete successfully even if some pages fail to parse. Use these settings to enforce stricter quality requirements.' LlamaParseMarkdownOptions: properties: annotate_links: anyOf: - type: boolean - type: 'null' title: Annotate Links description: Add link annotations to markdown output in the format [text](url). When false, only the link text is included tables: $ref: '#/components/schemas/LlamaParseTables' description: Table formatting options including markdown vs HTML format and merging behavior inline_images: anyOf: - type: boolean - type: 'null' title: Inline Images description: Embed images directly in markdown as base64 data URIs instead of extracting them as separate files. Useful for self-contained markdown output additionalProperties: false type: object title: LlamaParseMarkdownOptions description: Markdown output formatting options. LlamaParseOcrParameters: properties: languages: anyOf: - items: $ref: '#/components/schemas/ParserLanguages' type: array - type: 'null' title: Languages description: 'Languages to use for OCR text recognition. Specify multiple languages if document contains mixed-language content. Order matters - put primary language first. 
Example: [''en'', ''es''] for English with Spanish' additionalProperties: false type: object title: LlamaParseOcrParameters description: OCR (Optical Character Recognition) configuration parameters. LlamaParseOutputOptions: properties: markdown: $ref: '#/components/schemas/LlamaParseMarkdownOptions' description: Markdown formatting options including table styles and link annotations spatial_text: $ref: '#/components/schemas/LlamaParseSpatialTextOptions' description: Spatial text output options for preserving document layout structure tables_as_spreadsheet: $ref: '#/components/schemas/LlamaParseTablesAsSpreadsheetOptions' description: Options for exporting tables as XLSX spreadsheets extract_printed_page_number: anyOf: - type: boolean - type: 'null' title: Extract Printed Page Number description: Extract the printed page number as it appears in the document (e.g., 'Page 5 of 10', 'v', 'A-3'). Useful for referencing original page numbers images_to_save: items: type: string enum: - screenshot - embedded - layout type: array title: Images To Save description: 'Image categories to extract and save. Options: ''screenshot'' (full page renders useful for visual QA), ''embedded'' (images found within the document), ''layout'' (cropped regions from layout detection like figures and diagrams). Empty list saves no images' additional_outputs: items: type: string type: array title: Additional Outputs description: 'Optional additional output artifacts to save alongside the primary parse output. Each value opts in to generating and persisting one extra file; the empty list (default) saves none. The three accepted values are: ''stripped_md'' — per-page markdown stripped of formatting (links, bold/italic, images, HTML), saved as JSON for full-text-search indexing; fetch via `expand=stripped_markdown_content_metadata`. 
''concatenated_stripped_txt'' — all stripped pages concatenated into a single plain-text file with `\n\n---\n\n` between pages, useful for feeding the document into search or embedding pipelines as one blob; fetch via `expand=concatenated_stripped_markdown_content_metadata`. ''word_bbox'' — raw word-level bounding boxes (one JSON object per word, with page number and x/y/w/h coordinates) saved as JSONL, useful for highlighting or grounding extracted answers back to the source document; fetch via `expand=raw_words_content_metadata`.' examples: - - stripped_md - concatenated_stripped_txt - word_bbox granular_bboxes: items: type: string enum: - cell - line - word type: array title: Granular Bboxes description: Bounding-box granularity levels to compute for the parse. 'word' computes one bounding box per detected word; 'line' computes one per text line; 'cell' computes one per table cell. Multiple levels can be requested. Empty list (default) disables granular bboxes — only item-level layout boxes are returned on the result. When set, the computed boxes are not inlined on the result items; they are written to a separate `grounded_items` sidecar (JSONL, one row per page) and exposed as `result_content_metadata.grounded_items` (a presigned download URL) on the parse result. Each row matches the `GroundedJsonItem` shape. examples: - - word - line - cell additionalProperties: false type: object title: LlamaParseOutputOptions description: 'Output formatting and content extraction options. Controls how parsed content is formatted and what additional data is extracted.' LlamaParsePageRanges: properties: max_pages: anyOf: - type: integer minimum: 1.0 - type: 'null' title: Max Pages description: Maximum number of pages to process. Pages are processed in order starting from page 1. 
If both max_pages and target_pages are set, target_pages takes precedence target_pages: anyOf: - type: string - type: 'null' title: Target Pages description: 'Comma-separated list of specific pages to process using 1-based indexing. Supports individual pages and ranges. Examples: ''1,3,5'' (pages 1, 3, 5), ''1-5'' (pages 1 through 5 inclusive), ''1,3,5-8,10'' (pages 1, 3, 5-8, and 10). Pages are sorted and deduplicated automatically. Duplicate pages cause an error' additionalProperties: false type: object title: LlamaParsePageRanges description: Page selection options for processing specific pages or limiting page count. LlamaParseParameters: properties: webhook_configurations: anyOf: - items: $ref: '#/components/schemas/WebhookConfiguration' type: array - type: 'null' title: Webhook Configurations description: Outbound webhook endpoints to notify on job status changes priority: anyOf: - type: string enum: - low - medium - high - critical - type: 'null' title: Priority description: The priority for the request. This field may be ignored or overwritten depending on the organization tier. 
languages: items: $ref: '#/components/schemas/ParserLanguages' type: array minItems: 1 title: Languages parsing_instruction: anyOf: - type: string - type: 'null' title: Parsing Instruction default: '' disable_ocr: anyOf: - type: boolean - type: 'null' title: Disable Ocr default: false annotate_links: anyOf: - type: boolean - type: 'null' title: Annotate Links default: false adaptive_long_table: anyOf: - type: boolean - type: 'null' title: Adaptive Long Table default: false compact_markdown_table: anyOf: - type: boolean - type: 'null' title: Compact Markdown Table default: false disable_reconstruction: anyOf: - type: boolean - type: 'null' title: Disable Reconstruction default: false disable_image_extraction: anyOf: - type: boolean - type: 'null' title: Disable Image Extraction default: false invalidate_cache: anyOf: - type: boolean - type: 'null' title: Invalidate Cache default: false outlined_table_extraction: anyOf: - type: boolean - type: 'null' title: Outlined Table Extraction default: false aggressive_table_extraction: anyOf: - type: boolean - type: 'null' title: Aggressive Table Extraction default: false merge_tables_across_pages_in_markdown: anyOf: - type: boolean - type: 'null' title: Merge Tables Across Pages In Markdown default: false output_pdf_of_document: anyOf: - type: boolean - type: 'null' title: Output Pdf Of Document default: false do_not_cache: anyOf: - type: boolean - type: 'null' title: Do Not Cache default: false fast_mode: anyOf: - type: boolean - type: 'null' title: Fast Mode default: false skip_diagonal_text: anyOf: - type: boolean - type: 'null' title: Skip Diagonal Text default: false preserve_layout_alignment_across_pages: anyOf: - type: boolean - type: 'null' title: Preserve Layout Alignment Across Pages default: false preserve_very_small_text: anyOf: - type: boolean - type: 'null' title: Preserve Very Small Text default: false gpt4o_mode: anyOf: - type: boolean - type: 'null' title: Gpt4O Mode default: false gpt4o_api_key: anyOf: - 
type: string - type: 'null' title: Gpt4O Api Key do_not_unroll_columns: anyOf: - type: boolean - type: 'null' title: Do Not Unroll Columns default: false extract_layout: anyOf: - type: boolean - type: 'null' title: Extract Layout default: false high_res_ocr: anyOf: - type: boolean - type: 'null' title: High Res Ocr default: false html_make_all_elements_visible: anyOf: - type: boolean - type: 'null' title: Html Make All Elements Visible default: false layout_aware: anyOf: - type: boolean - type: 'null' title: Layout Aware default: false specialized_chart_parsing_agentic: anyOf: - type: boolean - type: 'null' title: Specialized Chart Parsing Agentic default: false specialized_chart_parsing_plus: anyOf: - type: boolean - type: 'null' title: Specialized Chart Parsing Plus default: false specialized_chart_parsing_efficient: anyOf: - type: boolean - type: 'null' title: Specialized Chart Parsing Efficient default: false specialized_image_parsing: anyOf: - type: boolean - type: 'null' title: Specialized Image Parsing default: false precise_bounding_box: anyOf: - type: boolean - type: 'null' title: Precise Bounding Box default: false line_level_bounding_box: anyOf: - type: boolean - type: 'null' title: Line Level Bounding Box default: false html_remove_navigation_elements: anyOf: - type: boolean - type: 'null' title: Html Remove Navigation Elements default: false html_remove_fixed_elements: anyOf: - type: boolean - type: 'null' title: Html Remove Fixed Elements default: false guess_xlsx_sheet_name: anyOf: - type: boolean - type: 'null' title: Guess Xlsx Sheet Name default: false page_separator: anyOf: - type: string - type: 'null' title: Page Separator bounding_box: anyOf: - type: string - type: 'null' title: Bounding Box bbox_top: anyOf: - type: number - type: 'null' title: Bbox Top bbox_right: anyOf: - type: number - type: 'null' title: Bbox Right bbox_bottom: anyOf: - type: number - type: 'null' title: Bbox Bottom bbox_left: anyOf: - type: number - type: 'null' title: 
Bbox Left target_pages: anyOf: - type: string - type: 'null' title: Target Pages use_vendor_multimodal_model: anyOf: - type: boolean - type: 'null' title: Use Vendor Multimodal Model default: false vendor_multimodal_model_name: anyOf: - type: string - type: 'null' title: Vendor Multimodal Model Name model: anyOf: - type: string - type: 'null' title: Model vendor_multimodal_api_key: anyOf: - type: string - type: 'null' title: Vendor Multimodal Api Key page_prefix: anyOf: - type: string - type: 'null' title: Page Prefix page_suffix: anyOf: - type: string - type: 'null' title: Page Suffix webhook_url: anyOf: - type: string - type: 'null' title: Webhook Url preset: anyOf: - type: string - type: 'null' title: Preset take_screenshot: anyOf: - type: boolean - type: 'null' title: Take Screenshot default: false is_formatting_instruction: anyOf: - type: boolean - type: 'null' title: Is Formatting Instruction default: true premium_mode: anyOf: - type: boolean - type: 'null' title: Premium Mode default: false continuous_mode: anyOf: - type: boolean - type: 'null' title: Continuous Mode default: false input_s3_path: anyOf: - type: string - type: 'null' title: Input S3 Path input_s3_region: anyOf: - type: string - type: 'null' title: Input S3 Region output_s3_path_prefix: anyOf: - type: string - type: 'null' title: Output S3 Path Prefix output_s3_region: anyOf: - type: string - type: 'null' title: Output S3 Region project_id: anyOf: - type: string - type: 'null' title: Project Id azure_openai_deployment_name: anyOf: - type: string - type: 'null' title: Azure Openai Deployment Name azure_openai_endpoint: anyOf: - type: string - type: 'null' title: Azure Openai Endpoint azure_openai_api_version: anyOf: - type: string - type: 'null' title: Azure Openai Api Version azure_openai_key: anyOf: - type: string - type: 'null' title: Azure Openai Key input_url: anyOf: - type: string - type: 'null' title: Input Url http_proxy: anyOf: - type: string - type: 'null' title: Http Proxy auto_mode: 
anyOf: - type: boolean - type: 'null' title: Auto Mode default: false auto_mode_trigger_on_regexp_in_page: anyOf: - type: string - type: 'null' title: Auto Mode Trigger On Regexp In Page auto_mode_trigger_on_text_in_page: anyOf: - type: string - type: 'null' title: Auto Mode Trigger On Text In Page auto_mode_trigger_on_table_in_page: anyOf: - type: boolean - type: 'null' title: Auto Mode Trigger On Table In Page default: false auto_mode_trigger_on_image_in_page: anyOf: - type: boolean - type: 'null' title: Auto Mode Trigger On Image In Page default: false auto_mode_configuration_json: anyOf: - type: string - type: 'null' title: Auto Mode Configuration Json structured_output: anyOf: - type: boolean - type: 'null' title: Structured Output default: false structured_output_json_schema: anyOf: - type: string - type: 'null' title: Structured Output Json Schema structured_output_json_schema_name: anyOf: - type: string - type: 'null' title: Structured Output Json Schema Name max_pages: anyOf: - type: integer - type: 'null' title: Max Pages max_pages_enforced: anyOf: - type: integer - type: 'null' title: Max Pages Enforced extract_charts: anyOf: - type: boolean - type: 'null' title: Extract Charts default: false formatting_instruction: anyOf: - type: string - type: 'null' title: Formatting Instruction complemental_formatting_instruction: anyOf: - type: string - type: 'null' title: Complemental Formatting Instruction content_guideline_instruction: anyOf: - type: string - type: 'null' title: Content Guideline Instruction spreadsheet_extract_sub_tables: anyOf: - type: boolean - type: 'null' title: Spreadsheet Extract Sub Tables default: false spreadsheet_force_formula_computation: anyOf: - type: boolean - type: 'null' title: Spreadsheet Force Formula Computation default: false spreadsheet_include_hidden_sheets: anyOf: - type: boolean - type: 'null' title: Spreadsheet Include Hidden Sheets default: false inline_images_in_markdown: anyOf: - type: boolean - type: 'null' title: 
Inline Images In Markdown default: false job_timeout_in_seconds: anyOf: - type: number - type: 'null' title: Job Timeout In Seconds job_timeout_extra_time_per_page_in_seconds: anyOf: - type: number - type: 'null' title: Job Timeout Extra Time Per Page In Seconds strict_mode_image_extraction: anyOf: - type: boolean - type: 'null' title: Strict Mode Image Extraction default: false strict_mode_image_ocr: anyOf: - type: boolean - type: 'null' title: Strict Mode Image Ocr default: false strict_mode_reconstruction: anyOf: - type: boolean - type: 'null' title: Strict Mode Reconstruction default: false strict_mode_buggy_font: anyOf: - type: boolean - type: 'null' title: Strict Mode Buggy Font default: false save_images: anyOf: - type: boolean - type: 'null' title: Save Images default: true images_to_save: anyOf: - items: type: string enum: - screenshot - embedded - layout type: array - type: 'null' title: Images To Save hide_headers: anyOf: - type: boolean - type: 'null' title: Hide Headers default: false hide_footers: anyOf: - type: boolean - type: 'null' title: Hide Footers default: false page_header_prefix: anyOf: - type: string - type: 'null' title: Page Header Prefix page_header_suffix: anyOf: - type: string - type: 'null' title: Page Header Suffix page_footer_prefix: anyOf: - type: string - type: 'null' title: Page Footer Prefix page_footer_suffix: anyOf: - type: string - type: 'null' title: Page Footer Suffix remove_hidden_text: anyOf: - type: boolean - type: 'null' title: Remove Hidden Text default: false keep_page_separator_when_merging_tables: anyOf: - type: boolean - type: 'null' title: Keep Page Separator When Merging Tables default: false ignore_document_elements_for_layout_detection: anyOf: - type: boolean - type: 'null' title: Ignore Document Elements For Layout Detection default: false output_tables_as_HTML: anyOf: - type: boolean - type: 'null' title: Output Tables As Html default: false internal_is_screenshot_job: anyOf: - type: boolean - type: 'null' 
title: Internal Is Screenshot Job default: false parse_mode: anyOf: - $ref: '#/components/schemas/ParsingMode' - type: 'null' system_prompt: anyOf: - type: string - type: 'null' title: System Prompt system_prompt_append: anyOf: - type: string - type: 'null' title: System Prompt Append user_prompt: anyOf: - type: string - type: 'null' title: User Prompt page_error_tolerance: anyOf: - type: number - type: 'null' title: Page Error Tolerance default: 0.05 replace_failed_page_mode: anyOf: - $ref: '#/components/schemas/FailPageMode' - type: 'null' default: raw_text replace_failed_page_with_error_message_prefix: anyOf: - type: string - type: 'null' title: Replace Failed Page With Error Message Prefix replace_failed_page_with_error_message_suffix: anyOf: - type: string - type: 'null' title: Replace Failed Page With Error Message Suffix markdown_table_multiline_header_separator: anyOf: - type: string - type: 'null' title: Markdown Table Multiline Header Separator presentation_out_of_bounds_content: anyOf: - type: boolean - type: 'null' title: Presentation Out Of Bounds Content default: false presentation_skip_embedded_data: anyOf: - type: boolean - type: 'null' title: Presentation Skip Embedded Data default: false tier: anyOf: - type: string - type: 'null' title: Tier version: anyOf: - type: string - type: 'null' title: Version extract_printed_page_number: anyOf: - type: boolean - type: 'null' title: Extract Printed Page Number default: false enable_cost_optimizer: anyOf: - type: boolean - type: 'null' title: Enable Cost Optimizer type: object title: LlamaParseParameters LlamaParsePdfOptions: properties: {} additionalProperties: false type: object title: LlamaParsePdfOptions LlamaParsePresentationOptions: properties: out_of_bounds_content: anyOf: - type: boolean - type: 'null' title: Out Of Bounds Content description: Extract content positioned outside the visible slide area. 
Some presentations have hidden notes or content that extends beyond slide boundaries skip_embedded_data: anyOf: - type: boolean - type: 'null' title: Skip Embedded Data description: Skip extraction of embedded chart data tables. When true, only the visual representation of charts is captured, not the underlying data additionalProperties: false type: object title: LlamaParsePresentationOptions description: Presentation (PowerPoint, Keynote, ODP) parsing options. LlamaParseProcessingControl: properties: timeouts: $ref: '#/components/schemas/LlamaParseTimeouts' description: Timeout settings for job execution. Increase for large or complex documents job_failure_conditions: $ref: '#/components/schemas/LlamaParseJobFailureConditions' description: Quality thresholds that determine when a job should fail vs complete with partial results additionalProperties: false type: object title: LlamaParseProcessingControl description: Job processing controls for timeouts and failure handling. LlamaParseProcessingOptions: properties: ignore: $ref: '#/components/schemas/LlamaParseIgnoreOptions' description: Options for ignoring specific text types (diagonal, hidden, text in images) ocr_parameters: $ref: '#/components/schemas/LlamaParseOcrParameters' description: OCR configuration including language detection settings aggressive_table_extraction: anyOf: - type: boolean - type: 'null' title: Aggressive Table Extraction description: Use aggressive heuristics to detect table boundaries, even without visible borders. Useful for documents with borderless or complex tables disable_heuristics: anyOf: - type: boolean - type: 'null' title: Disable Heuristics description: Disable automatic heuristics including outlined table extraction and adaptive long table handling. 
Use when heuristics produce incorrect results specialized_chart_parsing: anyOf: - type: string enum: - agentic_plus - agentic - efficient - type: 'null' title: Specialized Chart Parsing description: 'Enable AI-powered chart analysis. Modes: ''efficient'' (fast, lower cost), ''agentic'' (balanced), ''agentic_plus'' (highest accuracy). Automatically enables extract_layout and precise_bounding_box when set' cost_optimizer: anyOf: - $ref: '#/components/schemas/LlamaParseCostOptimizerParameters' - type: 'null' description: Cost optimization settings. Only available with 'agentic' or 'agentic_plus' tiers auto_mode_configuration: anyOf: - items: $ref: '#/components/schemas/AutoModeConfigurationEntry' type: array - type: 'null' title: Auto Mode Configuration description: Conditional processing rules that apply different parsing options based on page content, document structure, or filename patterns. Each entry defines trigger conditions and the parsing configuration to apply when triggered additionalProperties: false type: object title: LlamaParseProcessingOptions description: 'Processing options shared across all parsing tiers. These options control how documents are analyzed and processed regardless of the selected tier. Some options automatically enable additional behaviors (e.g., specialized_chart_parsing enables extract_layout and precise_bounding_box).' LlamaParseSpatialTextOptions: properties: preserve_layout_alignment_across_pages: anyOf: - type: boolean - type: 'null' title: Preserve Layout Alignment Across Pages description: Maintain consistent text column alignment across page boundaries. Automatically enabled for document-level parsing modes preserve_very_small_text: anyOf: - type: boolean - type: 'null' title: Preserve Very Small Text description: Include text below the normal size threshold. 
Useful for footnotes, watermarks, or fine print that might otherwise be filtered out do_not_unroll_columns: anyOf: - type: boolean - type: 'null' title: Do Not Unroll Columns description: Keep multi-column layouts intact instead of linearizing columns into sequential text. Automatically enabled for non-fast tiers additionalProperties: false type: object title: LlamaParseSpatialTextOptions description: 'Spatial text output options for preserving document layout. Spatial text maintains the visual positioning of text elements, useful for documents where layout conveys meaning (forms, tables, multi-column layouts).' LlamaParseSpreadsheetOptions: properties: detect_sub_tables_in_sheets: anyOf: - type: boolean - type: 'null' title: Detect Sub Tables In Sheets description: Detect and extract multiple tables within a single sheet. Useful when spreadsheets contain several data regions separated by blank rows/columns force_formula_computation_in_sheets: anyOf: - type: boolean - type: 'null' title: Force Formula Computation In Sheets description: Compute formula results instead of extracting formula text. Use when you need calculated values rather than formula definitions include_hidden_sheets: anyOf: - type: boolean - type: 'null' title: Include Hidden Sheets description: Parse hidden sheets in addition to visible ones. By default, hidden sheets are skipped additionalProperties: false type: object title: LlamaParseSpreadsheetOptions description: Spreadsheet (Excel, CSV, ODS) parsing options. 
LlamaParseSupportedFileExtensions: type: string enum: - .pdf - .abw - .awt - .cgm - .cwk - .doc - .docm - .docx - .dot - .dotm - .dotx - .fodg - .fodp - .fopd - .fodt - .fb2 - .hwp - .lwp - .mcw - .mw - .mwd - .odf - .odt - .otg - .ott - .pages - .pbd - .psw - .rtf - .sda - .sdd - .sdp - .sdw - .sgl - .std - .stw - .sxd - .sxg - .sxm - .sxw - .uof - .uop - .uot - .vor - .wpd - .wps - .wpt - .wri - .wn - .xml - .zabw - .key - .odp - .odg - .otp - .pot - .potm - .potx - .ppt - .pptm - .pptx - .sti - .sxi - .vsd - .vsdm - .vsdx - .vdx - .bmp - .gif - .heic - .heif - .jpg - .jpeg - .png - .svg - .tif - .tiff - .webp - .htm - .html - .xhtm - .csv - .dbf - .dif - .et - .eth - .fods - .numbers - .ods - .ots - .prn - .qpw - .slk - .stc - .sxc - .sylk - .tsv - .uos1 - .uos2 - .uos - .wb1 - .wb2 - .wb3 - .wk1 - .wk2 - .wk3 - .wk4 - .wks - .wq1 - .wq2 - .xlr - .xls - .xlsb - .xlsm - .xlsx - .xlw - .azw - .azw3 - .azw4 - .cb7 - .cbc - .cbr - .cbz - .chm - .djvu - .epub - .fbz - .htmlz - .lit - .lrf - .md - .mobi - .pdb - .pml - .prc - .rb - .snb - .tcr - .txtz - .m4a - .mp3 - .mp4 - .mpeg - .mpga - .wav - .webm - .yxmd title: LlamaParseSupportedFileExtensions description: Enum for supported file extensions. LlamaParseTables: properties: compact_markdown_tables: anyOf: - type: boolean - type: 'null' title: Compact Markdown Tables description: Remove extra whitespace padding in markdown table cells for more compact output output_tables_as_markdown: anyOf: - type: boolean - type: 'null' title: Output Tables As Markdown description: Output tables as markdown pipe tables instead of HTML <table> tags. Markdown tables are simpler but cannot represent complex structures like merged cells markdown_table_multiline_separator: anyOf: - type: string - type: 'null' title: Markdown Table Multiline Separator description: 'Separator string for multiline cell content in markdown tables. 
Example: ''<br>'' to preserve line breaks, '' '' to join with spaces' merge_continued_tables: anyOf: - type: boolean - type: 'null' title: Merge Continued Tables description: Automatically merge tables that span multiple pages into a single table. The merged table appears on the first page with merged_from_pages metadata additionalProperties: false type: object title: LlamaParseTables description: Table formatting options for markdown output. LlamaParseTablesAsSpreadsheetOptions: properties: enable: anyOf: - type: boolean - type: 'null' title: Enable description: Whether this option is enabled guess_sheet_name: type: boolean title: Guess Sheet Name description: Automatically generate descriptive sheet names from table context (headers, surrounding text) instead of using generic names like 'Table_1' default: true additionalProperties: false type: object title: LlamaParseTablesAsSpreadsheetOptions description: Options for exporting extracted tables as XLSX spreadsheet files. LlamaParseTimeouts: properties: base_in_seconds: anyOf: - type: integer maximum: 7200.0 exclusiveMinimum: 0.0 - type: 'null' title: Base In Seconds description: Base timeout for the job in seconds (max 7200 = 2 hours). This is the minimum time allowed regardless of document size extra_time_per_page_in_seconds: anyOf: - type: integer maximum: 300.0 exclusiveMinimum: 0.0 - type: 'null' title: Extra Time Per Page In Seconds description: Additional timeout per page in seconds (max 300 = 5 minutes). Total timeout = base + (this value × page count) additionalProperties: false type: object title: LlamaParseTimeouts description: 'Job timeout configuration. Total timeout = base_in_seconds + (extra_time_per_page_in_seconds × page_count). Use these settings for large documents or complex parsing that needs more time.' LlamaParseWebhookConfiguration: properties: webhook_url: anyOf: - type: string pattern: '^https?:' - type: 'null' title: Webhook Url description: HTTP or HTTPS URL to receive webhook POST requests. 
Must be publicly accessible webhook_headers: anyOf: - additionalProperties: true type: object - type: 'null' title: Webhook Headers description: 'Custom HTTP headers to include in webhook requests. Use for authentication tokens or custom routing. Example: {''Authorization'': ''Bearer xyz''}' webhook_events: anyOf: - items: type: string type: array - type: 'null' title: Webhook Events description: 'Events that trigger this webhook. Options: ''parse.success'' (job completed), ''parse.error'' (job failed), ''parse.partial_success'' (some pages failed), ''parse.pending'', ''parse.running'', ''parse.cancelled''. If not specified, webhook fires for all events' examples: - - parse.success - parse.error webhook_output_format: anyOf: - type: string enum: - string - json - type: 'null' title: Webhook Output Format description: Format of the webhook payload body. 'string' (default) sends the payload as a JSON-encoded string; 'json' sends it as a JSON object. examples: - json additionalProperties: false type: object title: LlamaParseWebhookConfiguration description: 'Webhook configuration for receiving parsing job notifications. Webhooks are called when specified events occur during job processing. Configure multiple webhook configurations to send to different endpoints.' ManagedIngestionStatus: type: string enum: - NOT_STARTED - IN_PROGRESS - SUCCESS - ERROR - PARTIAL_SUCCESS - CANCELLED title: ManagedIngestionStatus description: Status of managed ingestion with partial updates. ManagedIngestionStatusResponse: properties: job_id: anyOf: - type: string format: uuid - type: 'null' title: Job Id description: ID of the latest job. deployment_date: anyOf: - type: string format: date-time - type: 'null' title: Deployment Date description: Date of the deployment. status: $ref: '#/components/schemas/ManagedIngestionStatus' description: Status of the ingestion. 
error: anyOf: - items: $ref: '#/components/schemas/IngestionErrorResponse' type: array - type: 'null' title: Error description: List of errors that occurred during ingestion. effective_at: anyOf: - type: string format: date-time - type: 'null' title: Effective At description: When the status is effective type: object required: - status title: ManagedIngestionStatusResponse ManagedOpenAIEmbedding: properties: model_name: type: string const: openai-text-embedding-3-small title: Model Name description: The name of the OpenAI embedding model. default: openai-text-embedding-3-small embed_batch_size: type: integer maximum: 2048.0 exclusiveMinimum: 0.0 title: Embed Batch Size description: The batch size for embedding calls. default: 10 num_workers: anyOf: - type: integer - type: 'null' title: Num Workers description: The number of workers to use for async embedding calls. class_name: type: string title: Class Name default: ManagedOpenAIEmbedding type: object title: ManagedOpenAIEmbedding ManagedOpenAIEmbeddingConfig: properties: type: type: string const: MANAGED_OPENAI_EMBEDDING title: Type description: Type of the embedding model. default: MANAGED_OPENAI_EMBEDDING component: $ref: '#/components/schemas/ManagedOpenAIEmbedding' description: Configuration for the Managed OpenAI embedding model. 
type: object title: ManagedOpenAIEmbeddingConfig MarkdownResult: properties: pages: items: anyOf: - $ref: '#/components/schemas/MarkdownResultPage' - $ref: '#/components/schemas/FailedMarkdownPage' type: array title: Pages description: List of markdown pages or failed page entries type: object required: - pages title: MarkdownResult MarkdownResultPage: properties: page_number: type: integer title: Page Number description: Page number of the document markdown: type: string title: Markdown description: Markdown content of the page header: anyOf: - type: string - type: 'null' title: Header description: Header of the page in markdown footer: anyOf: - type: string - type: 'null' title: Footer description: Footer of the page in markdown success: type: boolean const: true title: Success description: Success indicator type: object required: - page_number - markdown - success title: MarkdownResultPage MessageAnnotation: properties: type: type: string title: Type data: type: string contentMediaType: application/json contentSchema: {} title: Data class_name: type: string title: Class Name default: base_component type: object required: - type - data title: MessageAnnotation MessageRole: type: string enum: - system - developer - user - assistant - function - tool - chatbot - model title: MessageRole description: Message role. MetadataDict: additionalProperties: $ref: '#/components/schemas/MetadataValue' type: object MetadataFilter: properties: key: type: string title: Key value: anyOf: - type: integer - type: number - type: string - items: type: string type: array - items: type: number type: array - items: type: integer type: array - type: 'null' title: Value operator: $ref: '#/components/schemas/FilterOperator' default: == type: object required: - key - value title: MetadataFilter description: 'Comprehensive metadata filter for vector stores to support more operators. Value uses Strict types, as int, float and str are compatible types and were all converted to string before. 
See: https://docs.pydantic.dev/latest/usage/types/#strict-types' MetadataFilters: properties: filters: items: anyOf: - $ref: '#/components/schemas/MetadataFilter' - $ref: '#/components/schemas/MetadataFilters' type: array title: Filters condition: anyOf: - $ref: '#/components/schemas/FilterCondition' - type: 'null' default: and type: object required: - filters title: MetadataFilters description: Metadata filters for vector stores. MetadataListValue: items: type: string type: array MetadataResult: properties: pages: items: $ref: '#/components/schemas/MetadataResultPage' type: array title: Pages description: List of page metadata entries type: object required: - pages title: MetadataResult description: Result containing metadata (page level and general) for the parsed document. MetadataResultPage: properties: page_number: type: integer title: Page Number description: Page number of the document confidence: anyOf: - type: number - type: 'null' title: Confidence description: Confidence score for the page parsing (0-1) speaker_notes: anyOf: - type: string - type: 'null' title: Speaker Notes description: Speaker notes from presentation slides slide_section_name: anyOf: - type: string - type: 'null' title: Slide Section Name description: Section name from presentation slides printed_page_number: anyOf: - type: string - type: 'null' title: Printed Page Number description: Printed page number as it appears in the document original_orientation_angle: anyOf: - type: integer - type: 'null' title: Original Orientation Angle description: Original orientation angle of the page in degrees cost_optimized: anyOf: - type: boolean - type: 'null' title: Cost Optimized description: Whether cost-optimized parsing was used for the page triggered_auto_mode: anyOf: - type: boolean - type: 'null' title: Triggered Auto Mode description: Whether auto mode was triggered for the page type: object required: - page_number title: MetadataResultPage description: Page-level metadata including 
confidence scores and presentation-specific data. MetadataScalarValue: anyOf: - type: string - type: integer - type: number - type: boolean - type: 'null' MetadataValue: anyOf: - $ref: '#/components/schemas/MetadataScalarValue' - $ref: '#/components/schemas/MetadataListValue' MongoStaticFilters: properties: parsed_directory_file_id: anyOf: - $ref: '#/components/schemas/FilterType_str_' - type: 'null' type: object title: MongoStaticFilters NodeRelationship: type: string enum: - '1' - '2' - '3' - '4' - '5' title: NodeRelationship description: "Node relationships used in `BaseNode` class.\n\nAttributes:\n\ \ SOURCE: The node is the source document.\n PREVIOUS: The node is the\ \ previous node in the document.\n NEXT: The node is the next node in the\ \ document.\n PARENT: The node is the parent node in the document.\n \ \ CHILD: The node is a child node in the document." NoneChunkingConfig: properties: mode: type: string const: none title: Mode default: none type: object title: NoneChunkingConfig NoneSegmentationConfig: properties: mode: type: string const: none title: Mode default: none type: object title: NoneSegmentationConfig ObjectType: type: string enum: - '1' - '2' - '3' - '4' - '5' title: ObjectType OpenAIEmbedding: properties: model_name: type: string title: Model Name description: The name of the OpenAI embedding model. default: text-embedding-ada-002 embed_batch_size: type: integer maximum: 2048.0 exclusiveMinimum: 0.0 title: Embed Batch Size description: The batch size for embedding calls. default: 10 num_workers: anyOf: - type: integer - type: 'null' title: Num Workers description: The number of workers to use for async embedding calls. additional_kwargs: additionalProperties: true type: object title: Additional Kwargs description: Additional kwargs for the OpenAI API. api_key: anyOf: - type: string - type: 'null' title: Api Key description: The OpenAI API key. 
api_base: anyOf: - type: string - type: 'null' title: Api Base description: The base URL for OpenAI API. default: https://api.openai.com/v1 api_version: anyOf: - type: string - type: 'null' title: Api Version description: The version for OpenAI API. default: '' max_retries: type: integer minimum: 0.0 title: Max Retries description: Maximum number of retries. default: 10 timeout: type: number minimum: 0.0 title: Timeout description: Timeout for each request. default: 60.0 default_headers: anyOf: - additionalProperties: type: string type: object - type: 'null' title: Default Headers description: The default headers for API requests. reuse_client: type: boolean title: Reuse Client description: Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. default: true dimensions: anyOf: - type: integer - type: 'null' title: Dimensions description: The number of dimensions on the output embedding vectors. Works only with v3 embedding models. class_name: type: string title: Class Name default: OpenAIEmbedding type: object title: OpenAIEmbedding OpenAIEmbeddingConfig: properties: type: type: string const: OPENAI_EMBEDDING title: Type description: Type of the embedding model. default: OPENAI_EMBEDDING component: $ref: '#/components/schemas/OpenAIEmbedding' description: Configuration for the OpenAI embedding model. type: object title: OpenAIEmbeddingConfig Organization: properties: id: type: string format: uuid title: Id description: Unique identifier created_at: anyOf: - type: string format: date-time - type: 'null' title: Created At description: Creation datetime updated_at: anyOf: - type: string format: date-time - type: 'null' title: Updated At description: Update datetime name: type: string maxLength: 3000 minLength: 1 title: Name description: A name for the organization. 
parse_plan_level: $ref: '#/components/schemas/ParsePlanLevel' description: '[Deprecated] Whether the organization is a Parse Premium customer.' default: DEFAULT stripe_customer_id: anyOf: - type: string - type: 'null' title: Stripe Customer Id description: The Stripe customer ID for the organization. feature_flags: anyOf: - additionalProperties: true type: object - type: 'null' title: Feature Flags description: Feature flags for the organization. type: object required: - id - name title: Organization description: Schema for an organization. OrganizationQueryResponse: properties: items: items: $ref: '#/components/schemas/OrganizationResponse' type: array title: Items description: The list of items. next_page_token: anyOf: - type: string - type: 'null' title: Next Page Token description: A token, which can be sent as page_token to retrieve the next page. If this field is omitted, there are no subsequent pages. total_size: anyOf: - type: integer - type: 'null' title: Total Size description: The total number of items available. This is only populated when specifically requested. The value may be an estimate and can be used for display purposes only. type: object required: - items title: OrganizationQueryResponse description: API query response schema for organizations. OrganizationResponse: properties: id: type: string title: Id description: The organization's unique identifier. created_at: anyOf: - type: string format: date-time - type: 'null' title: Created At description: Creation datetime updated_at: anyOf: - type: string format: date-time - type: 'null' title: Updated At description: Update datetime name: type: string title: Name description: The organization's display name. type: object required: - id - name title: OrganizationResponse description: API response schema for an organization. PGVectorDistanceMethod: type: string enum: - l2 - ip - cosine - l1 - hamming - jaccard title: PGVectorDistanceMethod description: 'Distance methods for PGVector. 
Docs: https://github.com/pgvector/pgvector?tab=readme-ov-file#query-options' PGVectorHNSWSettings: properties: ef_construction: type: integer minimum: 1.0 title: Ef Construction description: The number of edges to use during the construction phase. default: 64 ef_search: type: integer minimum: 1.0 title: Ef Search description: The number of edges to use during the search phase. default: 40 m: type: integer minimum: 1.0 title: M description: The number of bi-directional links created for each new element. default: 16 vector_type: $ref: '#/components/schemas/PGVectorVectorType' description: The type of vector to use. default: vector distance_method: $ref: '#/components/schemas/PGVectorDistanceMethod' description: The distance method to use. default: cosine type: object title: PGVectorHNSWSettings description: HNSW settings for PGVector. PGVectorVectorType: type: string enum: - vector - half_vec - bit - sparse_vec title: PGVectorVectorType description: 'Vector storage formats for PGVector. 
Docs: https://github.com/pgvector/pgvector?tab=readme-ov-file#query-options' PageFigureMetadata: properties: figure_name: type: string title: Figure Name description: The name of the figure file_id: type: string format: uuid title: File Id description: The ID of the file that the figure was taken from page_index: type: integer minimum: 0.0 title: Page Index description: The index of the page for which the figure is taken (0-indexed) figure_size: type: integer minimum: 0.0 title: Figure Size description: The size of the figure in bytes is_likely_noise: type: boolean title: Is Likely Noise description: Whether the figure is likely to be noise default: false confidence: type: number maximum: 1.0 minimum: 0.0 title: Confidence description: The confidence of the figure metadata: anyOf: - additionalProperties: true type: object - type: 'null' title: Metadata description: Metadata for the figure type: object required: - figure_name - file_id - page_index - figure_size - confidence title: PageFigureMetadata PageFigureNodeWithScore: properties: node: $ref: '#/components/schemas/PageFigureMetadata' score: type: number title: Score description: The score of the figure node class_name: type: string title: Class Name default: PageFigureNodeWithScore type: object required: - node - score title: PageFigureNodeWithScore description: Page figure metadata with score PageScreenshotMetadata: properties: page_index: type: integer minimum: 0.0 title: Page Index description: The index of the page for which the screenshot is taken (0-indexed) file_id: type: string format: uuid title: File Id description: The ID of the file that the page screenshot was taken from image_size: type: integer minimum: 0.0 title: Image Size description: The size of the image in bytes metadata: anyOf: - additionalProperties: true type: object - type: 'null' title: Metadata description: Metadata for the screenshot type: object required: - page_index - file_id - image_size title: PageScreenshotMetadata 
# Scored wrapper around a PageScreenshotMetadata node; `node` and `score`
# are the only required fields.
PageScreenshotNodeWithScore:
  title: PageScreenshotNodeWithScore
  description: Page screenshot metadata with score
  type: object
  required: [node, score]
  properties:
    node:
      $ref: '#/components/schemas/PageScreenshotMetadata'
    score:
      type: number
      title: Score
      description: The score of the screenshot node
    class_name:
      type: string
      title: Class Name
      # NOTE(review): the default is the generic name, not this schema's
      # own title - confirm that is intended.
      default: NodeWithScore

# Segmentation config whose `mode` is pinned to "page" by `const`.
PageSegmentationConfig:
  title: PageSegmentationConfig
  type: object
  properties:
    mode:
      type: string
      const: page
      title: Mode
      default: page
    page_separator:
      type: string
      title: Page Separator
      default: ' --- '

# skip/limit page of ExtractRun entries; all four fields are required.
PaginatedExtractRunsResponse:
  title: PaginatedExtractRunsResponse
  description: Schema for paginated extraction runs response.
  type: object
  required: [items, total, skip, limit]
  properties:
    items:
      type: array
      items:
        $ref: '#/components/schemas/ExtractRun'
      title: Items
      description: The list of extraction runs
    total:
      type: integer
      title: Total
      description: The total number of extraction runs
    skip:
      type: integer
      title: Skip
      description: The number of extraction runs skipped
    limit:
      type: integer
      title: Limit
      description: The maximum number of extraction runs returned
# offset/limit page of CloudDocument entries; every field is required.
PaginatedListCloudDocumentsResponse:
  title: PaginatedListCloudDocumentsResponse
  type: object
  required: [documents, limit, offset, total_count]
  properties:
    documents:
      type: array
      items:
        $ref: '#/components/schemas/CloudDocument'
      title: Documents
      description: The documents to list
    limit:
      type: integer
      title: Limit
      description: The limit of the documents
    offset:
      type: integer
      title: Offset
      description: The offset of the documents
    total_count:
      type: integer
      title: Total Count
      description: The total number of documents

# Same shape as the cloud-documents page, carrying PipelineFile entries.
PaginatedListPipelineFilesResponse:
  title: PaginatedListPipelineFilesResponse
  description: Paginated list of pipeline files.
  type: object
  required: [files, limit, offset, total_count]
  properties:
    files:
      type: array
      items:
        $ref: '#/components/schemas/PipelineFile'
      title: Files
      description: The files to list
    limit:
      type: integer
      title: Limit
      description: The limit of the files
    offset:
      type: integer
      title: Offset
      description: The offset of the files
    total_count:
      type: integer
      title: Total Count
      description: The total number of files

# The four PaginatedResponse_* schemas below differ only in the item $ref
# and the title; `items` is the only required field in each.
PaginatedResponse_AgentData_:
  title: PaginatedResponse[AgentData]
  type: object
  required: [items]
  properties:
    items:
      type: array
      items:
        $ref: '#/components/schemas/AgentData'
      title: Items
      description: The list of items.
    next_page_token:
      anyOf: [{type: string}, {type: 'null'}]
      title: Next Page Token
      description: A token, which can be sent as page_token to retrieve the next
        page. If this field is omitted, there are no subsequent pages.
    total_size:
      anyOf: [{type: integer}, {type: 'null'}]
      title: Total Size
      description: The total number of items available. This is only populated
        when specifically requested. The value may be an estimate and can be
        used for display purposes only.

PaginatedResponse_AggregateGroup_:
  title: PaginatedResponse[AggregateGroup]
  type: object
  required: [items]
  properties:
    items:
      type: array
      items:
        $ref: '#/components/schemas/AggregateGroup'
      title: Items
      description: The list of items.
    next_page_token:
      anyOf: [{type: string}, {type: 'null'}]
      title: Next Page Token
      description: A token, which can be sent as page_token to retrieve the next
        page. If this field is omitted, there are no subsequent pages.
    total_size:
      anyOf: [{type: integer}, {type: 'null'}]
      title: Total Size
      description: The total number of items available. This is only populated
        when specifically requested. The value may be an estimate and can be
        used for display purposes only.

PaginatedResponse_ClassifyJob_:
  title: PaginatedResponse[ClassifyJob]
  type: object
  required: [items]
  properties:
    items:
      type: array
      items:
        $ref: '#/components/schemas/ClassifyJob'
      title: Items
      description: The list of items.
    next_page_token:
      anyOf: [{type: string}, {type: 'null'}]
      title: Next Page Token
      description: A token, which can be sent as page_token to retrieve the next
        page. If this field is omitted, there are no subsequent pages.
    total_size:
      anyOf: [{type: integer}, {type: 'null'}]
      title: Total Size
      description: The total number of items available. This is only populated
        when specifically requested. The value may be an estimate and can be
        used for display purposes only.

PaginatedResponse_SpreadsheetJob_:
  title: PaginatedResponse[SpreadsheetJob]
  type: object
  required: [items]
  properties:
    items:
      type: array
      items:
        $ref: '#/components/schemas/SpreadsheetJob'
      title: Items
      description: The list of items.
    next_page_token:
      anyOf: [{type: string}, {type: 'null'}]
      title: Next Page Token
      description: A token, which can be sent as page_token to retrieve the next
        page. If this field is omitted, there are no subsequent pages.
    total_size:
      anyOf: [{type: integer}, {type: 'null'}]
      title: Total Size
      description: The total number of items available. This is only populated
        when specifically requested. The value may be an estimate and can be
        used for display purposes only.

# One parse concern: a type string plus human-readable details, both required.
ParseConcernItem:
  title: ParseConcernItem
  type: object
  required: [type, details]
  properties:
    type:
      type: string
      title: Type
      description: Type of parse concern (e.g. header_value_type_mismatch,
        inconsistent_row_cell_count)
    details:
      type: string
      title: Details
      description: Human-readable details about the concern

# Token-paginated page of ParseJobResponse entries.
ParseJobQueryResponse:
  title: ParseJobQueryResponse
  description: Response schema for paginated parse job queries.
  type: object
  required: [items]
  properties:
    items:
      type: array
      items:
        $ref: '#/components/schemas/ParseJobResponse'
      title: Items
      description: The list of items.
    next_page_token:
      anyOf: [{type: string}, {type: 'null'}]
      title: Next Page Token
      description: A token, which can be sent as page_token to retrieve the next
        page. If this field is omitted, there are no subsequent pages.
    total_size:
      anyOf: [{type: integer}, {type: 'null'}]
      title: Total Size
      description: The total number of items available. This is only populated
        when specifically requested. The value may be an estimate and can be
        used for display purposes only.
# A parse job record; `id`, `project_id` and `status` are required, the
# remaining fields are nullable.
ParseJobResponse:
  title: ParseJobResponse
  description: A parse job.
  type: object
  required: [id, project_id, status]
  properties:
    id:
      type: string
      title: Id
      description: Unique parse job identifier
      examples:
        - pjb-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee
    created_at:
      anyOf: [{type: string, format: date-time}, {type: 'null'}]
      title: Created At
      description: Creation datetime
    updated_at:
      anyOf: [{type: string, format: date-time}, {type: 'null'}]
      title: Updated At
      description: Update datetime
    project_id:
      type: string
      title: Project Id
      description: Project this job belongs to
      examples:
        - prj-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee
    status:
      type: string
      enum:
        - PENDING
        - RUNNING
        - COMPLETED
        - FAILED
        - CANCELLED
      title: Status
      description: 'Current job status: PENDING, RUNNING, COMPLETED, FAILED, or
        CANCELLED'
    error_message:
      anyOf: [{type: string}, {type: 'null'}]
      title: Error Message
      description: Error details when status is FAILED
    name:
      anyOf: [{type: string}, {type: 'null'}]
      title: Name
      description: Optional display name for this parse job
      examples:
        - Q4 Financial Report
    tier:
      anyOf: [{type: string}, {type: 'null'}]
      title: Tier
      description: Parsing tier used for this job
      examples:
        - fast
        - cost_effective
        - agentic
        - agentic_plus

# Two-valued plan level enum.
ParsePlanLevel:
  title: ParsePlanLevel
  description: Enum for the Parse plan level.
  type: string
  enum:
    - DEFAULT
    - PREMIUM

# Parse request body: tier/version selection, per-tier options, and the input
# source (`file_id` or `source_url`). Unknown keys are rejected via
# `additionalProperties: false`.
ParseRequestConfiguration:
  title: ParseRequestConfiguration
  description: 'Unified configuration for parsing with flexible input source.
    Specify exactly one input source: either an existing file by ID or a URL to
    fetch. This endpoint consolidates file-based and URL-based parsing into a
    single interface.'
  type: object
  additionalProperties: false
  required: [tier, version]
  properties:
    tier:
      type: string
      enum:
        - fast
        - cost_effective
        - agentic
        - agentic_plus
      title: Tier
      description: 'Parsing tier: ''fast'' (rule-based, cheapest),
        ''cost_effective'' (balanced), ''agentic'' (AI-powered with custom
        prompts), or ''agentic_plus'' (premium AI with highest accuracy)'
    version:
      # Open enum: the first branch lists known values, the second branch
      # still admits any other string.
      anyOf:
        - type: string
          enum:
            - latest
            - '2026-06-05'
            - '2026-06-04'
            - '2025-12-11'
          x-enum-order-preserved: true
        - type: string
      title: Version
      description: 'Version for the selected tier. Use `latest`, or pin one of
        that tier''s dated versions. Current `latest` by tier: - `fast`:
        `2025-12-11` - `cost_effective`: `2026-06-05` - `agentic`: `2026-06-04`
        - `agentic_plus`: `2026-06-04` Full list: `GET
        /api/v2/parse/versions`.'
    client_name:
      anyOf: [{type: string}, {type: 'null'}]
      title: Client Name
      description: 'Identifier for the client/application making the request.
        Used for analytics and debugging. Example: ''my-app-v2'''
    processing_options:
      $ref: '#/components/schemas/LlamaParseProcessingOptions'
      description: Document processing options including OCR, table extraction,
        and chart parsing
    fast_options:
      anyOf:
        - $ref: '#/components/schemas/LlamaParseFastOptions'
        - type: 'null'
      description: Fast tier configuration options. Auto-initialized when
        tier='fast'. Cannot be used with other tiers
    agentic_options:
      anyOf:
        - $ref: '#/components/schemas/LlamaParseAgenticOptions'
        - type: 'null'
      description: AI-powered tier configuration (custom prompts).
        Auto-initialized for cost_effective/agentic/agentic_plus tiers. Cannot
        be used with fast tier
    webhook_configurations:
      type: array
      items:
        $ref: '#/components/schemas/LlamaParseWebhookConfiguration'
      title: Webhook Configurations
      description: Webhook endpoints for job status notifications. Multiple
        webhooks can be configured for different events or services
    input_options:
      $ref: '#/components/schemas/LlamaParseInputOptions'
      description: Format-specific options (HTML, PDF, spreadsheet,
        presentation). Applied based on detected input file type
    crop_box:
      $ref: '#/components/schemas/LlamaParseCropBox'
      description: Crop boundaries to process only a portion of each page.
        Values are ratios 0-1 from page edges
    page_ranges:
      $ref: '#/components/schemas/LlamaParsePageRanges'
      description: 'Page selection: limit total pages or specify exact pages to
        process'
    disable_cache:
      anyOf: [{type: boolean}, {type: 'null'}]
      title: Disable Cache
      description: Bypass result caching and force re-parsing. Use when document
        content may have changed or you need fresh results
    output_options:
      $ref: '#/components/schemas/LlamaParseOutputOptions'
      description: Output formatting options for markdown, text, and extracted
        images
    processing_control:
      $ref: '#/components/schemas/LlamaParseProcessingControl'
      description: Job execution controls including timeouts and failure
        thresholds
    file_id:
      anyOf: [{type: string}, {type: 'null'}]
      title: File Id
      description: ID of an existing file in the project to parse. Mutually
        exclusive with source_url
    source_url:
      anyOf:
        - type: string
          pattern: '^https?:'
        - type: 'null'
      title: Source Url
      description: Public URL of the document to parse. Mutually exclusive with
        file_id
    http_proxy:
      anyOf:
        - type: string
          pattern: '^https?:'
        - type: 'null'
      title: Http Proxy
      description: HTTP/HTTPS proxy for fetching source_url. Ignored if using
        file_id
# Parse result envelope: `job` is the only required field; every other
# property is nullable.
ParseResultResponse:
  title: ParseResultResponse
  description: 'Parse result response with job status and optional content or
    metadata. The job field is always included. Other fields are included based
    on expand parameters.'
  type: object
  required: [job]
  properties:
    job:
      $ref: '#/components/schemas/ParseJobResponse'
      description: Parse job status and metadata
    result_content_metadata:
      anyOf:
        # Map whose values are ResultTypeMetadata entries.
        - type: object
          additionalProperties:
            $ref: '#/components/schemas/ResultTypeMetadata'
        - type: 'null'
      title: Result Content Metadata
      description: Metadata including size, existence, and presigned URLs for
        result files
    text:
      anyOf:
        - $ref: '#/components/schemas/TextResult'
        - type: 'null'
      description: Plain text result (if requested)
    markdown:
      anyOf:
        - $ref: '#/components/schemas/MarkdownResult'
        - type: 'null'
      description: Markdown result (if requested)
    items:
      anyOf:
        - $ref: '#/components/schemas/StructuredResult'
        - type: 'null'
      description: Structured JSON result (if requested)
    metadata:
      anyOf:
        - $ref: '#/components/schemas/MetadataResult'
        - type: 'null'
      description: Page-level metadata including confidence scores and
        presentation data (if requested)
    markdown_full:
      anyOf: [{type: string}, {type: 'null'}]
      title: Markdown Full
      description: Full raw markdown content (if requested)
    text_full:
      anyOf: [{type: string}, {type: 'null'}]
      title: Text Full
      description: Full raw text content (if requested)
    images_content_metadata:
      anyOf:
        - $ref: '#/components/schemas/ImagesContentMetadata'
        - type: 'null'
      description: Metadata for all extracted images with presigned URLs (if
        requested)
    job_metadata:
      anyOf:
        - type: object
          additionalProperties: true
        - type: 'null'
      title: Job Metadata
      description: Job execution metadata (if requested)
    raw_parameters:
      # Free-form object or null; the schema carries no description for it.
      anyOf:
        - type: object
          additionalProperties: true
        - type: 'null'
      title: Raw Parameters
# LlamaParse v2 parameter set; `product_type` is pinned to "parse_v2" by
# `const` and is required together with `tier` and `version`.
ParseV2Parameters:
  title: ParseV2Parameters
  description: 'Configuration for LlamaParse v2 document parsing. Includes tier
    selection, processing options, output formatting, page targeting, and
    webhook delivery. Refer to the LlamaParse documentation for details on each
    field.'
  type: object
  required: [tier, version, product_type]
  properties:
    tier:
      type: string
      enum:
        - fast
        - cost_effective
        - agentic
        - agentic_plus
      title: Tier
      description: 'Parsing tier: ''fast'' (rule-based, cheapest),
        ''cost_effective'' (balanced), ''agentic'' (AI-powered with custom
        prompts), or ''agentic_plus'' (premium AI with highest accuracy)'
    version:
      # Open enum: the first branch lists known values, the second branch
      # still admits any other string.
      anyOf:
        - type: string
          enum:
            - latest
            - '2026-06-05'
            - '2026-06-04'
            - '2025-12-11'
          x-enum-order-preserved: true
        - type: string
      title: Version
      description: 'Version for the selected tier. Use `latest`, or pin one of
        that tier''s dated versions. Current `latest` by tier: - `fast`:
        `2025-12-11` - `cost_effective`: `2026-06-05` - `agentic`: `2026-06-04`
        - `agentic_plus`: `2026-06-04` Full list: `GET
        /api/v2/parse/versions`.'
    client_name:
      anyOf: [{type: string}, {type: 'null'}]
      title: Client Name
      description: 'Identifier for the client/application making the request.
        Used for analytics and debugging. Example: ''my-app-v2'''
    processing_options:
      $ref: '#/components/schemas/LlamaParseProcessingOptions'
      description: Document processing options including OCR, table extraction,
        and chart parsing
    fast_options:
      anyOf:
        - $ref: '#/components/schemas/LlamaParseFastOptions'
        - type: 'null'
      description: Fast tier configuration options. Auto-initialized when
        tier='fast'. Cannot be used with other tiers
    agentic_options:
      anyOf:
        - $ref: '#/components/schemas/LlamaParseAgenticOptions'
        - type: 'null'
      description: AI-powered tier configuration (custom prompts).
        Auto-initialized for cost_effective/agentic/agentic_plus tiers. Cannot
        be used with fast tier
    webhook_configurations:
      type: array
      items:
        $ref: '#/components/schemas/LlamaParseWebhookConfiguration'
      title: Webhook Configurations
      description: Webhook endpoints for job status notifications. Multiple
        webhooks can be configured for different events or services
    input_options:
      $ref: '#/components/schemas/LlamaParseInputOptions'
      description: Format-specific options (HTML, PDF, spreadsheet,
        presentation). Applied based on detected input file type
    crop_box:
      $ref: '#/components/schemas/LlamaParseCropBox'
      description: Crop boundaries to process only a portion of each page.
        Values are ratios 0-1 from page edges
    page_ranges:
      $ref: '#/components/schemas/LlamaParsePageRanges'
      description: 'Page selection: limit total pages or specify exact pages to
        process'
    disable_cache:
      anyOf: [{type: boolean}, {type: 'null'}]
      title: Disable Cache
      description: Bypass result caching and force re-parsing. Use when document
        content may have changed or you need fresh results
    output_options:
      $ref: '#/components/schemas/LlamaParseOutputOptions'
      description: Output formatting options for markdown, text, and extracted
        images
    processing_control:
      $ref: '#/components/schemas/LlamaParseProcessingControl'
      description: Job execution controls including timeouts and failure
        thresholds
    product_type:
      type: string
      const: parse_v2
      title: Product Type
      description: Product type.
# Version strings per tier. Each list is flagged `x-enum-order-preserved`,
# so entries are kept in their given order. The date-like values are quoted
# so they stay strings.
ParseVersionsResponse:
  title: ParseVersionsResponse
  description: Versions accepted by the parse API, grouped by tier.
  type: object
  required: [fast, cost_effective, agentic, agentic_plus]
  properties:
    fast:
      type: array
      items:
        type: string
        enum:
          - '2025-12-11'
        x-enum-order-preserved: true
      title: Fast
      description: Versions for the fast tier
    cost_effective:
      type: array
      items:
        type: string
        enum:
          - '2026-06-05'
          - '2026-05-28'
          - '2026-04-09'
          - '2026-03-31'
          - '2026-03-27'
          - '2026-03-25'
        x-enum-order-preserved: true
      title: Cost Effective
      description: Versions for the cost_effective tier
    agentic:
      type: array
      items:
        type: string
        enum:
          - '2026-06-04'
          - '2026-06-01'
          - '2026-05-26'
          - '2026-05-21'
          - '2026-05-20'
          - '2026-05-19'
          - '2026-05-13'
          - '2026-05-11'
          - '2026-05-06'
          - '2026-05-04'
          - '2026-04-27'
          - '2026-04-22'
          - '2026-04-09'
          - '2026-04-06'
          - '2026-04-02'
          - '2026-03-31'
          - '2026-03-30'
          - '2026-03-27'
          - '2026-03-25'
          - '2026-03-23'
          - '2026-03-22'
          - '2026-03-20'
          - '2026-03-11'
          - '2026-03-10'
          - '2026-03-09'
          - '2026-03-03'
          - '2026-03-02'
          - '2026-02-26'
          - '2026-02-24'
          - '2026-01-30'
          - '2026-01-22'
          - '2026-01-21'
          - '2026-01-16'
          - '2026-01-08'
          - '2025-12-31'
          - '2025-12-18'
          - '2025-12-11'
        x-enum-order-preserved: true
      title: Agentic
      description: Versions for the agentic tier
    agentic_plus:
      type: array
      items:
        type: string
        enum:
          - '2026-06-04'
          - '2026-06-01'
          - '2026-05-26'
          - '2026-05-21'
          - '2026-05-20'
          - '2026-05-19'
          - '2026-05-11'
          - '2026-05-06'
          - '2026-05-04'
          - '2026-05-01'
          - '2026-04-27'
          - '2026-04-19'
          - '2026-04-14'
          - '2026-04-09'
          - '2026-04-02'
          - '2026-03-31'
          - '2026-03-26'
          - '2026-03-25'
          - '2026-03-22'
          - '2026-03-20'
          - '2026-03-17'
          - '2026-03-12'
          - '2026-03-10'
          - '2026-03-09'
          - '2026-03-02'
          - '2026-02-26'
          - '2026-02-24'
          - '2026-01-30'
          - '2026-01-29'
          - '2026-01-24'
          - '2026-01-22'
          - '2026-01-21'
          - '2026-01-16'
          - '2025-12-31'
          - '2025-12-18'
          - '2025-12-11'
        x-enum-order-preserved: true
      title: Agentic Plus
      description: Versions for the agentic_plus tier
# Language codes accepted by the parser. 'no' is quoted so YAML 1.1 loaders
# do not read it as boolean false.
ParserLanguages:
  title: ParserLanguages
  description: Enum for representing the languages supported by the parser.
  type: string
  enum:
    - af
    - az
    - bs
    - cs
    - cy
    - da
    - de
    - en
    - es
    - et
    - fr
    - ga
    - hr
    - hu
    - id
    - is
    - it
    - ku
    - la
    - lt
    - lv
    - mi
    - ms
    - mt
    - nl
    - 'no'
    - oc
    - pi
    - pl
    - pt
    - ro
    - rs_latin
    - sk
    - sl
    - sq
    - sv
    - sw
    - tl
    - tr
    - uz
    - vi
    - ar
    - fa
    - ug
    - ur
    - bn
    - as
    - mni
    - ru
    - rs_cyrillic
    - be
    - bg
    - uk
    - mn
    - abq
    - ady
    - kbd
    - ava
    - dar
    - inh
    - che
    - lbe
    - lez
    - tab
    - tjk
    - hi
    - mr
    - ne
    - bh
    - mai
    - ang
    - bho
    - mah
    - sck
    - new
    - gom
    - sa
    - bgc
    - th
    - ch_sim
    - ch_tra
    - ja
    - ko
    - ta
    - te
    - kn

# v1 parse job: id and status are required; the error fields are nullable.
ParsingJob:
  title: ParsingJob
  description: A parse job (v1).
  type: object
  required: [id, status]
  properties:
    id:
      type: string
      title: Id
      description: Unique parse job identifier
    status:
      $ref: '#/components/schemas/StatusEnum'
      description: Current job status
    error_code:
      anyOf: [{type: string}, {type: 'null'}]
      title: Error Code
      description: Machine-readable error code when failed
    error_message:
      anyOf: [{type: string}, {type: 'null'}]
      title: Error Message
      description: Human-readable error details when failed

# `pages` and `job_metadata` declare no `type`, so any JSON value validates.
ParsingJobJsonResult:
  title: ParsingJobJsonResult
  description: Parse job result as structured JSON pages.
  type: object
  required: [pages, job_metadata]
  properties:
    pages:
      title: Pages
      description: Array of per-page objects containing parsed content, images,
        and layout data
    job_metadata:
      title: Job Metadata
      description: Job metadata including credit usage and page counts

# Markdown rendering of a parse job; `job_metadata` is untyped here as well.
ParsingJobMarkdownResult:
  title: ParsingJobMarkdownResult
  description: Parse job result in markdown format.
  type: object
  required: [markdown, job_metadata]
  properties:
    markdown:
      type: string
      title: Markdown
      description: Full document content rendered as markdown
    job_metadata:
      title: Job Metadata
      description: Job metadata including credit usage and page counts
# Structured-output result; both properties are untyped (any JSON value).
ParsingJobStructuredResult:
  title: ParsingJobStructuredResult
  description: Parse job result as structured output (custom schema).
  type: object
  required: [structured, job_metadata]
  properties:
    structured:
      title: Structured
      description: Extracted structured data conforming to the output_schema
        provided at job creation
    job_metadata:
      title: Job Metadata
      description: Job metadata including credit usage and page counts

# Plain-text result of a parse job.
ParsingJobTextResult:
  title: ParsingJobTextResult
  description: Parse job result as plain text.
  type: object
  required: [text, job_metadata]
  properties:
    text:
      type: string
      title: Text
      description: Full document content as plain text (no formatting)
    job_metadata:
      title: Job Metadata
      description: Job metadata including credit usage and page counts

# Parsing modes: five page-level values followed by three document-level ones.
ParsingMode:
  title: ParsingMode
  description: Enum for representing the mode of parsing to be used.
  type: string
  enum:
    - parse_page_without_llm
    - parse_page_with_llm
    - parse_page_with_lvm
    - parse_page_with_agent
    - parse_page_with_layout_agent
    - parse_document_with_llm
    - parse_document_with_lvm
    - parse_document_with_agent

PartitionNames:
  title: PartitionNames
  description: Enum for dataset partition names.
  type: string
  enum:
    - data_source_id_partition
    - pipeline_id_partition
    - eval_dataset_id_partition
    - file_id_partition
    - pipeline_file_id_partition
    - file_parsing_id_partition
    - extraction_schema_id_partition

# Pipeline resource. Required: id, name, project_id, embedding_config.
Pipeline:
  title: Pipeline
  description: Schema for a pipeline.
  type: object
  required: [id, name, project_id, embedding_config]
  properties:
    id:
      type: string
      format: uuid
      title: Id
      description: Unique identifier
    created_at:
      anyOf: [{type: string, format: date-time}, {type: 'null'}]
      title: Created At
      description: Creation datetime
    updated_at:
      anyOf: [{type: string, format: date-time}, {type: 'null'}]
      title: Updated At
      description: Update datetime
    name:
      type: string
      title: Name
    project_id:
      type: string
      format: uuid
      title: Project Id
    embedding_model_config_id:
      anyOf: [{type: string, format: uuid}, {type: 'null'}]
      title: Embedding Model Config Id
      description: The ID of the EmbeddingModelConfig this pipeline is using.
    embedding_model_config:
      anyOf:
        - $ref: '#/components/schemas/EmbeddingModelConfig'
        - type: 'null'
      description: The embedding model configuration for this pipeline.
    pipeline_type:
      $ref: '#/components/schemas/PipelineType'
      description: Type of pipeline. Either PLAYGROUND or MANAGED.
      default: MANAGED
    managed_pipeline_id:
      anyOf: [{type: string, format: uuid}, {type: 'null'}]
      title: Managed Pipeline Id
      description: The ID of the ManagedPipeline this playground pipeline is
        linked to.
    embedding_config:
      # Tagged union: the `type` property selects the concrete config schema
      # through the discriminator mapping below.
      oneOf:
        - $ref: '#/components/schemas/ManagedOpenAIEmbeddingConfig'
        - $ref: '#/components/schemas/AzureOpenAIEmbeddingConfig'
        - $ref: '#/components/schemas/CohereEmbeddingConfig'
        - $ref: '#/components/schemas/GeminiEmbeddingConfig'
        - $ref: '#/components/schemas/HuggingFaceInferenceAPIEmbeddingConfig'
        - $ref: '#/components/schemas/OpenAIEmbeddingConfig'
        - $ref: '#/components/schemas/VertexAIEmbeddingConfig'
        - $ref: '#/components/schemas/BedrockEmbeddingConfig'
      title: Embedding Config
      discriminator:
        propertyName: type
        mapping:
          AZURE_EMBEDDING: '#/components/schemas/AzureOpenAIEmbeddingConfig'
          BEDROCK_EMBEDDING: '#/components/schemas/BedrockEmbeddingConfig'
          COHERE_EMBEDDING: '#/components/schemas/CohereEmbeddingConfig'
          GEMINI_EMBEDDING: '#/components/schemas/GeminiEmbeddingConfig'
          HUGGINGFACE_API_EMBEDDING: '#/components/schemas/HuggingFaceInferenceAPIEmbeddingConfig'
          MANAGED_OPENAI_EMBEDDING: '#/components/schemas/ManagedOpenAIEmbeddingConfig'
          OPENAI_EMBEDDING: '#/components/schemas/OpenAIEmbeddingConfig'
          VERTEXAI_EMBEDDING: '#/components/schemas/VertexAIEmbeddingConfig'
    sparse_model_config:
      anyOf:
        - $ref: '#/components/schemas/SparseModelConfig'
        - type: 'null'
      description: Configuration for the sparse model used in hybrid search.
    config_hash:
      anyOf:
        - $ref: '#/components/schemas/PipelineConfigurationHashes'
        - type: 'null'
      description: Hashes for the configuration of the pipeline.
    transform_config:
      # No null branch here: when present, one of the two configs must match.
      anyOf:
        - $ref: '#/components/schemas/AutoTransformConfig'
        - $ref: '#/components/schemas/AdvancedModeTransformConfig'
      title: Transform Config
      description: Configuration for the transformation.
    preset_retrieval_parameters:
      $ref: '#/components/schemas/PresetRetrievalParams'
      description: Preset retrieval parameters for the pipeline.
    llama_parse_parameters:
      anyOf:
        - $ref: '#/components/schemas/LlamaParseParameters'
        - type: 'null'
      description: Settings that can be configured for how to use LlamaParse to
        parse files within a LlamaCloud pipeline.
    data_sink:
      anyOf:
        - $ref: '#/components/schemas/DataSink'
        - type: 'null'
      description: The data sink for the pipeline. If None, the pipeline will
        use the fully managed data sink.
    status:
      anyOf:
        - type: string
          enum:
            - CREATED
            - DELETING
        - type: 'null'
      title: Status
      description: Status of the pipeline.
    metadata_config:
      anyOf:
        - $ref: '#/components/schemas/PipelineMetadataConfig'
        - type: 'null'
      description: Metadata configuration for the pipeline.

# Three nullable hash strings, each defaulting to the empty string.
PipelineConfigurationHashes:
  title: PipelineConfigurationHashes
  description: Hashes for the configuration of a pipeline.
  type: object
  properties:
    embedding_config_hash:
      anyOf: [{type: string}, {type: 'null'}]
      title: Embedding Config Hash
      description: Hash of the embedding config.
      default: ''
    parsing_config_hash:
      anyOf: [{type: string}, {type: 'null'}]
      title: Parsing Config Hash
      description: Hash of the llama parse parameters.
      default: ''
    transform_config_hash:
      anyOf: [{type: string}, {type: 'null'}]
      title: Transform Config Hash
      description: Hash of the transform config.
      default: ''
# Request body for creating a pipeline; only `name` is required.
# `embedding_config` is nullable and, when set, is a oneOf union
# discriminated by its `type` property.
PipelineCreate:
  properties:
    embedding_config:
      anyOf:
        - oneOf:
            - $ref: '#/components/schemas/AzureOpenAIEmbeddingConfig'
            - $ref: '#/components/schemas/CohereEmbeddingConfig'
            - $ref: '#/components/schemas/GeminiEmbeddingConfig'
            - $ref: '#/components/schemas/HuggingFaceInferenceAPIEmbeddingConfig'
            - $ref: '#/components/schemas/OpenAIEmbeddingConfig'
            - $ref: '#/components/schemas/VertexAIEmbeddingConfig'
            - $ref: '#/components/schemas/BedrockEmbeddingConfig'
          discriminator:
            propertyName: type
            mapping:
              AZURE_EMBEDDING: '#/components/schemas/AzureOpenAIEmbeddingConfig'
              BEDROCK_EMBEDDING: '#/components/schemas/BedrockEmbeddingConfig'
              COHERE_EMBEDDING: '#/components/schemas/CohereEmbeddingConfig'
              GEMINI_EMBEDDING: '#/components/schemas/GeminiEmbeddingConfig'
              HUGGINGFACE_API_EMBEDDING: '#/components/schemas/HuggingFaceInferenceAPIEmbeddingConfig'
              OPENAI_EMBEDDING: '#/components/schemas/OpenAIEmbeddingConfig'
              VERTEXAI_EMBEDDING: '#/components/schemas/VertexAIEmbeddingConfig'
        - type: 'null'
      title: Embedding Config
    transform_config:
      anyOf:
        - $ref: '#/components/schemas/AutoTransformConfig'
        - $ref: '#/components/schemas/AdvancedModeTransformConfig'
        - type: 'null'
      title: Transform Config
      description: Configuration for the transformation.
    sparse_model_config:
      anyOf:
        - $ref: '#/components/schemas/SparseModelConfig'
        - type: 'null'
      description: Configuration for the sparse model used in hybrid search.
    data_sink_id:
      anyOf:
        - type: string
          format: uuid
        - type: 'null'
      title: Data Sink Id
      description: Data sink ID. When provided instead of data_sink, the data sink will be looked up by ID.
    embedding_model_config_id:
      anyOf:
        - type: string
          format: uuid
        - type: 'null'
      title: Embedding Model Config Id
      description: Embedding model config ID. When provided instead of embedding_config, the embedding model config will be looked up by ID.
    data_sink:
      anyOf:
        - $ref: '#/components/schemas/DataSinkCreate'
        - type: 'null'
      description: Data sink. When provided instead of data_sink_id, the data sink will be created.
    preset_retrieval_parameters:
      $ref: '#/components/schemas/PresetRetrievalParams'
      description: Preset retrieval parameters for the pipeline.
    llama_parse_parameters:
      $ref: '#/components/schemas/LlamaParseParameters'
      description: Settings that can be configured for how to use LlamaParse to parse files within a LlamaCloud pipeline.
    status:
      anyOf:
        - type: string
        - type: 'null'
      title: Status
      description: Status of the pipeline deployment.
    metadata_config:
      anyOf:
        - $ref: '#/components/schemas/PipelineMetadataConfig'
        - type: 'null'
      description: Metadata configuration for the pipeline.
    name:
      type: string
      maxLength: 3000
      minLength: 1
      title: Name
    pipeline_type:
      $ref: '#/components/schemas/PipelineType'
      description: Type of pipeline. Either PLAYGROUND or MANAGED.
      default: MANAGED
    managed_pipeline_id:
      anyOf:
        - type: string
          format: uuid
        - type: 'null'
      title: Managed Pipeline Id
      description: The ID of the ManagedPipeline this playground pipeline is linked to.
  type: object
  required:
    - name
  title: PipelineCreate
  description: Schema for creating a pipeline.

# A data source as attached to a pipeline. `component` accepts either a
# free-form object or one of the listed Cloud*DataSource schemas.
PipelineDataSource:
  properties:
    id:
      type: string
      format: uuid
      title: Id
      description: Unique identifier
    created_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Created At
      description: Creation datetime
    updated_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Updated At
      description: Update datetime
    name:
      type: string
      title: Name
      description: The name of the data source.
    source_type:
      $ref: '#/components/schemas/ConfigurableDataSourceNames'
    custom_metadata:
      anyOf:
        - additionalProperties:
            anyOf:
              - additionalProperties: true
                type: object
              - items: {}
                type: array
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
          type: object
        - type: 'null'
      title: Custom Metadata
      description: Custom metadata that will be present on all data loaded from the data source
    component:
      anyOf:
        - additionalProperties: true
          type: object
        - $ref: '#/components/schemas/CloudS3DataSource'
        - $ref: '#/components/schemas/CloudAzStorageBlobDataSource'
        - $ref: '#/components/schemas/CloudGoogleDriveDataSource'
        - $ref: '#/components/schemas/CloudOneDriveDataSource'
        - $ref: '#/components/schemas/CloudSharepointDataSource'
        - $ref: '#/components/schemas/CloudSlackDataSource'
        - $ref: '#/components/schemas/CloudNotionPageDataSource'
        - $ref: '#/components/schemas/CloudConfluenceDataSource'
        - $ref: '#/components/schemas/CloudJiraDataSource'
        - $ref: '#/components/schemas/CloudJiraDataSourceV2'
        - $ref: '#/components/schemas/CloudBoxDataSource'
      title: DataSourceCreateComponent
      description: Component that implements the data source
    version_metadata:
      anyOf:
        - $ref: '#/components/schemas/DataSourceReaderVersionMetadata'
        - type: 'null'
      description: Version metadata for the data source
    project_id:
      type: string
      format: uuid
      title: Project Id
    data_source_id:
      type: string
      format: uuid
      title: Data Source Id
      description: The ID of the data source.
    pipeline_id:
      type: string
      format: uuid
      title: Pipeline Id
      description: The ID of the pipeline.
    last_synced_at:
      type: string
      format: date-time
      title: Last Synced At
      description: The last time the data source was automatically synced.
    sync_interval:
      anyOf:
        - type: number
        - type: 'null'
      title: Sync Interval
      description: The interval at which the data source should be synced.
    sync_schedule_set_by:
      anyOf:
        - type: string
        - type: 'null'
      title: Sync Schedule Set By
      description: The id of the user who set the sync schedule.
    status:
      anyOf:
        - type: string
          enum:
            - NOT_STARTED
            - IN_PROGRESS
            - SUCCESS
            - ERROR
            - CANCELLED
        - type: 'null'
      title: Status
      description: The status of the data source in the pipeline.
    status_updated_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Status Updated At
      description: The last time the status was updated.
  type: object
  required:
    - id
    - name
    - source_type
    - component
    - project_id
    - data_source_id
    - pipeline_id
    - last_synced_at
  title: PipelineDataSource
  description: Schema for a data source in a pipeline.

# Request body linking an existing data source to a pipeline.
PipelineDataSourceCreate:
  properties:
    data_source_id:
      type: string
      format: uuid
      title: Data Source Id
      description: The ID of the data source.
    sync_interval:
      anyOf:
        - type: number
        - type: 'null'
      title: Sync Interval
      # Quoted because the text contains ': ', which a plain scalar cannot hold.
      description: 'The interval at which the data source should be synced. Valid values are: 21600, 43200, 86400'
  type: object
  required:
    - data_source_id
  title: PipelineDataSourceCreate
  description: Schema for creating an association between a data source and a pipeline.

# Request body for changing the sync interval of a linked data source.
PipelineDataSourceUpdate:
  properties:
    sync_interval:
      anyOf:
        - type: number
        - type: 'null'
      title: Sync Interval
      description: The interval at which the data source should be synced.
  type: object
  title: PipelineDataSourceUpdate
  description: Schema for updating an association between a data source and a pipeline.

# A file attached to a pipeline; only `id` and `pipeline_id` are required,
# every other property is nullable.
PipelineFile:
  properties:
    id:
      type: string
      format: uuid
      title: Id
      description: Unique identifier for the pipeline file.
    name:
      anyOf:
        - type: string
        - type: 'null'
      title: Name
      description: Name of the file.
    external_file_id:
      anyOf:
        - type: string
        - type: 'null'
      title: External File Id
      description: The ID of the file in the external system.
    file_size:
      anyOf:
        - type: integer
        - type: 'null'
      title: File Size
      description: Size of the file in bytes.
    file_type:
      anyOf:
        - type: string
        - type: 'null'
      title: File Type
      description: File type (e.g. pdf, docx, etc.).
    project_id:
      anyOf:
        - type: string
          format: uuid
        - type: 'null'
      title: Project Id
      description: The ID of the project that the file belongs to.
    last_modified_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Last Modified At
      description: The last modified time of the file.
    file_id:
      anyOf:
        - type: string
          format: uuid
        - type: 'null'
      title: File Id
      description: The ID of the file.
    pipeline_id:
      type: string
      format: uuid
      title: Pipeline Id
      description: The ID of the pipeline that the file is associated with.
    resource_info:
      anyOf:
        - additionalProperties:
            anyOf:
              - additionalProperties: true
                type: object
              - items: {}
                type: array
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
          type: object
        - type: 'null'
      title: Resource Info
      description: Resource information for the file.
    permission_info:
      anyOf:
        - additionalProperties:
            anyOf:
              - additionalProperties: true
                type: object
              - items: {}
                type: array
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
          type: object
        - type: 'null'
      title: Permission Info
      description: Permission information for the file.
    custom_metadata:
      anyOf:
        - additionalProperties:
            anyOf:
              - additionalProperties: true
                type: object
              - items: {}
                type: array
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
          type: object
        - type: 'null'
      title: Custom Metadata
      description: Custom metadata for the file.
    data_source_id:
      anyOf:
        - type: string
          format: uuid
        - type: 'null'
      title: Data Source Id
      description: The ID of the data source that the file belongs to.
    config_hash:
      anyOf:
        - additionalProperties:
            anyOf:
              - additionalProperties: true
                type: object
              - items: {}
                type: array
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
          type: object
        - type: 'null'
      title: Config Hash
      description: Hashes for the configuration of the pipeline.
    indexed_page_count:
      anyOf:
        - type: integer
        - type: 'null'
      title: Indexed Page Count
      description: The number of pages that have been indexed for this file.
    status:
      anyOf:
        - type: string
          enum:
            - NOT_STARTED
            - IN_PROGRESS
            - SUCCESS
            - ERROR
            - CANCELLED
        - type: 'null'
      title: Status
      description: Status of the pipeline file.
    status_updated_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Status Updated At
      description: The last time the status was updated.
    created_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Created At
      description: When the pipeline file was created.
    updated_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Updated At
      description: When the pipeline file was last updated.
  type: object
  required:
    - id
    - pipeline_id
  title: PipelineFile
  description: A file associated with a pipeline.

# Request body attaching an existing file (by ID) to a pipeline.
PipelineFileCreate:
  properties:
    file_id:
      type: string
      format: uuid
      title: File Id
      description: The ID of the file
    custom_metadata:
      anyOf:
        - additionalProperties:
            anyOf:
              - additionalProperties: true
                type: object
              - items: {}
                type: array
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
          type: object
        - type: 'null'
      title: Custom Metadata
      description: Custom metadata for the file
  type: object
  required:
    - file_id
  title: PipelineFileCreate
  description: Schema for creating a file that is associated with a pipeline.

# Paginated list of PipelineFileResponse items.
PipelineFileListResponse:
  properties:
    items:
      items:
        $ref: '#/components/schemas/PipelineFileResponse'
      type: array
      title: Items
      description: The list of items.
    next_page_token:
      anyOf:
        - type: string
        - type: 'null'
      title: Next Page Token
      description: A token, which can be sent as page_token to retrieve the next page. If this field is omitted, there are no subsequent pages.
    total_size:
      anyOf:
        - type: integer
        - type: 'null'
      title: Total Size
      description: The total number of items available. This is only populated when specifically requested. The value may be an estimate and can be used for display purposes only.
  type: object
  required:
    - items
  title: PipelineFileListResponse
  description: Beta API query response schema for pipeline files.

# Pipeline file as returned by the beta API. Carries the PipelineFile
# properties except `config_hash` and `indexed_page_count`.
PipelineFileResponse:
  properties:
    id:
      type: string
      format: uuid
      title: Id
      description: Unique identifier for the pipeline file.
    name:
      anyOf:
        - type: string
        - type: 'null'
      title: Name
      description: Name of the file.
    external_file_id:
      anyOf:
        - type: string
        - type: 'null'
      title: External File Id
      description: The ID of the file in the external system.
    file_size:
      anyOf:
        - type: integer
        - type: 'null'
      title: File Size
      description: Size of the file in bytes.
    file_type:
      anyOf:
        - type: string
        - type: 'null'
      title: File Type
      description: File type (e.g. pdf, docx, etc.).
    project_id:
      anyOf:
        - type: string
          format: uuid
        - type: 'null'
      title: Project Id
      description: The ID of the project that the file belongs to.
    last_modified_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Last Modified At
      description: The last modified time of the file.
    file_id:
      anyOf:
        - type: string
          format: uuid
        - type: 'null'
      title: File Id
      description: The ID of the file.
    pipeline_id:
      type: string
      format: uuid
      title: Pipeline Id
      description: The ID of the pipeline that the file is associated with.
    resource_info:
      anyOf:
        - additionalProperties:
            anyOf:
              - additionalProperties: true
                type: object
              - items: {}
                type: array
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
          type: object
        - type: 'null'
      title: Resource Info
      description: Resource information for the file.
    permission_info:
      anyOf:
        - additionalProperties:
            anyOf:
              - additionalProperties: true
                type: object
              - items: {}
                type: array
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
          type: object
        - type: 'null'
      title: Permission Info
      description: Permission information for the file.
    custom_metadata:
      anyOf:
        - additionalProperties:
            anyOf:
              - additionalProperties: true
                type: object
              - items: {}
                type: array
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
          type: object
        - type: 'null'
      title: Custom Metadata
      description: Custom metadata for the file.
    data_source_id:
      anyOf:
        - type: string
          format: uuid
        - type: 'null'
      title: Data Source Id
      description: The ID of the data source that the file belongs to.
    status:
      anyOf:
        - type: string
          enum:
            - NOT_STARTED
            - IN_PROGRESS
            - SUCCESS
            - ERROR
            - CANCELLED
        - type: 'null'
      title: Status
      description: Status of the pipeline file.
    status_updated_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Status Updated At
      description: The last time the status was updated.
    created_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Created At
      description: When the pipeline file was created.
    updated_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Updated At
      description: When the pipeline file was last updated.
  type: object
  required:
    - id
    - pipeline_id
  title: PipelineFileResponse
  description: Beta API response schema for a pipeline file.

# Request body for updating a pipeline file; exposes `custom_metadata` only.
PipelineFileUpdate:
  properties:
    custom_metadata:
      anyOf:
        - additionalProperties:
            anyOf:
              - additionalProperties: true
                type: object
              - items: {}
                type: array
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
          type: object
        - type: 'null'
      title: Custom Metadata
      description: Custom metadata for the file
  type: object
  title: PipelineFileUpdate
  description: Request to update a pipeline file.
# Two string lists naming metadata keys to leave out of embeddings and
# out of what the LLM sees during retrieval.
PipelineMetadataConfig:
  properties:
    excluded_embed_metadata_keys:
      items:
        type: string
      type: array
      title: Excluded Embed Metadata Keys
      description: List of metadata keys to exclude from embeddings
    excluded_llm_metadata_keys:
      items:
        type: string
      type: array
      title: Excluded Llm Metadata Keys
      description: List of metadata keys to exclude from LLM during retrieval
  type: object
  title: PipelineMetadataConfig

# String enum with the two pipeline kinds.
PipelineType:
  type: string
  enum:
    - PLAYGROUND
    - MANAGED
  title: PipelineType
  description: Enum for representing the type of a pipeline

# Request body for updating a pipeline. No property is required and every
# property is nullable; `llama_parse_parameters` is flagged deprecated.
PipelineUpdate:
  properties:
    embedding_config:
      anyOf:
        - oneOf:
            - $ref: '#/components/schemas/AzureOpenAIEmbeddingConfig'
            - $ref: '#/components/schemas/CohereEmbeddingConfig'
            - $ref: '#/components/schemas/GeminiEmbeddingConfig'
            - $ref: '#/components/schemas/HuggingFaceInferenceAPIEmbeddingConfig'
            - $ref: '#/components/schemas/OpenAIEmbeddingConfig'
            - $ref: '#/components/schemas/VertexAIEmbeddingConfig'
            - $ref: '#/components/schemas/BedrockEmbeddingConfig'
          discriminator:
            propertyName: type
            mapping:
              AZURE_EMBEDDING: '#/components/schemas/AzureOpenAIEmbeddingConfig'
              BEDROCK_EMBEDDING: '#/components/schemas/BedrockEmbeddingConfig'
              COHERE_EMBEDDING: '#/components/schemas/CohereEmbeddingConfig'
              GEMINI_EMBEDDING: '#/components/schemas/GeminiEmbeddingConfig'
              HUGGINGFACE_API_EMBEDDING: '#/components/schemas/HuggingFaceInferenceAPIEmbeddingConfig'
              OPENAI_EMBEDDING: '#/components/schemas/OpenAIEmbeddingConfig'
              VERTEXAI_EMBEDDING: '#/components/schemas/VertexAIEmbeddingConfig'
        - type: 'null'
      title: Embedding Config
    transform_config:
      anyOf:
        - $ref: '#/components/schemas/AutoTransformConfig'
        - $ref: '#/components/schemas/AdvancedModeTransformConfig'
        - type: 'null'
      title: Transform Config
      description: Configuration for the transformation.
    sparse_model_config:
      anyOf:
        - $ref: '#/components/schemas/SparseModelConfig'
        - type: 'null'
      description: Configuration for the sparse model used in hybrid search.
    data_sink_id:
      anyOf:
        - type: string
          format: uuid
        - type: 'null'
      title: Data Sink Id
      description: Data sink ID. When provided instead of data_sink, the data sink will be looked up by ID.
    embedding_model_config_id:
      anyOf:
        - type: string
          format: uuid
        - type: 'null'
      title: Embedding Model Config Id
      description: Embedding model config ID. When provided instead of embedding_config, the embedding model config will be looked up by ID.
    data_sink:
      anyOf:
        - $ref: '#/components/schemas/DataSinkCreate'
        - type: 'null'
      description: Data sink. When provided instead of data_sink_id, the data sink will be created.
    preset_retrieval_parameters:
      anyOf:
        - $ref: '#/components/schemas/PresetRetrievalParams'
        - type: 'null'
      description: Preset retrieval parameters for the pipeline.
    llama_parse_parameters:
      anyOf:
        - $ref: '#/components/schemas/LlamaParseParameters'
        - type: 'null'
      description: Settings that can be configured for how to use LlamaParse to parse files within a LlamaCloud pipeline.
      deprecated: true
    status:
      anyOf:
        - type: string
        - type: 'null'
      title: Status
      description: Status of the pipeline deployment.
    metadata_config:
      anyOf:
        - $ref: '#/components/schemas/PipelineMetadataConfig'
        - type: 'null'
      description: Metadata configuration for the pipeline.
    name:
      anyOf:
        - type: string
        - type: 'null'
      title: Name
    managed_pipeline_id:
      anyOf:
        - type: string
          format: uuid
        - type: 'null'
      title: Managed Pipeline Id
      description: The ID of the ManagedPipeline this playground pipeline is linked to.
  type: object
  title: PipelineUpdate
  description: Schema for updating a pipeline.
# Per-user playground session; references the LLM and retrieval parameter
# records it last used, both by ID and (optionally) inline.
PlaygroundSession:
  properties:
    id:
      type: string
      format: uuid
      title: Id
      description: Unique identifier
    created_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Created At
      description: Creation datetime
    updated_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Updated At
      description: Update datetime
    pipeline_id:
      type: string
      format: uuid
      title: Pipeline Id
    user_id:
      type: string
      title: User Id
    llm_params_id:
      type: string
      format: uuid
      title: Llm Params Id
    llm_params:
      $ref: '#/components/schemas/LLMParameters'
      description: LLM parameters last used in this session.
    retrieval_params_id:
      type: string
      format: uuid
      title: Retrieval Params Id
    retrieval_params:
      $ref: '#/components/schemas/PresetRetrievalParams'
      description: Preset retrieval parameters last used in this session.
    chat_messages:
      items:
        $ref: '#/components/schemas/ChatMessage'
      type: array
      title: Chat Messages
      description: Chat message history for this session.
  type: object
  required:
    - id
    - pipeline_id
    - user_id
    - llm_params_id
    - retrieval_params_id
  title: PlaygroundSession
  description: A playground session for a user.

# String enum of pooling choices.
Pooling:
  type: string
  enum:
    - cls
    - mean
    - last
  title: Pooling
  description: Enum of possible pooling choices with pooling behaviors.

# Retrieval settings that can be stored on a pipeline. No property is
# required. Numeric bounds are given as floats (e.g. 100.0) even on
# integer-typed properties.
PresetRetrievalParams:
  properties:
    dense_similarity_top_k:
      anyOf:
        - type: integer
          maximum: 100.0
          minimum: 1.0
        - type: 'null'
      title: Dense Similarity Top K
      description: Number of nodes for dense retrieval.
      default: 30
    dense_similarity_cutoff:
      anyOf:
        - type: number
          maximum: 1.0
          minimum: 0.0
        - type: 'null'
      title: Dense Similarity Cutoff
      description: Minimum similarity score wrt query for retrieval
      default: 0.0
    sparse_similarity_top_k:
      anyOf:
        - type: integer
          maximum: 100.0
          minimum: 1.0
        - type: 'null'
      title: Sparse Similarity Top K
      description: Number of nodes for sparse retrieval.
      default: 30
    enable_reranking:
      anyOf:
        - type: boolean
        - type: 'null'
      title: Enable Reranking
      description: Enable reranking for retrieval
    rerank_top_n:
      anyOf:
        - type: integer
          maximum: 100.0
          minimum: 1.0
        - type: 'null'
      title: Rerank Top N
      description: Number of reranked nodes for returning.
      default: 6
    alpha:
      anyOf:
        - type: number
          maximum: 1.0
          minimum: 0.0
        - type: 'null'
      title: Alpha
      description: Alpha value for hybrid retrieval to determine the weights between dense and sparse retrieval. 0 is sparse retrieval and 1 is dense retrieval.
    search_filters:
      anyOf:
        - $ref: '#/components/schemas/MetadataFilters'
        - type: 'null'
      description: Search filters for retrieval.
    search_filters_inference_schema:
      anyOf:
        - additionalProperties:
            anyOf:
              - additionalProperties: true
                type: object
              - items: {}
                type: array
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
          type: object
        - type: 'null'
      title: Search Filters Inference Schema
      description: JSON Schema that will be used to infer search_filters. Omit or leave as null to skip inference.
    files_top_k:
      anyOf:
        - type: integer
          maximum: 5.0
          minimum: 1.0
        - type: 'null'
      title: Files Top K
      description: Number of files to retrieve (only for retrieval mode files_via_metadata and files_via_content).
      default: 1
    retrieval_mode:
      $ref: '#/components/schemas/RetrievalMode'
      description: The retrieval mode for the query.
      default: chunks
    retrieve_image_nodes:
      type: boolean
      title: Retrieve Image Nodes
      description: Whether to retrieve image nodes.
      default: false
      deprecated: true
    retrieve_page_screenshot_nodes:
      type: boolean
      title: Retrieve Page Screenshot Nodes
      description: Whether to retrieve page screenshot nodes.
      default: false
    retrieve_page_figure_nodes:
      type: boolean
      title: Retrieve Page Figure Nodes
      description: Whether to retrieve page figure nodes.
      default: false
    class_name:
      type: string
      title: Class Name
      default: base_component
  type: object
  title: PresetRetrievalParams
  description: Schema for the search params for a retrieval execution that can be preset for a pipeline.

# Presigned URL plus its expiry; `form_fields` is an optional
# string-to-string map.
PresignedUrl:
  properties:
    url:
      type: string
      minLength: 1
      format: uri
      title: Url
      description: A presigned URL for IO operations against a private file
    expires_at:
      type: string
      format: date-time
      title: Expires At
      description: The time at which the presigned URL expires
    form_fields:
      anyOf:
        - additionalProperties:
            type: string
          type: object
        - type: 'null'
      title: Form Fields
      description: Form fields for a presigned POST request
  type: object
  required:
    - url
    - expires_at
  title: PresignedUrl
  description: Schema for a presigned URL.

# One processing output record. `job_config` is either a
# BatchParseJobRecordCreate or a ClassifyJob; `output_metadata` is the only
# optional property.
ProcessingResult:
  properties:
    result_id:
      type: string
      title: Result Id
      description: Unique identifier for this result
    item_id:
      type: string
      title: Item Id
      description: Source item that was processed
    job_type:
      $ref: '#/components/schemas/BatchJobType'
      description: Type of processing performed
    job_config:
      anyOf:
        - $ref: '#/components/schemas/BatchParseJobRecordCreate'
        - $ref: '#/components/schemas/ClassifyJob'
      title: Job Config
      description: Job configuration used for processing
    parameters_hash:
      type: string
      title: Parameters Hash
      description: Content hash of the job configuration for dedup
    output_s3_path:
      type: string
      title: Output S3 Path
      description: Location of the processing output
    output_metadata:
      anyOf:
        - $ref: '#/components/schemas/ProcessingResultMetadata'
        - type: 'null'
      description: Summary statistics about the output
    processed_at:
      type: string
      format: date-time
      title: Processed At
      description: When this processing occurred
  type: object
  required:
    - result_id
    - item_id
    - job_type
    - job_config
    - parameters_hash
    - output_s3_path
    - processed_at
  title: ProcessingResult
  description: A processing result with lineage information.
# Placeholder object schema: it declares no properties.
ProcessingResultMetadata:
  properties: {}
  type: object
  title: ProcessingResultMetadata
  description: 'Metadata about processing output. Currently empty - will be populated with job-type-specific metadata fields in the future.'

# Project resource with UUID-formatted `id` and `organization_id`.
Project:
  properties:
    name:
      type: string
      maxLength: 3000
      minLength: 1
      title: Name
    id:
      type: string
      format: uuid
      title: Id
      description: Unique identifier
    created_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Created At
      description: Creation datetime
    updated_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Updated At
      description: Update datetime
    organization_id:
      type: string
      format: uuid
      title: Organization Id
      description: The Organization ID the project is under.
    is_default:
      type: boolean
      title: Is Default
      description: Whether this project is the default project for the user.
      default: false
  type: object
  required:
    - name
    - id
    - organization_id
  title: Project
  description: Schema for a project.

# Paginated list of ProjectResponse items.
ProjectQueryResponse:
  properties:
    items:
      items:
        $ref: '#/components/schemas/ProjectResponse'
      type: array
      title: Items
      description: The list of items.
    next_page_token:
      anyOf:
        - type: string
        - type: 'null'
      title: Next Page Token
      description: A token, which can be sent as page_token to retrieve the next page. If this field is omitted, there are no subsequent pages.
    total_size:
      anyOf:
        - type: integer
        - type: 'null'
      title: Total Size
      description: The total number of items available. This is only populated when specifically requested. The value may be an estimate and can be used for display purposes only.
  type: object
  required:
    - items
  title: ProjectQueryResponse
  description: API query response schema for projects.

# Project as returned by the query API. Unlike Project, `id` and
# `organization_id` are plain strings with no `format`.
ProjectResponse:
  properties:
    id:
      type: string
      title: Id
      description: The project's unique identifier.
    created_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Created At
      description: Creation datetime
    updated_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
      title: Updated At
      description: Update datetime
    name:
      type: string
      title: Name
      description: The project's display name.
    organization_id:
      type: string
      title: Organization Id
      description: The organization the project belongs to.
    is_default:
      type: boolean
      title: Is Default
      description: Whether this project is the default project for its organization.
      default: false
  type: object
  required:
    - id
    - name
    - organization_id
  title: ProjectResponse
  description: API response schema for a project.

# String enum of public model identifiers.
PublicModelName:
  type: string
  enum:
    - openai-gpt-4o
    - openai-gpt-4o-mini
    - openai-gpt-4-1
    - openai-gpt-4-1-mini
    - openai-gpt-4-1-nano
    - openai-gpt-5
    - openai-gpt-5-mini
    - openai-gpt-5-nano
    - openai-text-embedding-3-large
    - openai-text-embedding-3-small
    - openai-whisper-1
    - anthropic-sonnet-3.5
    - anthropic-sonnet-3.5-v2
    - anthropic-sonnet-3.7
    - anthropic-sonnet-4.0
    - anthropic-sonnet-4.5
    - anthropic-haiku-3.5
    - anthropic-haiku-4.5
    - gemini-2.5-flash
    - gemini-3.0-pro
    - gemini-3.1-pro
    - gemini-2.5-pro
    - gemini-2.0-flash
    - gemini-2.0-flash-lite
    - gemini-2.5-flash-lite
  title: PublicModelName
  description: Public model names.

# Reranker selection. Note the property literally named `type`, which is
# distinct from the schema-level `type: object` keyword further down.
ReRankConfig:
  properties:
    top_n:
      type: integer
      minimum: 1.0
      title: Top N
      description: The number of nodes to retrieve after reranking over retrieved nodes from all retrieval tools.
      default: 6
    type:
      $ref: '#/components/schemas/ReRankerType'
      description: The type of reranker to use.
      default: system_default
  type: object
  title: ReRankConfig

# String enum of reranker kinds.
ReRankerType:
  type: string
  enum:
    - system_default
    - llm
    - cohere
    - bedrock
    - score
    - disabled
  title: ReRankerType
  description: Enum for the reranker type.
# Reference to a related node; only `node_id` is required.
RelatedNodeInfo:
  properties:
    node_id:
      type: string
      title: Node Id
    node_type:
      anyOf:
        - $ref: '#/components/schemas/ObjectType'
        - type: string
        - type: 'null'
      title: Node Type
    metadata:
      additionalProperties: true
      type: object
      title: Metadata
    hash:
      anyOf:
        - type: string
        - type: 'null'
      title: Hash
    class_name:
      type: string
      title: Class Name
      default: RelatedNodeInfo
  type: object
  required:
    - node_id
  title: RelatedNodeInfo

# Rerank toggle for hybrid search (distinct from ReRankConfig, which differs
# only in letter case); `enabled` defaults to true.
RerankConfig:
  properties:
    enabled:
      type: boolean
      title: Enabled
      description: Set to false to disable reranking.
      default: true
    top_n:
      anyOf:
        - type: integer
        - type: 'null'
      title: Top N
      description: Number of results to return after reranking.
      examples:
        - 5
  type: object
  title: RerankConfig
  description: Reranking configuration to apply after hybrid search.

# Size, existence flag and optional download URL for one stored result.
ResultTypeMetadata:
  properties:
    size_bytes:
      type: integer
      title: Size Bytes
      description: Size of the result file in bytes
    exists:
      type: boolean
      title: Exists
      description: Whether the result file exists in S3
      default: true
    presigned_url:
      anyOf:
        - type: string
        - type: 'null'
      title: Presigned Url
      description: Presigned URL to download the result file
  type: object
  required:
    - size_bytes
  title: ResultTypeMetadata
  description: Metadata about a specific result type stored in S3.

# String enum of retrieval modes.
RetrievalMode:
  type: string
  enum:
    - chunks
    - files_via_metadata
    - files_via_content
    - auto_routed
  title: RetrievalMode

# Retrieval settings for a single query: the PresetRetrievalParams
# properties plus a required, non-empty `query` string.
RetrievalParams:
  properties:
    dense_similarity_top_k:
      anyOf:
        - type: integer
          maximum: 100.0
          minimum: 1.0
        - type: 'null'
      title: Dense Similarity Top K
      description: Number of nodes for dense retrieval.
      default: 30
    dense_similarity_cutoff:
      anyOf:
        - type: number
          maximum: 1.0
          minimum: 0.0
        - type: 'null'
      title: Dense Similarity Cutoff
      description: Minimum similarity score wrt query for retrieval
      default: 0.0
    sparse_similarity_top_k:
      anyOf:
        - type: integer
          maximum: 100.0
          minimum: 1.0
        - type: 'null'
      title: Sparse Similarity Top K
      description: Number of nodes for sparse retrieval.
      default: 30
    enable_reranking:
      anyOf:
        - type: boolean
        - type: 'null'
      title: Enable Reranking
      description: Enable reranking for retrieval
    rerank_top_n:
      anyOf:
        - type: integer
          maximum: 100.0
          minimum: 1.0
        - type: 'null'
      title: Rerank Top N
      description: Number of reranked nodes for returning.
      default: 6
    alpha:
      anyOf:
        - type: number
          maximum: 1.0
          minimum: 0.0
        - type: 'null'
      title: Alpha
      description: Alpha value for hybrid retrieval to determine the weights between dense and sparse retrieval. 0 is sparse retrieval and 1 is dense retrieval.
    search_filters:
      anyOf:
        - $ref: '#/components/schemas/MetadataFilters'
        - type: 'null'
      description: Search filters for retrieval.
    search_filters_inference_schema:
      anyOf:
        - additionalProperties:
            anyOf:
              - additionalProperties: true
                type: object
              - items: {}
                type: array
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
          type: object
        - type: 'null'
      title: Search Filters Inference Schema
      description: JSON Schema that will be used to infer search_filters. Omit or leave as null to skip inference.
    files_top_k:
      anyOf:
        - type: integer
          maximum: 5.0
          minimum: 1.0
        - type: 'null'
      title: Files Top K
      description: Number of files to retrieve (only for retrieval mode files_via_metadata and files_via_content).
      default: 1
    retrieval_mode:
      $ref: '#/components/schemas/RetrievalMode'
      description: The retrieval mode for the query.
      default: chunks
    retrieve_image_nodes:
      type: boolean
      title: Retrieve Image Nodes
      description: Whether to retrieve image nodes.
      default: false
      deprecated: true
    retrieve_page_screenshot_nodes:
      type: boolean
      title: Retrieve Page Screenshot Nodes
      description: Whether to retrieve page screenshot nodes.
      default: false
    retrieve_page_figure_nodes:
      type: boolean
      title: Retrieve Page Figure Nodes
      description: Whether to retrieve page figure nodes.
      default: false
    query:
      type: string
      minLength: 1
      title: Query
      description: The query to retrieve against.
    class_name:
      type: string
      title: Class Name
      default: base_component
  type: object
  required:
    - query
  title: RetrievalParams
  description: Schema for the search params for a retrieval execution.

# One retrieved chunk; only `content` is required.
RetrievalResult:
  properties:
    content:
      type: string
      title: Content
      description: Text content of the retrieved chunk.
    score:
      anyOf:
        - type: number
        - type: 'null'
      title: Score
      description: Hybrid search relevance score.
    rerank_score:
      anyOf:
        - type: number
        - type: 'null'
      title: Rerank Score
      description: Relevance score from the reranker, if reranking was applied.
    metadata:
      anyOf:
        - $ref: '#/components/schemas/MetadataDict'
        - type: 'null'
      description: User-defined metadata associated with the chunk.
    static_fields:
      $ref: '#/components/schemas/StaticFields'
  type: object
  required:
    - content
  title: RetrievalResult
  description: A single retrieval result.

# Hybrid retrieval request; `index_id` and `query` are required.
# `custom_filters` maps each key to a FilterType object, an array of
# numeric FilterType objects, or null.
RetrieveParams:
  properties:
    index_id:
      type: string
      title: Index Id
      description: ID of the index to retrieve against.
      examples:
        - idx-abc123
    query:
      type: string
      title: Query
      description: Natural-language query to retrieve relevant chunks.
      examples:
        - What are the key findings?
    top_k:
      anyOf:
        - type: integer
        - type: 'null'
      title: Top K
      description: Maximum number of results to return.
      examples:
        - 10
    num_candidates:
      anyOf:
        - type: integer
        - type: 'null'
      title: Num Candidates
      description: Number of candidates for approximate nearest neighbor search.
    vector_pipeline_weight:
      anyOf:
        - type: number
        - type: 'null'
      title: Vector Pipeline Weight
      description: Weight of the vector search pipeline (0-1).
    full_text_pipeline_weight:
      anyOf:
        - type: number
        - type: 'null'
      title: Full Text Pipeline Weight
      description: Weight of the full-text search pipeline (0-1).
    score_threshold:
      anyOf:
        - type: number
        - type: 'null'
      title: Score Threshold
      description: Minimum score threshold for returned results.
    static_filters:
      anyOf:
        - $ref: '#/components/schemas/MongoStaticFilters'
        - type: 'null'
      description: Filters on built-in document fields (page range, chunk index, etc.).
    custom_filters:
      anyOf:
        - additionalProperties:
            anyOf:
              - $ref: '#/components/schemas/FilterType_Union_str__int__bool__float__'
              - items:
                  $ref: '#/components/schemas/FilterType_Union_int__float__'
                type: array
              - type: 'null'
          type: object
        - type: 'null'
      title: Custom Filters
      description: Filters on user-defined metadata fields.
    rerank:
      $ref: '#/components/schemas/RerankConfig'
      description: Reranking configuration applied after hybrid search. Enabled by default.
  type: object
  required:
    - index_id
    - query
  title: RetrieveParams
  description: Hybrid retrieval request combining vector and full-text search.

# Response wrapper holding the ordered RetrievalResult list.
RetrieveResult:
  properties:
    results:
      items:
        $ref: '#/components/schemas/RetrievalResult'
      type: array
      title: Results
      description: Ordered list of retrieved chunks.
  type: object
  required:
    - results
  title: RetrieveResult
  description: Response containing retrieval results.

# NOTE(review): the RetrieveResults definition runs on beyond this point, so
# its opening part is left in its original single-line form.
RetrieveResults: properties: pipeline_id: type: string format: uuid title: Pipeline Id description: The ID of the pipeline that the query was retrieved against. retrieval_nodes: items: $ref: '#/components/schemas/TextNodeWithScore' type: array title: Retrieval Nodes description: The nodes retrieved by the pipeline for the given query. image_nodes: items: $ref: '#/components/schemas/PageScreenshotNodeWithScore' type: array title: Image Nodes description: The image nodes retrieved by the pipeline for the given query. Deprecated - will soon be replaced with 'page_screenshot_nodes'. deprecated: true page_figure_nodes: items: $ref: '#/components/schemas/PageFigureNodeWithScore' type: array title: Page Figure Nodes description: The page figure nodes retrieved by the pipeline for the given query. retrieval_latency: additionalProperties: type: number type: object title: Retrieval Latency description: The end-to-end latency for retrieval and reranking.
metadata: additionalProperties: type: string type: object title: Metadata description: Metadata associated with the retrieval execution inferred_search_filters: anyOf: - $ref: '#/components/schemas/MetadataFilters' - type: 'null' description: The inferred search filters for the query. class_name: type: string title: Class Name default: RetrieveResults type: object required: - pipeline_id - retrieval_nodes title: RetrieveResults description: Schema for the result of an retrieval execution. Retriever: properties: name: type: string maxLength: 3000 minLength: 1 title: Name description: A name for the retriever tool. Will default to the pipeline name if not provided. pipelines: items: $ref: '#/components/schemas/RetrieverPipeline' type: array title: Pipelines description: The pipelines this retriever uses. id: type: string format: uuid title: Id description: Unique identifier created_at: anyOf: - type: string format: date-time - type: 'null' title: Created At description: Creation datetime updated_at: anyOf: - type: string format: date-time - type: 'null' title: Updated At description: Update datetime project_id: type: string format: uuid title: Project Id description: The ID of the project this retriever resides in. type: object required: - name - id - project_id title: Retriever description: An entity that retrieves context nodes from several sub RetrieverTools. RetrieverCreate: properties: name: type: string maxLength: 3000 minLength: 1 title: Name description: A name for the retriever tool. Will default to the pipeline name if not provided. pipelines: items: $ref: '#/components/schemas/RetrieverPipeline' type: array title: Pipelines description: The pipelines this retriever uses. type: object required: - name title: RetrieverCreate RetrieverPipeline: properties: name: anyOf: - type: string maxLength: 3000 minLength: 1 - type: 'null' title: Name description: A name for the retriever tool. Will default to the pipeline name if not provided. 
description: anyOf: - type: string maxLength: 15000 - type: 'null' title: Description description: A description of the retriever tool. pipeline_id: type: string format: uuid title: Pipeline Id description: The ID of the pipeline this tool uses. preset_retrieval_parameters: $ref: '#/components/schemas/PresetRetrievalParams' description: Parameters for retrieval configuration. type: object required: - name - description - pipeline_id title: RetrieverPipeline RetrieverUpdate: properties: name: anyOf: - type: string - type: 'null' title: Name description: A name for the retriever. pipelines: anyOf: - items: $ref: '#/components/schemas/RetrieverPipeline' type: array - type: 'null' title: Pipelines description: The pipelines this retriever uses. type: object required: - pipelines title: RetrieverUpdate SearchRequest: properties: page_size: anyOf: - type: integer - type: 'null' title: Page Size description: The maximum number of items to return. The service may return fewer than this value. If unspecified, a default page size will be used. The maximum value is typically 1000; values above this will be coerced to the maximum. page_token: anyOf: - type: string - type: 'null' title: Page Token description: A page token, received from a previous list call. Provide this to retrieve the subsequent page. filter: anyOf: - additionalProperties: $ref: '#/components/schemas/FilterOperation' type: object - type: 'null' title: Filter description: A filter object or expression that filters resources listed in the response. order_by: anyOf: - type: string - type: 'null' title: Order By description: A comma-separated list of fields to order by, sorted in ascending order. Use 'field_name desc' to specify descending order. 
deployment_name: type: string title: Deployment Name description: The agent deployment's name to search within collection: type: string title: Collection description: The logical agent data collection to search within default: default include_total: type: boolean title: Include Total description: Whether to include the total number of items in the response default: false offset: anyOf: - type: integer maximum: 1000.0 minimum: 0.0 - type: 'null' title: Offset description: The offset to start from. If not provided, the first page is returned default: 0 type: object required: - deployment_name title: SearchRequest description: API request body for searching agent data SemanticChunkingConfig: properties: mode: type: string const: semantic title: Mode default: semantic buffer_size: type: integer title: Buffer Size default: 1 breakpoint_percentile_threshold: type: integer title: Breakpoint Percentile Threshold default: 95 type: object title: SemanticChunkingConfig SentenceChunkingConfig: properties: chunk_size: type: integer exclusiveMinimum: 0.0 title: Chunk Size default: 1024 chunk_overlap: type: integer title: Chunk Overlap default: 200 gte: 0 mode: type: string const: sentence title: Mode default: sentence separator: type: string title: Separator default: ' ' paragraph_separator: type: string title: Paragraph Separator default: ' ' type: object title: SentenceChunkingConfig SessionCreate: properties: index_ids: anyOf: - items: type: string type: array maxItems: 10 - type: 'null' title: Index Ids description: Indexes this session will retrieve from. Once set and the first message has been sent, the source set is locked for the session's lifetime. Leave null to create an unbound session. examples: - - idx-abc123 - idx-def456 type: object title: SessionCreate description: Request body for creating a chat session. SessionDetail: properties: session_id: type: string title: Session Id description: Unique session identifier. 
examples: - ses-abc123 generated_title: anyOf: - type: string - type: 'null' title: Generated Title description: Auto-generated title derived from the first user message. examples: - What were the main findings in Q3?... last_updated_at: type: string title: Last Updated At description: ISO-format timestamp showing when the session was last updated. examples: - '2026-04-22T12:34:41.342245' job_metadata: anyOf: - $ref: '#/components/schemas/JobMetadata' - type: 'null' description: Token usage and status from the most recent run. Null if the session has not been run yet. index_ids: anyOf: - items: type: string type: array - type: 'null' title: Index Ids description: Indexes this session is bound to. Null on unbound sessions. examples: - - idx-abc123 - idx-def456 events: items: oneOf: - $ref: '#/components/schemas/ThinkingDeltaEvent' - $ref: '#/components/schemas/TextDeltaEvent' - $ref: '#/components/schemas/ThinkingEvent' - $ref: '#/components/schemas/TextEvent' - $ref: '#/components/schemas/ToolCallEvent' - $ref: '#/components/schemas/ToolResultEvent' - $ref: '#/components/schemas/StopEvent' - $ref: '#/components/schemas/UserInputEvent' discriminator: propertyName: type mapping: stop: '#/components/schemas/StopEvent' text: '#/components/schemas/TextEvent' text_delta: '#/components/schemas/TextDeltaEvent' thinking: '#/components/schemas/ThinkingEvent' thinking_delta: '#/components/schemas/ThinkingDeltaEvent' tool_call: '#/components/schemas/ToolCallEvent' tool_result: '#/components/schemas/ToolResultEvent' user_input: '#/components/schemas/UserInputEvent' type: array title: Events description: Ordered list of events that make up the conversation history. type: object required: - session_id - last_updated_at - events title: SessionDetail description: Full chat session including its complete event history. SessionList: properties: items: items: $ref: '#/components/schemas/ChatSessionSummary' type: array title: Items description: Chat sessions for the current page. 
next_page_token: anyOf: - type: string - type: 'null' title: Next Page Token description: Opaque token to retrieve the next page. Omitted when there are no further pages. type: object required: - items title: SessionList description: Paginated list of chat sessions. SparseModelConfig: properties: model_type: $ref: '#/components/schemas/SparseModelType' description: The sparse model type to use. 'bm25' uses Qdrant's FastEmbed BM25 model (default for new pipelines), 'splade' uses HuggingFace Splade model, 'auto' selects based on deployment mode (BYOC uses term frequency, Cloud uses Splade). default: bm25 class_name: type: string title: Class Name default: SparseModelConfig type: object title: SparseModelConfig description: 'Configuration for sparse embedding models used in hybrid search. This allows users to choose between Splade and BM25 models for sparse retrieval in managed data sinks.' SparseModelType: type: string enum: - splade - bm25 - auto title: SparseModelType description: 'Enum for sparse model types supported in LlamaCloud. SPLADE: Uses HuggingFace Splade model for sparse embeddings BM25: Uses Qdrant''s FastEmbed BM25 model for sparse embeddings AUTO: Automatically selects based on deployment mode (BYOC uses term frequency, Cloud uses Splade)' SplitCategory: properties: name: type: string maxLength: 200 minLength: 1 title: Name description: Name of the category. description: anyOf: - type: string maxLength: 2000 minLength: 1 - type: 'null' title: Description description: Optional description of what content belongs in this category. type: object required: - name title: SplitCategory description: Category definition for document splitting. SplitConfiguration: properties: categories: items: $ref: '#/components/schemas/SplitCategory' type: array maxItems: 50 minItems: 1 title: Categories description: Categories to split documents into. splitting_strategy: $ref: '#/components/schemas/SplitStrategy' description: Strategy for splitting documents. 
type: object required: - categories title: SplitConfiguration description: Split configuration with categories and splitting strategy. SplitCreateRequest: properties: document_input: $ref: '#/components/schemas/SplitDocumentInput' description: Document to be split. configuration: anyOf: - $ref: '#/components/schemas/SplitConfiguration' - type: 'null' description: Inline split configuration. configuration_id: anyOf: - type: string - type: 'null' title: Configuration Id description: Saved split configuration ID. type: object required: - document_input title: SplitCreateRequest description: Beta create request — accepts legacy document_input and flat formats. SplitDocumentInput: properties: type: type: string title: Type description: 'Type of document input. Valid values are: file_id' value: type: string title: Value description: Document identifier. type: object required: - type - value title: SplitDocumentInput description: Document input specification for beta API. SplitJobQueryResponse: properties: items: items: $ref: '#/components/schemas/SplitJobResponse' type: array title: Items description: The list of items. next_page_token: anyOf: - type: string - type: 'null' title: Next Page Token description: A token, which can be sent as page_token to retrieve the next page. If this field is omitted, there are no subsequent pages. total_size: anyOf: - type: integer - type: 'null' title: Total Size description: The total number of items available. This is only populated when specifically requested. The value may be an estimate and can be used for display purposes only. type: object required: - items title: SplitJobQueryResponse description: Beta paginated list of split jobs. SplitJobResponse: properties: id: type: string title: Id description: Unique identifier for the split job. 
created_at: anyOf: - type: string format: date-time - type: 'null' title: Created At description: Creation datetime updated_at: anyOf: - type: string format: date-time - type: 'null' title: Updated At description: Update datetime project_id: type: string title: Project Id description: Project ID this job belongs to. user_id: type: string title: User Id description: User ID who created this job. configuration_id: anyOf: - type: string - type: 'null' title: Configuration Id description: Split configuration ID used for this job. document_input: $ref: '#/components/schemas/SplitDocumentInput' description: Document that was split. categories: items: $ref: '#/components/schemas/SplitCategory' type: array title: Categories description: Categories used for splitting. status: type: string title: Status description: 'Current status of the job. Valid values are: pending, processing, completed, failed, cancelled.' result: anyOf: - $ref: '#/components/schemas/SplitResultResponse' - type: 'null' description: Split result (available when status is COMPLETED). error_message: anyOf: - type: string - type: 'null' title: Error Message description: Error message if the job failed. type: object required: - id - project_id - user_id - document_input - categories - status title: SplitJobResponse description: Beta response — uses nested document_input object. SplitResultResponse: properties: segments: items: $ref: '#/components/schemas/SplitSegmentResponse' type: array title: Segments description: List of document segments. type: object required: - segments title: SplitResultResponse description: Result of a completed split job. SplitSegmentResponse: properties: category: type: string title: Category description: Category name this split belongs to. pages: items: type: integer type: array title: Pages description: 1-indexed page numbers in this split. confidence_category: type: string title: Confidence Category description: 'Categorical confidence level. 
Valid values are: high, medium, low.' type: object required: - category - pages - confidence_category title: SplitSegmentResponse description: A segment of the split document. SplitStrategy: properties: allow_uncategorized: type: string enum: - include - forbid - omit title: Allow Uncategorized description: 'Controls handling of pages that don''t match any category. ''include'': pages can be grouped as ''uncategorized'' and included in results. ''forbid'': all pages must be assigned to a defined category. ''omit'': pages can be classified as ''uncategorized'' but are excluded from results.' default: include type: object title: SplitStrategy description: Configuration for how to split the document. SplitV1Parameters: properties: categories: items: $ref: '#/components/schemas/SplitCategory' type: array maxItems: 50 minItems: 1 title: Categories description: Categories to split documents into. splitting_strategy: $ref: '#/components/schemas/SplitStrategy' description: Strategy for splitting documents. product_type: type: string const: split_v1 title: Product Type description: Product type. type: object required: - categories - product_type title: SplitV1Parameters description: Typed parameters for a *split v1* product configuration. SpreadsheetJob: properties: id: type: string title: Id description: The ID of the job user_id: type: string title: User Id description: The ID of the user project_id: type: string format: uuid title: Project Id description: The ID of the project configuration: $ref: '#/components/schemas/SpreadsheetParsingConfig' description: Configuration applied to the parsing job (inline or resolved from a saved preset). config: anyOf: - $ref: '#/components/schemas/SpreadsheetParsingConfig' - type: 'null' description: 'Deprecated: use `configuration` instead.' 
deprecated: true status: type: string enum: - PENDING - SUCCESS - ERROR - PARTIAL_SUCCESS - CANCELLED title: Status description: The status of the parsing job parameters: $ref: '#/components/schemas/SpreadsheetJobParameters' description: Job-time parameters such as webhook configurations. configuration_id: anyOf: - type: string - type: 'null' title: Configuration Id description: The saved product configuration ID used at create time, if any. metadata_state_transitions: anyOf: - additionalProperties: true type: object - type: 'null' title: Metadata State Transitions description: Per-status entry timestamps. Returned only when requested via `?expand=metadata_state_transitions`. created_at: type: string title: Created At description: When the job was created updated_at: type: string title: Updated At description: When the job was last updated success: anyOf: - type: boolean - type: 'null' title: Success description: Whether the job completed successfully regions: items: $ref: '#/components/schemas/ExtractedRegionSummary' type: array title: Regions description: All extracted regions (populated when job is complete) worksheet_metadata: items: $ref: '#/components/schemas/WorksheetMetadata' type: array title: Worksheet Metadata description: Metadata for each processed worksheet (populated when job is complete) errors: items: type: string type: array title: Errors description: Any errors encountered file_id: anyOf: - type: string format: uuid - type: 'null' title: File Id description: The ID of the input file file: anyOf: - $ref: '#/components/schemas/File' - type: 'null' description: '[DEPRECATED] The file that was parsed. Use file_id instead.' deprecated: true type: object required: - id - user_id - project_id - configuration - status - created_at - updated_at - file_id title: SpreadsheetJob description: A spreadsheet parsing job. 
# Component schemas SpreadsheetJobCreate .. TextNodeWithScore, laid out in
# block style. Keys and values are unchanged.
SpreadsheetJobCreate:
  properties:
    configuration_id:
      anyOf:
      - type: string
      - type: 'null'
      title: Configuration Id
      description: Saved configuration ID
      examples:
      - cfg-11111111-2222-3333-4444-555555555555
    configuration:
      anyOf:
      - $ref: '#/components/schemas/SpreadsheetParsingConfig'
      - type: 'null'
      description: Inline configuration
    webhook_configurations:
      anyOf:
      - items:
          $ref: '#/components/schemas/WebhookConfiguration'
        type: array
      - type: 'null'
      title: Webhook Configurations
      description: Outbound webhook endpoints to notify on job status changes
    config:
      anyOf:
      - $ref: '#/components/schemas/SpreadsheetParsingConfig'
      - type: 'null'
      description: 'Deprecated: use `configuration` instead.'
      deprecated: true
    file_id:
      type: string
      format: uuid
      title: File Id
      description: The ID of the file to parse
  type: object
  required:
  - file_id
  title: SpreadsheetJobCreate
  description: 'Request to create a spreadsheet parsing job. Provide at most one of `configuration` (inline) or `configuration_id` (saved preset). If neither is provided, a default inline configuration is used.'
SpreadsheetJobParameters:
  properties:
    webhook_configurations:
      anyOf:
      - items:
          $ref: '#/components/schemas/WebhookConfiguration'
        type: array
      - type: 'null'
      title: Webhook Configurations
      description: Webhook configurations for job status notifications.
  type: object
  title: SpreadsheetJobParameters
  description: Job-time parameters returned on a spreadsheet job response.
SpreadsheetParsingConfig:
  properties:
    sheet_names:
      anyOf:
      - items:
          type: string
        type: array
      - type: 'null'
      title: Sheet Names
      description: The names of the sheets to extract regions from. If empty, all sheets will be processed.
    include_hidden_cells:
      type: boolean
      title: Include Hidden Cells
      description: Whether to include hidden cells when extracting regions from the spreadsheet.
      default: true
    extraction_range:
      anyOf:
      - type: string
      - type: 'null'
      title: Extraction Range
      description: A1 notation of the range to extract a single region from. If None, the entire sheet is used.
    generate_additional_metadata:
      type: boolean
      title: Generate Additional Metadata
      description: Whether to generate additional metadata (title, description) for each extracted region.
      default: true
    use_experimental_processing:
      type: boolean
      title: Use Experimental Processing
      description: Enables experimental processing. Accuracy may be impacted.
      default: false
    flatten_hierarchical_tables:
      type: boolean
      title: Flatten Hierarchical Tables
      description: Return a flattened dataframe when a detected table is recognized as hierarchical.
      default: false
    table_merge_sensitivity:
      type: string
      enum:
      - strong
      - weak
      title: Table Merge Sensitivity
      description: Influences how likely similar-looking regions are merged into a single table. Useful for spreadsheets that either have sparse tables (strong merging) or many distinct tables close together (weak merging).
      default: strong
    specialization:
      anyOf:
      - type: string
      - type: 'null'
      title: Specialization
      description: 'Optional specialization mode for domain-specific extraction. Supported values: ''financial-standard'', ''financial-enhanced'', ''financial-precise''. Default None uses the general-purpose pipeline.'
  type: object
  title: SpreadsheetParsingConfig
  description: Configuration for spreadsheet parsing and region extraction
SpreadsheetResultType:
  type: string
  enum:
  - table
  - extra
  - cell_metadata
  title: SpreadsheetResultType
# Same property set as SpreadsheetParsingConfig, plus the required
# `product_type` constant.
SpreadsheetV1Parameters:
  properties:
    sheet_names:
      anyOf:
      - items:
          type: string
        type: array
      - type: 'null'
      title: Sheet Names
      description: The names of the sheets to extract regions from. If empty, all sheets will be processed.
    include_hidden_cells:
      type: boolean
      title: Include Hidden Cells
      description: Whether to include hidden cells when extracting regions from the spreadsheet.
      default: true
    extraction_range:
      anyOf:
      - type: string
      - type: 'null'
      title: Extraction Range
      description: A1 notation of the range to extract a single region from. If None, the entire sheet is used.
    generate_additional_metadata:
      type: boolean
      title: Generate Additional Metadata
      description: Whether to generate additional metadata (title, description) for each extracted region.
      default: true
    use_experimental_processing:
      type: boolean
      title: Use Experimental Processing
      description: Enables experimental processing. Accuracy may be impacted.
      default: false
    flatten_hierarchical_tables:
      type: boolean
      title: Flatten Hierarchical Tables
      description: Return a flattened dataframe when a detected table is recognized as hierarchical.
      default: false
    table_merge_sensitivity:
      type: string
      enum:
      - strong
      - weak
      title: Table Merge Sensitivity
      description: Influences how likely similar-looking regions are merged into a single table. Useful for spreadsheets that either have sparse tables (strong merging) or many distinct tables close together (weak merging).
      default: strong
    specialization:
      anyOf:
      - type: string
      - type: 'null'
      title: Specialization
      description: 'Optional specialization mode for domain-specific extraction. Supported values: ''financial-standard'', ''financial-enhanced'', ''financial-precise''. Default None uses the general-purpose pipeline.'
    product_type:
      type: string
      const: spreadsheet_v1
      title: Product Type
      description: Product type.
  type: object
  required:
  - product_type
  title: SpreadsheetV1Parameters
  description: Typed parameters for a *spreadsheet v1* product configuration.
StaticFields:
  properties:
    parsed_directory_file_id:
      anyOf:
      - type: string
      - type: 'null'
      title: Parsed Directory File Id
      description: ID of the parsed file.
    page_range_start:
      anyOf:
      - type: integer
      - type: 'null'
      title: Page Range Start
      description: First page number covered by this chunk.
    page_range_end:
      anyOf:
      - type: integer
      - type: 'null'
      title: Page Range End
      description: Last page number covered by this chunk.
    chunk_start_char:
      anyOf:
      - type: integer
      - type: 'null'
      title: Chunk Start Char
      description: Start character offset of the chunk.
    chunk_end_char:
      anyOf:
      - type: integer
      - type: 'null'
      title: Chunk End Char
      description: End character offset of the chunk.
    chunk_index:
      anyOf:
      - type: integer
      - type: 'null'
      title: Chunk Index
      description: Index of the chunk within the file.
    chunk_token_count:
      anyOf:
      - type: integer
      - type: 'null'
      title: Chunk Token Count
      description: Token count of the chunk.
    attachments:
      items:
        $ref: '#/components/schemas/AttachmentRef'
      type: array
      title: Attachments
      description: Attachments associated with the chunk
  type: object
  title: StaticFields
  description: Built-in fields stored for every exported chunk.
StatusEnum:
  type: string
  enum:
  - PENDING
  - SUCCESS
  - ERROR
  - PARTIAL_SUCCESS
  - CANCELLED
  title: StatusEnum
  description: Enum for representing the status of a job
StopEvent:
  properties:
    type:
      type: string
      const: stop
      title: Type
      default: stop
    usage:
      $ref: '#/components/schemas/Usage'
    is_error:
      type: boolean
      title: Is Error
    error:
      anyOf:
      - type: string
      - type: 'null'
      title: Error
  type: object
  required:
  - usage
  - is_error
  - error
  title: StopEvent
StructuredResult:
  properties:
    pages:
      items:
        anyOf:
        - $ref: '#/components/schemas/StructuredResultPage'
        - $ref: '#/components/schemas/FailedStructuredPage'
      type: array
      title: Pages
      description: List of structured pages or failed page entries
  type: object
  required:
  - pages
  title: StructuredResult
StructuredResultPage:
  properties:
    page_number:
      type: integer
      title: Page Number
      description: Page number of the document
    items:
      items:
        # Tagged union: the `type` property selects the concrete item schema.
        oneOf:
        - $ref: '#/components/schemas/TextItem'
        - $ref: '#/components/schemas/HeadingItem'
        - $ref: '#/components/schemas/ListItem'
        - $ref: '#/components/schemas/CodeItem'
        - $ref: '#/components/schemas/TableItem'
        - $ref: '#/components/schemas/ImageItem'
        - $ref: '#/components/schemas/LinkItem'
        - $ref: '#/components/schemas/HeaderItem'
        - $ref: '#/components/schemas/FooterItem'
        discriminator:
          propertyName: type
          mapping:
            code: '#/components/schemas/CodeItem'
            footer: '#/components/schemas/FooterItem'
            header: '#/components/schemas/HeaderItem'
            heading: '#/components/schemas/HeadingItem'
            image: '#/components/schemas/ImageItem'
            link: '#/components/schemas/LinkItem'
            list: '#/components/schemas/ListItem'
            table: '#/components/schemas/TableItem'
            text: '#/components/schemas/TextItem'
      type: array
      title: Items
      description: List of structured items on the page
    page_width:
      type: number
      title: Page Width
      description: Width of the page in points
    page_height:
      type: number
      title: Page Height
      description: Height of the page in points
    success:
      type: boolean
      const: true
      title: Success
      description: Success indicator
  type: object
  required:
  - page_number
  - items
  - page_width
  - page_height
  - success
  title: StructuredResultPage
SupportedLLMModelNames:
  type: string
  enum:
  - GPT_4O
  - GPT_4O_MINI
  - GPT_4_1
  - GPT_4_1_NANO
  - GPT_4_1_MINI
  - AZURE_OPENAI_GPT_4O
  - AZURE_OPENAI_GPT_4O_MINI
  - AZURE_OPENAI_GPT_4_1
  - AZURE_OPENAI_GPT_4_1_MINI
  - AZURE_OPENAI_GPT_4_1_NANO
  - CLAUDE_4_5_SONNET
  - BEDROCK_CLAUDE_3_5_SONNET_V1
  - BEDROCK_CLAUDE_3_5_SONNET_V2
  title: SupportedLLMModelNames
TableItem:
  properties:
    type:
      type: string
      const: table
      title: Type
      description: Table item type
      default: table
    md:
      type: string
      title: Md
      description: Markdown representation preserving formatting
    bbox:
      anyOf:
      - items:
          $ref: '#/components/schemas/BBox'
        type: array
      - type: 'null'
      title: Bbox
      description: List of bounding boxes
    rows:
      # Array of rows; each row is an array of scalar-or-null cells.
      items:
        items:
          anyOf:
          - type: string
          - type: integer
          - type: number
          - type: 'null'
        type: array
      type: array
      title: Rows
      description: Table data as array of arrays (string, number, or null)
    html:
      type: string
      title: Html
      description: HTML representation of the table
    csv:
      type: string
      title: Csv
      description: CSV representation of the table
    merged_from_pages:
      anyOf:
      - items:
          type: integer
        type: array
      - type: 'null'
      title: Merged From Pages
      description: List of page numbers with tables that were merged into this table (e.g., [1, 2, 3, 4])
    merged_into_page:
      anyOf:
      - type: integer
      - type: 'null'
      title: Merged Into Page
      description: Populated when merged into another table. Page number where the full merged table begins (used on empty tables).
    parse_concerns:
      anyOf:
      - items:
          $ref: '#/components/schemas/ParseConcernItem'
        type: array
      - type: 'null'
      title: Parse Concerns
      description: Quality concerns detected during table extraction, indicating the table may have issues
  type: object
  required:
  - md
  - rows
  - html
  - csv
  title: TableItem
TextDeltaEvent:
  properties:
    type:
      type: string
      const: text_delta
      title: Type
      default: text_delta
    content:
      type: string
      title: Content
  type: object
  required:
  - content
  title: TextDeltaEvent
TextEvent:
  properties:
    type:
      type: string
      const: text
      title: Type
      default: text
    content:
      type: string
      title: Content
  type: object
  required:
  - content
  title: TextEvent
TextItem:
  properties:
    type:
      type: string
      const: text
      title: Type
      description: Text item type
      default: text
    md:
      type: string
      title: Md
      description: Markdown representation preserving formatting
    bbox:
      anyOf:
      - items:
          $ref: '#/components/schemas/BBox'
        type: array
      - type: 'null'
      title: Bbox
      description: List of bounding boxes
    value:
      type: string
      title: Value
      description: Text content
  type: object
  required:
  - md
  - value
  title: TextItem
TextNode:
  properties:
    id_:
      type: string
      title: Id
      description: Unique ID of the node.
    embedding:
      anyOf:
      - items:
          type: number
        type: array
      - type: 'null'
      title: Embedding
      description: Embedding of the node.
    extra_info:
      additionalProperties: true
      type: object
      title: Extra Info
      description: A flat dictionary of metadata fields
    excluded_embed_metadata_keys:
      items:
        type: string
      type: array
      title: Excluded Embed Metadata Keys
      description: Metadata keys that are excluded from text for the embed model.
    excluded_llm_metadata_keys:
      items:
        type: string
      type: array
      title: Excluded Llm Metadata Keys
      description: Metadata keys that are excluded from text for the LLM.
    relationships:
      # Each value is a single RelatedNodeInfo or a list of them; keys are
      # constrained to NodeRelationship via propertyNames.
      additionalProperties:
        anyOf:
        - $ref: '#/components/schemas/RelatedNodeInfo'
        - items:
            $ref: '#/components/schemas/RelatedNodeInfo'
          type: array
      propertyNames:
        $ref: '#/components/schemas/NodeRelationship'
      type: object
      title: Relationships
      description: A mapping of relationships to other node information.
    metadata_template:
      type: string
      title: Metadata Template
      description: Template for how metadata is formatted, with {key} and {value} placeholders.
      default: '{key}: {value}'
    # NOTE(review): "seperator" is the property name as declared here; keep the
    # spelling, since renaming it would change the field name clients send.
    metadata_seperator:
      type: string
      title: Metadata Seperator
      description: Separator between metadata fields when converting to string.
      default: ' '
    text:
      type: string
      title: Text
      description: Text content of the node.
      default: ''
    mimetype:
      type: string
      title: Mimetype
      description: MIME type of the node content.
      default: text/plain
    start_char_idx:
      anyOf:
      - type: integer
      - type: 'null'
      title: Start Char Idx
      description: Start char index of the node.
    end_char_idx:
      anyOf:
      - type: integer
      - type: 'null'
      title: End Char Idx
      description: End char index of the node.
    text_template:
      type: string
      title: Text Template
      description: Template for how text is formatted, with {content} and {metadata_str} placeholders.
      default: '{metadata_str} {content}'
    class_name:
      type: string
      title: Class Name
      default: TextNode
  type: object
  title: TextNode
  description: Provided for backward compatibility.
TextNodeWithScore:
  properties:
    node:
      $ref: '#/components/schemas/TextNode'
    score:
      anyOf:
      - type: number
      - type: 'null'
      title: Score
    class_name:
      type: string
      title: Class Name
      default: TextNodeWithScore
  type: object
  required:
  - node
  title: TextNodeWithScore
  description: 'Same as NodeWithScore but type for node is a TextNode instead of BaseNode. FastAPI doesn''t accept abstract classes like BaseNode.'
# Schema definitions TextResult .. WebhookConfiguration (entries of
# components.schemas), re-emitted in block style.
#
# NOTE(review): these schemas use `const`, `type: 'null'`, numeric
# `exclusiveMinimum` and `examples`, which are JSON Schema 2020-12 /
# OpenAPI 3.1 constructs, while the document header declares
# `openapi: 3.0.3` -- confirm which version is intended.

# Plain-text parse result: a list of per-page text entries.
TextResult:
  properties:
    pages:
      items:
        $ref: '#/components/schemas/TextResultPage'
      type: array
      title: Pages
      description: List of text pages
  type: object
  required:
    - pages
  title: TextResult

# One page of a TextResult: page number plus its plain text.
TextResultPage:
  properties:
    page_number:
      type: integer
      title: Page Number
      description: Page number of the document
    text:
      type: string
      title: Text
      description: Plain text content of the page
  type: object
  required:
    - page_number
    - text
  title: TextResultPage

# Event whose `type` is fixed to `thinking_delta`; carries string `content`.
ThinkingDeltaEvent:
  properties:
    type:
      type: string
      const: thinking_delta
      title: Type
      default: thinking_delta
    content:
      type: string
      title: Content
  type: object
  required:
    - content
  title: ThinkingDeltaEvent

# Event whose `type` is fixed to `thinking`; carries string `content`.
ThinkingEvent:
  properties:
    type:
      type: string
      const: thinking
      title: Type
      default: thinking
    content:
      type: string
      title: Content
  type: object
  required:
    - content
  title: ThinkingEvent

# Chunking configuration with `mode` fixed to `token`. No field is required;
# every property declares a default.
TokenChunkingConfig:
  properties:
    chunk_size:
      type: integer
      exclusiveMinimum: 0.0
      title: Chunk Size
      default: 1024
    chunk_overlap:
      type: integer
      # `minimum` is the standard JSON Schema lower-bound keyword. It replaces
      # a stray `gte: 0` entry, which is not a schema keyword and is therefore
      # ignored by validators and client generators.
      minimum: 0
      title: Chunk Overlap
      default: 200
    mode:
      type: string
      const: token
      title: Mode
      default: token
    separator:
      type: string
      title: Separator
      default: ' '
  type: object
  title: TokenChunkingConfig

# Event whose `type` is fixed to `tool_call`: tool name, call id and a
# free-form `arguments` object.
ToolCallEvent:
  properties:
    type:
      type: string
      const: tool_call
      title: Type
      default: tool_call
    arguments:
      additionalProperties: true
      type: object
      title: Arguments
    name:
      type: string
      title: Name
    call_id:
      type: string
      title: Call Id
  type: object
  required:
    - arguments
    - name
    - call_id
  title: ToolCallEvent

# Event whose `type` is fixed to `tool_result`. `result` declares no type, so
# any JSON value is accepted; `image_attachment` is optional and nullable.
ToolResultEvent:
  properties:
    type:
      type: string
      const: tool_result
      title: Type
      default: tool_result
    name:
      type: string
      title: Name
    call_id:
      type: string
      title: Call Id
    result:
      title: Result
    image_attachment:
      anyOf:
        - $ref: '#/components/schemas/ImageAttachmentRef'
        - type: 'null'
  type: object
  required:
    - name
    - call_id
    - result
  title: ToolResultEvent

# Open-ended parameter object: `product_type` must be `unknown`, any other
# properties are allowed.
UntypedParameters:
  properties:
    product_type:
      type: string
      const: unknown
      title: Product Type
      description: Product type.
  additionalProperties: true
  type: object
  required:
    - product_type
  title: UntypedParameters
  description: >-
    Catch-all for configurations without a dedicated typed schema.
    Accepts arbitrary JSON fields alongside ``product_type``.

# Usage counters: nullable token totals, turn count and duration.
Usage:
  properties:
    total_input_tokens:
      anyOf:
        - type: integer
        - type: 'null'
      title: Total Input Tokens
    total_output_tokens:
      anyOf:
        - type: integer
        - type: 'null'
      title: Total Output Tokens
    turns:
      type: integer
      title: Turns
      default: 1
    duration_ms:
      type: number
      title: Duration Ms
      default: 0.0
  type: object
  title: Usage

# A single usage metric record. Only `properties` and `credits` are optional.
UsageMetric:
  properties:
    id:
      type: string
      format: uuid
      title: Id
      description: The system-generated UUID for the metric
    user_id:
      type: string
      title: User Id
      description: The ID of the user
    event_type:
      type: string
      enum:
        - pages_indexed
        - pages_embedded
        - pages_parsed
        - set_total_pages_indexed
        - set_total_indexes
        - layout_extracted
        - layout_aware_parsing
        - layout_aware_chart_extraction
        - chart_parsing_agentic
        - chart_parsing_plus
        - chart_parsing_efficient
        - image_classified
        - precise_bbox_extraction
        - audio_seconds_parsed
        - extraction_num_pages
        - extraction_num_pages_parsed
        - pages_split
        - pages_classified
        - directory_file_count_snapshot
        - directory_count_snapshot
      title: Event Type
      description: The event type that is emitted
    project_id:
      type: string
      title: Project Id
      description: The project ID
    organization_id:
      type: string
      title: Organization Id
      description: The organization ID
    value:
      type: integer
      title: Value
      description: The unit measurement associated with the event type
    # This is a schema property literally named `properties`, not the
    # JSON Schema `properties` keyword.
    properties:
      additionalProperties: true
      type: object
      title: Properties
      description: Properties associated with the metric
    day:
      type: string
      title: Day
      description: >-
        The day the metric was emitted [UTC], in the format 'YYYY-MM-DD'
    event_aggregation_key:
      type: string
      title: Event Aggregation Key
      description: The source job identifier, i.e. job_id or file_id
    event_aggregation_type:
      type: string
      title: Event Aggregation Type
      description: The source job aggregation type, i.e. pdf
    credits:
      anyOf:
        - type: number
        - type: 'null'
      title: Credits
      description: The number of credits consumed by this metric
  type: object
  required:
    - id
    - user_id
    - event_type
    - project_id
    - organization_id
    - value
    - day
    - event_aggregation_key
    - event_aggregation_type
  title: UsageMetric
  description: API boundary representation of a usage metric.

# Page of UsageMetric items with an optional continuation token and total.
UsageMetricQueryResponse:
  properties:
    items:
      items:
        $ref: '#/components/schemas/UsageMetric'
      type: array
      title: Items
      description: The list of items.
    next_page_token:
      anyOf:
        - type: string
        - type: 'null'
      title: Next Page Token
      description: >-
        A token, which can be sent as page_token to retrieve the next page.
        If this field is omitted, there are no subsequent pages.
    total_size:
      anyOf:
        - type: integer
        - type: 'null'
      title: Total Size
      description: >-
        The total number of items available. This is only populated when
        specifically requested. The value may be an estimate and can be used
        for display purposes only.
  type: object
  required:
    - items
  title: UsageMetricQueryResponse
  description: Paginated response containing usage metrics.

# Event whose `type` is fixed to `user_input`; carries string `content`.
UserInputEvent:
  properties:
    type:
      type: string
      const: user_input
      title: Type
      default: user_input
    content:
      type: string
      title: Content
  type: object
  required:
    - content
  title: UserInputEvent

# One validation failure: location path (strings and/or integers), message
# and error type, plus optional `input` (any value) and `ctx` object.
ValidationError:
  properties:
    loc:
      items:
        anyOf:
          - type: string
          - type: integer
      type: array
      title: Location
    msg:
      type: string
      title: Message
    type:
      type: string
      title: Error Type
    input:
      title: Input
    ctx:
      type: object
      title: Context
  type: object
  required:
    - loc
    - msg
    - type
  title: ValidationError

# Embedding configuration with `type` fixed to `VERTEXAI_EMBEDDING`, wrapping
# a VertexTextEmbedding component.
VertexAIEmbeddingConfig:
  properties:
    type:
      type: string
      const: VERTEXAI_EMBEDDING
      title: Type
      description: Type of the embedding model.
      default: VERTEXAI_EMBEDDING
    component:
      $ref: '#/components/schemas/VertexTextEmbedding'
      description: Configuration for the VertexAI embedding model.
  type: object
  title: VertexAIEmbeddingConfig

# Allowed values for VertexTextEmbedding.embed_mode.
VertexEmbeddingMode:
  type: string
  enum:
    - default
    - classification
    - clustering
    - similarity
    - retrieval
  title: VertexEmbeddingMode
  description: >-
    Copied from llama_index.embeddings.vertex.base.VertexEmbeddingMode
    since importing llama_index.embeddings.vertex.base incurs a lot of
    memory usage.

# Vertex text-embedding settings. The four credential fields are nullable but
# still listed under `required`, so they must be present (possibly as null).
VertexTextEmbedding:
  properties:
    model_name:
      type: string
      title: Model Name
      description: The modelId of the VertexAI model to use.
      default: textembedding-gecko@003
    embed_batch_size:
      type: integer
      maximum: 2048.0
      exclusiveMinimum: 0.0
      title: Embed Batch Size
      description: The batch size for embedding calls.
      default: 10
    num_workers:
      anyOf:
        - type: integer
        - type: 'null'
      title: Num Workers
      description: The number of workers to use for async embedding calls.
    location:
      type: string
      title: Location
      description: The default location to use when making API calls.
    project:
      type: string
      title: Project
      description: The default GCP project to use when making Vertex API calls.
    embed_mode:
      $ref: '#/components/schemas/VertexEmbeddingMode'
      description: The embedding mode to use.
      default: retrieval
    additional_kwargs:
      additionalProperties: true
      type: object
      title: Additional Kwargs
      description: Additional kwargs for the Vertex.
    client_email:
      anyOf:
        - type: string
        - type: 'null'
      title: Client Email
      description: The client email for the VertexAI credentials.
    token_uri:
      anyOf:
        - type: string
        - type: 'null'
      title: Token Uri
      description: The token URI for the VertexAI credentials.
    private_key_id:
      anyOf:
        - type: string
        - type: 'null'
      title: Private Key Id
      description: The private key ID for the VertexAI credentials.
    private_key:
      anyOf:
        - type: string
        - type: 'null'
      title: Private Key
      description: The private key for the VertexAI credentials.
    class_name:
      type: string
      title: Class Name
      default: VertexTextEmbedding
  type: object
  required:
    - location
    - project
    - client_email
    - token_uri
    - private_key_id
    - private_key
  title: VertexTextEmbedding

# Settings for one outbound webhook. Every property is optional and nullable.
WebhookConfiguration:
  properties:
    webhook_url:
      anyOf:
        - type: string
        - type: 'null'
      title: Webhook Url
      description: URL to receive webhook POST notifications
      examples:
        - https://example.com/webhooks/llamacloud
    webhook_headers:
      anyOf:
        - additionalProperties:
            type: string
          type: object
        - type: 'null'
      title: Webhook Headers
      description: >-
        Custom HTTP headers sent with each webhook request (e.g. auth tokens)
      examples:
        - Authorization: Bearer sk-...
    webhook_events:
      anyOf:
        - items:
            type: string
            enum:
              - extract.pending
              - extract.success
              - extract.error
              - extract.partial_success
              - extract.cancelled
              - parse.pending
              - parse.running
              - parse.success
              - parse.error
              - parse.partial_success
              - parse.cancelled
              - classify.pending
              - classify.running
              - classify.success
              - classify.error
              - classify.partial_success
              - classify.cancelled
              - sheets.pending
              - sheets.success
              - sheets.error
              - sheets.partial_success
              - sheets.cancelled
              - unmapped_event
          type: array
        - type: 'null'
      title: Webhook Events
      description: >-
        Events to subscribe to (e.g. 'parse.success', 'extract.error').
        If null, all events are delivered.
      # A single example whose value is itself a list of event names.
      examples:
        - - parse.success
          - parse.error
    webhook_output_format:
      anyOf:
        - type: string
        - type: 'null'
      title: Webhook Output Format
      description: 'Response format sent to the webhook: ''string'' (default) or ''json'''
      examples:
        - json
  type: object
  title: WebhookConfiguration
  description: Configuration for a single outbound webhook endpoint.
# WorksheetMetadata: object schema describing one worksheet. Only `sheet_name`
# is required; `title` and `description` are nullable strings.
# securitySchemes: declares the `HTTPBearer` scheme (type `http`, scheme
# `bearer`), the name used by the operations' `security` entries.
WorksheetMetadata: properties: sheet_name: type: string title: Sheet Name description: Name of the worksheet title: anyOf: - type: string - type: 'null' title: Title description: Generated title for the worksheet description: anyOf: - type: string - type: 'null' title: Description description: Generated description of the worksheet type: object required: - sheet_name title: WorksheetMetadata description: Metadata about a worksheet in a spreadsheet securitySchemes: HTTPBearer: type: http scheme: bearer