{ "dataset": "campaign_booklet", "name": "South Korean Election Campaign Booklets", "description": "Enriched campaign booklet artifact using the same document-row universe as the original CSV source, with conservative NEC linkage fields such as 'huboid', 'sg_id', and 'sg_typecode' added to improve interoperability with kr-elections-mcp and related NEC-aligned workflows.", "time_coverage": "2000-2022", "data_version": "v2022", "package_version": "0.2.0", "variant": "enriched", "default_variant": "original", "available_variants": ["original", "enriched"], "variant_description": "The same document-row universe as the original CSV source, plus conservative NEC linkage fields for integration workflows.", "recommended_use": "NEC-aligned workflows, kr-elections-mcp, and linkage-aware joins.", "identifier_columns": "code", "text_columns": ["text", "filtered"], "supported_formats": ["csv", "parquet"], "managed_formats": ["csv", "parquet"], "artifacts": { "csv": { "format": "csv", "file": "sk_election_campaign_booklet_enriched_v2022.csv", "download_url": "https://osf.io/download/69e3eec5352dbdd881fd8d7b/", "sha256": "08779d4c27a02635c7bf08a332170ac0a5bf1295e825e3b29061c62f95598586", "size_bytes": 760045361, "managed": true }, "parquet": { "format": "parquet", "file": "sk_election_campaign_booklet_enriched_v2022.parquet", "download_url": "https://osf.io/download/69e3ee72a0e06b0928fd8ae2/", "sha256": "d8901cd2cebef30116f8865847727bb10855478ee556bc0dcfb5a04e838ad8f4", "size_bytes": 406231949, "managed": true } }, "columns": [ { "name": "date", "type": "character", "description": "Election date (YYYY-MM-DD)" }, { "name": "name", "type": "character", "description": "Candidate name (Korean)" }, { "name": "region", "type": "character", "description": "Metropolitan region (province or metropolitan city)" }, { "name": "district", "type": "character", "description": "Electoral district" },
{ "name": "office_id", "type": "integer", "description": "Office type identifier (1=president, 2=national_assembly, 3=edu_superintendent, 4=metro_head, 5=metro_assembly, 6=basic_head, 7=basic_assembly)" }, { "name": "office", "type": "character", "description": "Office type label (president, national_assembly, edu_superintendent, metro_head, metro_assembly, basic_head, basic_assembly)" }, { "name": "giho", "type": "integer", "description": "Candidate ballot number" }, { "name": "party", "type": "character", "description": "Political party name (Korean)" }, { "name": "party_eng", "type": "character", "description": "Political party name (English); transliteration if no official English name" }, { "name": "result", "type": "character", "description": "Election result in Korean" }, { "name": "sex", "type": "character", "description": "Sex in Korean" }, { "name": "birthday", "type": "character", "description": "Date of birth (YYYY-MM-DD)" }, { "name": "age", "type": "integer", "description": "Age at the time of the election" }, { "name": "job_id", "type": "integer", "description": "Original NEC job category identifier (varies across years)" }, { "name": "job", "type": "character", "description": "Standardized job category (Korean)" }, { "name": "job_name", "type": "character", "description": "Job title (Korean)" }, { "name": "job_name_eng", "type": "character", "description": "Job title (English)" }, { "name": "job_code", "type": "integer", "description": "Standardized job code consistent across years" }, { "name": "edu_id", "type": "integer", "description": "Original NEC education level identifier (varies across years)" }, { "name": "edu", "type": "character", "description": "Education description (Korean, free-text from NEC)" }, { "name": "edu_name", "type": "character", "description": "Standardized education level label (Korean)" }, { "name": "edu_name_eng", "type": "character", "description": "Standardized education level label (English)" }, { "name": "edu_code", "type": "integer", "description": "Standardized education code consistent across years" }, {
"name": "career1", "type": "character", "description": "Career description 1" }, { "name": "career2", "type": "character", "description": "Career description 2" }, { "name": "pages", "type": "integer", "description": "Number of pages in the booklet" }, { "name": "code", "type": "character", "description": "krpoltext document row identifier", "identifier": true }, { "name": "huboid", "type": "character", "description": "Linked NEC candidate identifier used for conservative kr-elections-mcp alignment; unresolved rows remain NA" }, { "name": "sg_id", "type": "character", "description": "Linked NEC election identifier used for NEC-aligned workflows" }, { "name": "sg_typecode", "type": "character", "description": "Linked NEC election type identifier used for NEC-aligned workflows" }, { "name": "link_status", "type": "character", "description": "Linkage status for NEC alignment (resolved, ambiguous, not_found, rejected)" }, { "name": "matcher_version", "type": "character", "description": "Version of the linkage pipeline used to assign NEC fields" }, { "name": "nec_snapshot_id", "type": "character", "description": "Identifier of the NEC snapshot used to assign NEC fields" }, { "name": "sex_code", "type": "integer", "description": "Sex code: 1 = male, 0 = female" }, { "name": "result_code", "type": "integer", "description": "Result code: 1 = elected, 0 = not elected" }, { "name": "text", "type": "character", "description": "Full OCR-extracted text of the campaign booklet" }, { "name": "filtered", "type": "character", "description": "Parsed text after morphological analysis; Korean-only text with numbers, foreign characters, and symbols removed" } ], "notes": { "missing_values": "2,283 rows have no booklet code or text because a booklet was not available. 151 are missing biographical information. 23 booklets were unprocessable.",
"text_processing": "All text is UTF-8 encoded Korean. 'text' contains the full original text; 'filtered' contains the morphologically parsed version.", "identifiers": "'code' is the krpoltext document row identifier, but some rows have missing code values, so row identity should not be inferred from code alone. 'huboid' is a linked NEC identifier, not a native krpoltext identifier. Rows with 'link_status == \"resolved\"' are expected to have a non-null 'huboid'. 'sg_id' and 'sg_typecode' describe the NEC-aligned election scope attached to the row. 'job_id' and 'edu_id' vary across election years; use 'job_code' and 'edu_code' for cross-year analysis.", "provenance": "The enriched variant is a row-preserving transformation of the original campaign_booklet CSV source. It adds conservative NEC linkage metadata to improve interoperability with kr-elections-mcp and related NEC-aligned workflows.", "artifact_transition": "When the enriched campaign_booklet artifact is rebuilt or republished, update registry checksums, sizes, and URLs in lockstep with this schema."
}, "extras": { "office_mapping": [ { "office_id": 1, "office": "president", "description": "Presidential election" }, { "office_id": 2, "office": "national_assembly", "description": "National Assembly election" }, { "office_id": 3, "office": "edu_superintendent", "description": "Education superintendent" }, { "office_id": 4, "office": "metro_head", "description": "Metropolitan city mayor / provincial governor" }, { "office_id": 5, "office": "metro_assembly", "description": "Metropolitan assembly member" }, { "office_id": 6, "office": "basic_head", "description": "Basic local government head" }, { "office_id": 7, "office": "basic_assembly", "description": "Basic assembly member" } ], "row_universe": "Same document-row universe as the original campaign_booklet CSV source; some rows have missing code values.", "linkage_fields": [ { "name": "huboid", "role": "linked_nec_candidate_identifier", "nullable": true }, { "name": "sg_id", "role": "linked_nec_election_identifier", "nullable": true }, { "name": "sg_typecode", "role": "linked_nec_election_type_identifier", "nullable": true }, { "name": "link_status", "role": "linkage_status", "nullable": false }, { "name": "matcher_version", "role": "linkage_provenance", "nullable": true }, { "name": "nec_snapshot_id", "role": "linkage_provenance", "nullable": true } ] } }