// clean existing records match (n:pm_absolute_count_entry ) optional match (n)-[r]-() delete n,r; match (n:pm_absolute_count_entry_docs ) optional match (n)-[r]-() delete n,r; match (n:pm_document ) optional match (n)-[r]-() delete n,r; match (n:pm_diagnosis_category ) optional match (n)-[r]-() delete n,r; match (n:pm_diagnosis ) optional match (n)-[r]-() delete n,r; match (n:pm_pubmed_doc) optional match (n)-[r]-() delete n,r; // Loading the CSVs ////////////////////////////////////// // LOAD RAW DATA ////////////////////////////////////// // DIAGNOSIS LOAD CSV WITH HEADERS FROM "file:///Users/malarcon/Google Drive/CUNY/IS607/submissions/project5/absolute_count_entry.csv" as records create (n:pm_absolute_count_entry) set n = records; //CREATE CONSTRAINT ON (c:pm_absolute_count_entry) ASSERT c.ccs_diagnosis_categories IS UNIQUE; //CREATE INDEX ON :pm_absolute_count_entry(url_params); //DROP INDEX ON :pm_absolute_count_entry_docs(url_params); USING PERIODIC COMMIT 10000 LOAD CSV WITH HEADERS FROM "file:///Users/malarcon/Google Drive/CUNY/IS607/submissions/project5/absolute_count_entry_docs.csv" as records create (n:pm_absolute_count_entry_docs) set n = records; //CREATE INDEX ON :pm_absolute_count_entry_docs(url_params); LOAD CSV WITH HEADERS FROM "file:///Users/malarcon/Google Drive/CUNY/IS607/submissions/project5/dxref_2015.csv" as records with records, trim(records.`CCS CATEGORY`) as `CCS CATEGORY` create (n:pm_dxref_2015) set n = records; CREATE INDEX ON :pm_dxref_2015(`CCS CATEGORY`); CREATE INDEX ON :pm_dxref_2015(`ICD-9-CM CODE`); MATCH (a: pm_dxref_2015) SET a.`CCS CATEGORY` = trim(a.`CCS CATEGORY`); ////////Processing the Nodes /////// pm_diagnosis_category CREATE INDEX ON :pm_diagnosis_category(search_terms_url_encoded); CREATE INDEX ON :pm_diagnosis_category(category_id); MATCH (a: pm_absolute_count_entry) WITH a ,a.ccs_diagnosis_categories as category_id ,a.ccs_diagnosis_categories_labels as category_description ,a.url_params as search_terms ,a.disease_phrase as search_terms_url_encoded ,a.Count as frequency MERGE (n:pm_diagnosis_category {category_id:category_id ,category_description:category_description ,search_terms:search_terms ,search_terms_url_encoded:search_terms_url_encoded ,frequency:frequency } ); /////// pm_diagnosis CREATE INDEX ON :pm_diagnosis(icd9_code); CREATE INDEX ON :pm_diagnosis(icd9_code_description); CREATE INDEX ON :pm_diagnosis(category_id); MATCH (a: pm_dxref_2015) WITH a ,a.`ICD-9-CM CODE` as icd9_code ,a.`ICD-9-CM CODE DESCRIPTION` as icd9_code_description ,a.`CCS CATEGORY` as category_id merge (m:pm_diagnosis {icd9_code:icd9_code ,icd9_code_description:icd9_code_description ,category_id:category_id }); MATCH (a: pm_diagnosis_category) SET a.category_description_short = trim(a.category_description_short); MATCH (a: pm_dxref_2015),(b:pm_diagnosis_category {category_id:a.`CCS CATEGORY`}) SET b.category_description_short = a.`CCS CATEGORY DESCRIPTION`; /////////// diagnosis relationships MATCH (b:pm_diagnosis) ,(c:pm_diagnosis_category {category_id:b.category_id}) CREATE (b) -[r:BELONGS_TO]-> (c); MATCH (b:pm_diagnosis) REMOVE b.category_id /////////// pubmed documents CREATE INDEX ON :pm_pubmed_doc(pubmed_id); USING PERIODIC COMMIT 10000 LOAD CSV WITH HEADERS FROM "file:///Users/malarcon/Google Drive/CUNY/IS607/submissions/project5/pubmed_docs.csv" as records create (n:pm_pubmed_doc) set n = records; /////////// document diagnosis category relationship create index on :pm_pubmed_doc(pubmed_id); create index on :pm_absolute_count_entry_docs(Id); create index on :pm_absolute_count_entry_docs(url_params); create index on :pm_diagnosis_category(search_terms);