# The function: bibtex_2academic

#' Convert a BibTeX file into one Quarto (.qmd) post per entry
#'
#' Reads `bibfile` with RefManageR, normalises its fields and writes one
#' `.qmd` file per entry into `outfold`. Each file holds YAML front matter
#' (title, date, author, categories, publication, abstract, about/links),
#' followed by the cleaned annotation text and a "How to cite this work"
#' callout.
#'
#' @param bibfile Path to the BibTeX file to convert.
#' @param outfold Directory that receives the generated files. It is not
#'   created here, so it has to exist already.
#' @param abstract If `TRUE` (default) the entry's abstract is written to the
#'   front matter; if `FALSE` an empty abstract is written instead.
#' @param overwrite If `FALSE` (default) files that already exist are skipped.
#' @return `NULL`, invisibly. Called for its side effect of writing files.
bibtex_2academic <- function(bibfile, outfold, abstract = TRUE, overwrite = FALSE) {
  # Only availability is checked here; the packages are attached by p_load().
  if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")
  pacman::p_load(RefManageR, dplyr, stringr, anytime, tidyr, stringi)

  # Import the bibtex file and convert to data.frame
  mypubs <- ReadBib(bibfile, check = "warn", .Encoding = "UTF-8") %>%
    as.data.frame()

  # Create columns that do not exist (as happens with pure presentation
  # collections). They are filled with character NA so that later
  # replace_na(<column>, "<string>") calls see a character column.
  fncols <- function(data, cname) {
    add <- cname[!cname %in% names(data)]
    if (length(add) != 0) data[add] <- NA_character_
    data
  }
  # "type" and "keywords" are included because both are read further down.
  mypubs <- fncols(
    mypubs,
    c(
      "journal", "abstract", "annotation", "editor", "booktitle", "volume",
      "number", "pages", "address", "institution", "publisher", "doi",
      "isbn", "url", "year", "month", "school", "type", "keywords"
    )
  )

  # Replace NAs, otherwise "NA" appears in the post
  mypubs$mainref <- replace_na(as.character(mypubs$journal), " ")
  mypubs$bibtype <- str_replace_all(mypubs$bibtype, c("Misc" = "Presentation"))
  mypubs$abstract <- replace_na(
    as.character(mypubs$abstract),
    "(Abstract not available)"
  )
  mypubs$month <- replace_na(as.character(mypubs$month), "jan")

  # The Zotero "extra" field (here "annotation") carries additional
  # information for the generated file: put the "---" separators on lines of
  # their own.
  mypubs$annotation <- gsub("--- ", "---\n", mypubs$annotation)
  mypubs$annotation <- gsub(" ---", "\n---\n", mypubs$annotation)

  # Clean backslashes generated by the conversion of underscores in links and
  # of other special characters, plus braces and dollar signs.
  for (pat in c("\\\\", "\\{", "\\}", "\\$")) {
    mypubs$annotation <- gsub(pat, "", mypubs$annotation)
  }
  mypubs$abstract <- gsub("\\\\", "", mypubs$abstract)
  mypubs$mainref <- gsub("\\\\", "", mypubs$mainref)

  # Note kept from the original author: replacing "url:" coming from Zotero
  # did not work, so the extra field uses "web:" for link targets instead.

  # Pull each url_* field (written as  url_xxx: "value") out of the
  # annotation into a column of its own.
  url_fields <- c(
    "url_pdf", "url_preprint", "url_dataset", "url_project",
    "url_slides", "url_video", "url_poster"
  )
  for (field in url_fields) {
    found <- str_match(mypubs$annotation, paste0(field, ':\\s*"([^"]+)"'))
    mypubs[[field]] <- found[, 2]
  }

  # Remove every url_* entry, empty or not, from the annotation text.
  url_pattern <- paste0(
    "\\b(", paste(url_fields, collapse = "|"), "):\\s*\"[^\"]*\""
  )
  mypubs$annotation <- str_remove_all(mypubs$annotation, url_pattern)

  # Customize for more than one editor: "A and B and C" -> "A, B & C"
  mypubs$editor <- gsub(" and ", ", ", mypubs$editor)
  mypubs$editor <- stri_replace_last_fixed(mypubs$editor, ",", " &")

  # Add characters between tags so that they render properly
  mypubs$keywords <- gsub(",", '","', mypubs$keywords)

  # Assign "categories" to the different types of publications; anything not
  # listed (including a missing bibtype) becomes "0".
  category_map <- c(
    "Article" = "Journal Article",
    "Article in Press" = "Journal Article",
    "InProceedings" = "Conference paper",
    "Proceedings" = "Conference paper",
    "Conference" = "Conference paper",
    "Conference Paper" = "Conference paper",
    "Thesis" = "Manuscript",
    "MastersThesis" = "Manuscript",
    "PhdThesis" = "Manuscript",
    "Manual" = "Report",
    "TechReport" = "Report",
    "Book" = "Book",
    "InCollection" = "Book Section",
    "InBook" = "Book Section",
    "Presentation" = "Presentation"
  )
  categories <- unname(category_map[as.character(mypubs$bibtype)])
  categories[is.na(categories)] <- "0"
  mypubs$categories <- categories

  # Escape backslashes and double quotes so that a value can be embedded in a
  # double-quoted YAML scalar without breaking the front matter.
  yaml_dq <- function(s) {
    s <- gsub("\\", "\\\\", s, fixed = TRUE)
    gsub("\"", "\\\"", s, fixed = TRUE)
  }

  # Populate the template for one publication. `x` is a named character
  # vector holding one row of `mypubs`.
  create_md <- function(x) {
    has_year <- !is.na(x[["year"]])

    # The filename is the year (or a far-future placeholder) plus the first
    # 20 characters of the cleaned title.
    file_date <- if (has_year) x[["year"]] else "2999-01-01"
    slug <- x[["title"]] %>%
      str_replace_all(fixed(" "), "_") %>%
      str_remove_all(fixed(":")) %>%
      str_remove_all(fixed("?")) %>%
      str_remove_all(fixed("/")) %>% # a slash would be read as a directory
      str_sub(1, 20)
    filename <- paste0(file_date, "_", slug, ".qmd")
    target <- file.path(outfold, filename)

    if (file.exists(target) && !overwrite) {
      return(invisible(NULL))
    }

    # Date for the record (with month); same placeholder as the filename when
    # the year is missing, instead of the unparseable "NA-<month>-01".
    record_date <- if (has_year) {
      paste0(x[["year"]], "-", x[["month"]], "-", "01")
    } else {
      "2999-01-01"
    }

    out <- "---"

    # Title and date
    out <- c(
      out,
      paste0("title : \"", yaml_dq(x[["title"]]), "\""),
      paste0("date : \"", anydate(record_date), "\"")
    )

    # Authors. Comma separated list, e.g. `["Bob Smith", "David Jones"]`.
    author_names <- strsplit(x[["author"]], " and ", fixed = TRUE)[[1]]
    author_names <- stringi::stri_trans_general(author_names, "latin-ascii")
    out <- c(
      out,
      paste0(
        "author : [",
        paste0("\"", yaml_dq(author_names), "\"", collapse = ", "),
        "]"
      )
    )

    # Publication type (the label assigned in `categories` above)
    out <- c(out, paste0("categories : [\"", x[["categories"]], "\"]"))

    # Publication details: append every available field in a fixed order
    present <- function(field) !is.na(x[[field]])
    publication <- x[["mainref"]]
    if (present("editor")) publication <- paste0(publication, " In ", x[["editor"]], ": ")
    if (present("booktitle")) publication <- paste0(publication, x[["booktitle"]], "")
    if (present("volume")) publication <- paste0(publication, ", ", x[["volume"]], "")
    if (present("type")) publication <- paste0(publication, x[["type"]], " ")
    if (present("number")) publication <- paste0(publication, "(", x[["number"]], ")")
    if (present("pages")) publication <- paste0(publication, " ", x[["pages"]], " ")
    if (present("school")) publication <- paste0(publication, "- ", x[["school"]])
    if (present("address")) publication <- paste0(publication, ". ", x[["address"]], "")
    if (present("institution")) publication <- paste0(publication, " ", x[["institution"]])
    if (present("publisher")) publication <- paste0(publication, ": ", x[["publisher"]])
    if (present("doi")) {
      publication <- paste0(
        publication, " [https://doi.org/", x[["doi"]],
        "](https://doi.org/", x[["doi"]], ")"
      )
    }
    if (present("isbn")) publication <- paste0(publication, ". ISBN: ", paste0(x[["isbn"]]))
    out <- c(
      out,
      paste0("publication : \"", yaml_dq(publication), "\""),
      paste0("publication_short : \"", yaml_dq(publication), "\"")
    )

    # Abstract (only when requested) and empty shortened version
    abstract_text <- if (abstract) yaml_dq(x[["abstract"]]) else ""
    out <- c(
      out,
      paste0("abstract : \"", abstract_text, "\""),
      paste0("abstract_short : \"", "\"")
    )

    # ----------------- LINK CLEAN-UP AND EXTRACTION -----------------
    annotation <- x[["annotation"]]

    # 1. Remove the "graduation-cap" icon block (thesis supervisor entry).
    # NOTE(review): the original comment describes this as a multi-line match
    # up to the next icon, but without (?s) the dot does not cross newlines;
    # pattern kept as is - confirm the intended behaviour.
    annotation <- gsub(
      "- icon: graduation-cap.*?(?=- icon|$)",
      "",
      annotation,
      perl = TRUE
    )

    # 2. Extract icons and targets from the annotation (every
    #    "- icon: ... href: ..." or "- icon: ... web: ..." group).
    matches <- str_match_all(
      annotation,
      "- icon: ([^\\n]+)\\s*\\n\\s*(icon_pack: [^\\n]+\\s*\\n)?\\s*(name: [^\\n]+\\s*\\n)?\\s*(web:|href:) ([^\\n]+)"
    )[[1]]
    # Drop rows without a URL so that a missing annotation cannot produce an
    # "icon: NA / href: NA" link.
    if (nrow(matches) > 0) {
      matches <- matches[!is.na(matches[, 6]), , drop = FALSE]
    }
    annotation_links <- lapply(seq_len(nrow(matches)), function(i) {
      icon_name <- matches[i, 2]
      url <- matches[i, 6]
      if (grepl("github\\.com", url)) {
        icon_name <- "github"
      } else if (!is.na(icon_name) && icon_name == "file") {
        icon_name <- "file-pdf-fill" # change file to file-pdf-fill
      }
      list(icon = icon_name, href = url)
    })

    # 3. Remove the icon + link groups from the annotation (including the
    #    ones that start with #).
    annotation <- gsub(
      "(\\n|^)#?\\s*-\\s*icon:\\s*[^\\n]+\\s*\\n\\s*(icon_pack:\\s*[^\\n]+\\s*\\n)?\\s*(name:\\s*[^\\n]+\\s*\\n)?\\s*(web:|href:)\\s*[^\\n]+\\n?",
      "",
      annotation,
      perl = TRUE
    )

    # 4. Remove any empty or malformed "links:" block coming from Zotero
    annotation <- gsub(
      "\\n?links:\\s*(\\n\\s*-.*)?",
      "",
      annotation,
      perl = TRUE
    )

    # 5. Remove any residual URL field (such as  url_pdf : "")
    annotation <- gsub(
      "url_[^:]+:\\s*\"[^\"]*\"\\s*",
      "",
      annotation
    )

    # 6. Collapse runs of blank lines and trim both ends
    annotation <- gsub("\n{2,}", "\n\n", annotation)
    annotation <- trimws(annotation)

    # ----------------- LINKS FROM THE SPECIAL FIELDS -----------------
    # Icon used for each special field (url_pdf, url_project, ...)
    icon_map <- c(
      "url_slides" = "file-slides-fill",
      "url_video" = "camera-video-fill",
      "url_poster" = "image-fill",
      "url_pdf" = "file-pdf-fill",
      "url_preprint" = "files-alt",
      "url_dataset" = "database",
      "url_project" = "archive"
    )
    filled <- Filter(
      function(field) !is.na(x[[field]]) && x[[field]] != "",
      names(icon_map)
    )
    links <- lapply(
      filled,
      function(field) list(icon = icon_map[[field]], href = x[[field]])
    )

    # ----------------- COMBINE AND WRITE LINKS UNDER 'about' -----------------
    all_links <- c(annotation_links, links)

    # Nested indentation matters: "href" must sit inside its "- icon" item,
    # and the items inside "links".
    out <- c(out, "about:", "  template: marquee")
    if (length(all_links) > 0) {
      out <- c(out, "  links:")
      for (link in all_links) {
        out <- c(
          out,
          paste0("    - icon: ", link[["icon"]]),
          paste0("      href: ", link[["href"]])
        )
      }
    }

    # ----------------- CLOSE YAML AND WRITE FREE CONTENT -----------------
    out <- c(out, "---")

    # Cleaned annotation as free text (only when not empty)
    if (!is.na(annotation) && annotation != "") {
      out <- c(out, annotation)
    }

    # ----------------- "HOW TO CITE" CALLOUT -----------------
    authors <- str_replace_all(x[["author"]], " and ", ", ")
    authors <- stringi::stri_trans_general(authors, "latin-ascii") # drop accents
    authors <- gsub("\\{", "", authors) # drop braces
    authors <- gsub("\\}", "", authors)

    year <- if (has_year) x[["year"]] else "s.f." # "s.f." when the year is missing

    citation_text <- paste0(authors, " (", year, "). *", x[["title"]], "*")
    if (publication != "") {
      citation_text <- paste0(citation_text, ". ", publication)
    }

    out <- c(
      out,
      "\n\n::: {.callout-note title=\"How to cite this work\"}\n",
      citation_text,
      "\n:::\n"
    )

    # One element per line, written in a single call so that a failure while
    # building the content never leaves a half-written file behind.
    write(out, file = target)
    invisible(NULL)
  }

  # Apply "create_md" to every publication. Each row is passed as a named
  # character vector.
  for (i in seq_len(nrow(mypubs))) {
    entry <- vapply(
      mypubs[i, , drop = FALSE],
      function(column) as.character(column),
      character(1)
    )
    create_md(entry)
  }

  invisible(NULL)
}

# Running the function for publications
my_bibfile <- "publication/posts/academic-publications.bib"
out_fold <- "publication/posts"
bibtex_2academic(
  bibfile = my_bibfile,
  outfold = out_fold,
  abstract = TRUE,
  overwrite = TRUE
)

# This includes my presentations collection together with my publications in
# the same page, for the sake of simplicity.
my_bibfile <- "publication/posts/academic-presentations.bib"
out_fold <- "publication/posts"
bibtex_2academic(
  bibfile = my_bibfile,
  outfold = out_fold,
  abstract = TRUE,
  overwrite = TRUE
)

# And this is for including my theses, which actually live on another page
# (tesis); the call is kept here so that the script only has to be run once.
#
# my_bibfile <- "tesis/posts/tesis.bib"
# out_fold <- "tesis/posts"
# bibtex_2academic(bibfile = my_bibfile,
#                  outfold = out_fold,
#                  abstract = TRUE,
#                  overwrite = TRUE)