# Baseline subsector summary for the digitized CEO data.
# The same summarise / relabel / export sequence is repeated below in several
# passes; each pass changes the input filter or the compensation column.
# Uses dplyr verbs and %>%, pander(), and read.dta13(), which must already be
# attached when this runs.

# ---- Pass 1: median of `totcomp`; labels go into a new NTMAJ12 column ----

# NOTE(review): no assignment is visible in front of this pipeline; if none
# precedes it, the result is only printed and `d.baseline` is left unchanged.
d.baseline %>%
  group_by( ntmaj12 ) %>%
  mutate( totN=n() ) %>%
  ungroup()

# Per (ntmaj12, gender): row count, rounded median compensation, and the share
# of rows whose `type` is "F-F" or "M-F". Then, per ntmaj12: each row's share
# of the subsector count and the ratio of the smallest to the largest median.
t <-
  d.baseline %>%
  group_by( ntmaj12, gender ) %>%
  summarize(
    n=n(),
    AVE.COMP=round( median( totcomp ), 0 ),
    hire.female=round( mean( type == "F-F" | type == "M-F" ), 2 )
  ) %>%
  group_by( ntmaj12 ) %>%
  mutate(
    gender.prop=round( n/sum(n), 2 ),
    PAY.GAP=round( min(AVE.COMP)/max(AVE.COMP), 2 )
  ) %>%
  dplyr::select( ntmaj12, gender, AVE.COMP, PAY.GAP, n, gender.prop, hire.female )

# NOTE(review): `dollarize` is first defined further down in this section, so
# this call needs a definition from earlier in the session.
t$AVE.COMP <- dollarize( t$AVE.COMP )

# 12 MAJOR NTEE CATEGORIES (ntmaj12)
# Expands 10 Major NTEE categories into 12 categories, with Higher Education
# (B4 & B5) separate from other education organizations (B), and Hospitals
# (E2) separate from other health organizations (E).
# NTEE                     Code  Description
# A                        AR    Arts, culture, and humanities
# B4,B5                    BH    Higher education
# B (other than B4,B5)     ED    Education (other)
# C,D                      EN    Environment
# E2                       EH    Hospitals
# E (other than E2),F,G,H  HE    Health (other)
# I,J,K,L,M,N,O,P          HU    Human services
# Q                        IN    International
# R,S,T,U,V,W              PU    Public and societal benefit
# X                        RE    Religion
# Y                        MU    Mutual benefit
# Z                        UN    Unknown

# This pass writes the labels to an upper-case NTMAJ12 column and leaves the
# two-letter codes in `ntmaj12`; the later passes overwrite `ntmaj12` instead.
t$NTMAJ12[ t$ntmaj12 == "AR" ] <- "ARTS"
t$NTMAJ12[ t$ntmaj12 == "BH" ] <- "HIGHER ED"
t$NTMAJ12[ t$ntmaj12 == "ED" ] <- "EDUCATION"
t$NTMAJ12[ t$ntmaj12 == "EN" ] <- "ENVIRONMENT"
t$NTMAJ12[ t$ntmaj12 == "EH" ] <- "HOSPITALS"
t$NTMAJ12[ t$ntmaj12 == "HE" ] <- "HEALTH"
t$NTMAJ12[ t$ntmaj12 == "HU" ] <- "HUMAN SERVICES"
t$NTMAJ12[ t$ntmaj12 == "IN" ] <- "INTERNATIONAL"
t$NTMAJ12[ t$ntmaj12 == "PU" ] <- "PUBLIC BENEFIT"
t$NTMAJ12[ t$ntmaj12 == "RE" ] <- "RELIGION"
t$NTMAJ12[ t$ntmaj12 == "MU" ] <- "ASSOCIATIONS"

t %>% pander()
write.csv( t, "results/baseline-subsector-differences-digitized-ceo.csv", row.names=FALSE )

# ---- Pass 2: rows with pre1 == 1; labels overwrite `ntmaj12` ----

# Format numbers as "$" followed by the value rounded to 0 digits with ","
# as the thousands mark. format() pads a vector to a common width, so shorter
# values carry leading spaces after the "$".
dollarize <- function(x) {
  paste0("$", format( round( x, 0 ), big.mark="," ) )
}

d.baseline <- filter( dig, pre1 == 1 )

# Attach the number of rows in each subsector as `totN`.
d.baseline <-
  d.baseline %>%
  group_by( ntmaj12 ) %>%
  mutate( totN=n() ) %>%
  ungroup()

t <-
  d.baseline %>%
  group_by( ntmaj12, gender ) %>%
  summarize(
    n=n(),
    AVE.COMP=round( median( totcomp ), 0 ),
    hire.female=round( mean( type == "F-F" | type == "M-F" ), 2 )
  ) %>%
  group_by( ntmaj12 ) %>%
  mutate(
    gender.prop=round( n/sum(n), 2 ),
    PAY.GAP=round( min(AVE.COMP)/max(AVE.COMP), 2 )
  ) %>%
  dplyr::select( ntmaj12, gender, AVE.COMP, PAY.GAP, n, gender.prop, hire.female )

t$AVE.COMP <- dollarize( t$AVE.COMP )

# 12 MAJOR NTEE CATEGORIES (ntmaj12)
# Expands 10 Major NTEE categories into 12 categories, with Higher Education
# (B4 & B5) separate from other education organizations (B), and Hospitals
# (E2) separate from other health organizations (E).
# NTEE                     Code  Description
# A                        AR    Arts, culture, and humanities
# B4,B5                    BH    Higher education
# B (other than B4,B5)     ED    Education (other)
# C,D                      EN    Environment
# E2                       EH    Hospitals
# E (other than E2),F,G,H  HE    Health (other)
# I,J,K,L,M,N,O,P          HU    Human services
# Q                        IN    International
# R,S,T,U,V,W              PU    Public and societal benefit
# X                        RE    Religion
# Y                        MU    Mutual benefit
# Z                        UN    Unknown

t$ntmaj12[ t$ntmaj12 == "AR" ] <- "ARTS"
t$ntmaj12[ t$ntmaj12 == "BH" ] <- "HIGHER ED"
t$ntmaj12[ t$ntmaj12 == "ED" ] <- "EDUCATION"
t$ntmaj12[ t$ntmaj12 == "EN" ] <- "ENVIRONMENT"
t$ntmaj12[ t$ntmaj12 == "EH" ] <- "HOSPITALS"
t$ntmaj12[ t$ntmaj12 == "HE" ] <- "HEALTH"
t$ntmaj12[ t$ntmaj12 == "HU" ] <- "HUMAN SERVICES"
t$ntmaj12[ t$ntmaj12 == "IN" ] <- "INTERNATIONAL"
t$ntmaj12[ t$ntmaj12 == "PU" ] <- "PUBLIC BENEFIT"
t$ntmaj12[ t$ntmaj12 == "RE" ] <- "RELIGION"
t$ntmaj12[ t$ntmaj12 == "MU" ] <- "ASSOCIATIONS"

t %>% pander()
write.csv( t, "results/baseline-subsector-differences-digitized-ceo.csv", row.names=FALSE )

# ---- Pass 3: reload the data; median of `TOTCOMP2_adj2020` ----

dig <- read.dta13( "DigitizedDataCEO&CFO/CEO_Digitized.dta" )
dput( sort(names(dig)))
head( dig )

dollarize <- function(x) {
  paste0("$", format( round( x, 0 ), big.mark="," ) )
}

d.baseline <- filter( dig, pre1 == 1 )

d.baseline <-
  d.baseline %>%
  group_by( ntmaj12 ) %>%
  mutate( totN=n() ) %>%
  ungroup()

t <-
  d.baseline %>%
  group_by( ntmaj12, gender ) %>%
  summarize(
    n=n(),
    AVE.COMP=round( median( TOTCOMP2_adj2020 ), 0 ),
    hire.female=round( mean( type == "F-F" | type == "M-F" ), 2 )
  ) %>%
  group_by( ntmaj12 ) %>%
  mutate(
    gender.prop=round( n/sum(n), 2 ),
    PAY.GAP=round( min(AVE.COMP)/max(AVE.COMP), 2 )
  ) %>%
  dplyr::select( ntmaj12, gender, AVE.COMP, PAY.GAP, n, gender.prop, hire.female )

t$AVE.COMP <- dollarize( t$AVE.COMP )

# 12 MAJOR NTEE CATEGORIES (ntmaj12)
# Expands 10 Major NTEE categories into 12 categories, with Higher Education
# (B4 & B5) separate from other education organizations (B), and Hospitals
# (E2) separate from other health organizations (E).
# NTEE                     Code  Description
# A                        AR    Arts, culture, and humanities
# B4,B5                    BH    Higher education
# B (other than B4,B5)     ED    Education (other)
# C,D                      EN    Environment
# E2                       EH    Hospitals
# E (other than E2),F,G,H  HE    Health (other)
# I,J,K,L,M,N,O,P          HU    Human services
# Q                        IN    International
# R,S,T,U,V,W              PU    Public and societal benefit
# X                        RE    Religion
# Y                        MU    Mutual benefit
# Z                        UN    Unknown

t$ntmaj12[ t$ntmaj12 == "AR" ] <- "ARTS"
t$ntmaj12[ t$ntmaj12 == "BH" ] <- "HIGHER ED"
t$ntmaj12[ t$ntmaj12 == "ED" ] <- "EDUCATION"
t$ntmaj12[ t$ntmaj12 == "EN" ] <- "ENVIRONMENT"
t$ntmaj12[ t$ntmaj12 == "EH" ] <- "HOSPITALS"
t$ntmaj12[ t$ntmaj12 == "HE" ] <- "HEALTH"
t$ntmaj12[ t$ntmaj12 == "HU" ] <- "HUMAN SERVICES"
t$ntmaj12[ t$ntmaj12 == "IN" ] <- "INTERNATIONAL"
t$ntmaj12[ t$ntmaj12 == "PU" ] <- "PUBLIC BENEFIT"
t$ntmaj12[ t$ntmaj12 == "RE" ] <- "RELIGION"
t$ntmaj12[ t$ntmaj12 == "MU" ] <- "ASSOCIATIONS"

t %>% pander()
write.csv( t, "results/baseline-subsector-differences-digitized-ceo.csv", row.names=FALSE )

write.csv( t, "results/baseline-subsector-differences-digitized-ceo.csv", row.names=FALSE )
t %>% pander()

# ---- Pass 4: only rows whose `type` is "F-F" or "M-F" ----

dollarize <- function(x) {
  paste0("$", format( round( x, 0 ), big.mark="," ) )
}

# Every row kept by this filter satisfies the `hire.female` condition used
# below, so that column is the mean of all-TRUE values in this pass.
d.baseline <- filter( dig, pre1 == 1 & ( type == "F-F" | type == "M-F" ) )

d.baseline <-
  d.baseline %>%
  group_by( ntmaj12 ) %>%
  mutate( totN=n() ) %>%
  ungroup()

t <-
  d.baseline %>%
  group_by( ntmaj12, gender ) %>%
  summarize(
    n=n(),
    AVE.COMP=round( median( TOTCOMP2_adj2020 ), 0 ),
    hire.female=round( mean( type == "F-F" | type == "M-F" ), 2 )
  ) %>%
  group_by( ntmaj12 ) %>%
  mutate(
    gender.prop=round( n/sum(n), 2 ),
    PAY.GAP=round( min(AVE.COMP)/max(AVE.COMP), 2 )
  ) %>%
  dplyr::select( ntmaj12, gender, AVE.COMP, PAY.GAP, n, gender.prop, hire.female )

t$AVE.COMP <- dollarize( t$AVE.COMP )

# 12 MAJOR NTEE CATEGORIES (ntmaj12)
# Expands 10 Major NTEE categories into 12 categories, with Higher Education
# (B4 & B5) separate from other education organizations (B), and Hospitals
# (E2) separate from other health organizations (E).
# NTEE                     Code  Description
# A                        AR    Arts, culture, and humanities
# B4,B5                    BH    Higher education
# B (other than B4,B5)     ED    Education (other)
# C,D                      EN    Environment
# E2                       EH    Hospitals
# E (other than E2),F,G,H  HE    Health (other)
# I,J,K,L,M,N,O,P          HU    Human services
# Q                        IN    International
# R,S,T,U,V,W              PU    Public and societal benefit
# X                        RE    Religion
# Y                        MU    Mutual benefit
# Z                        UN    Unknown

t$ntmaj12[ t$ntmaj12 == "AR" ] <- "ARTS"
t$ntmaj12[ t$ntmaj12 == "BH" ] <- "HIGHER ED"
t$ntmaj12[ t$ntmaj12 == "ED" ] <- "EDUCATION"
t$ntmaj12[ t$ntmaj12 == "EN" ] <- "ENVIRONMENT"
t$ntmaj12[ t$ntmaj12 == "EH" ] <- "HOSPITALS"
t$ntmaj12[ t$ntmaj12 == "HE" ] <- "HEALTH"
t$ntmaj12[ t$ntmaj12 == "HU" ] <- "HUMAN SERVICES"
t$ntmaj12[ t$ntmaj12 == "IN" ] <- "INTERNATIONAL"
t$ntmaj12[ t$ntmaj12 == "PU" ] <- "PUBLIC BENEFIT"
t$ntmaj12[ t$ntmaj12 == "RE" ] <- "RELIGION"
t$ntmaj12[ t$ntmaj12 == "MU" ] <- "ASSOCIATIONS"

t %>% pander()
write.csv( t, "results/baseline-subsector-differences-digitized-ceo.csv", row.names=FALSE )

unique( dig$type )

# ---- Pass 5: rows whose `type` is one of the four listed values ----

dollarize <- function(x) {
  paste0("$", format( round( x, 0 ), big.mark="," ) )
}

d.baseline <- filter( dig, pre1 == 1 & type %in% c("F-F","M-F","M-M","F-M") )

d.baseline <-
  d.baseline %>%
  group_by( ntmaj12 ) %>%
  mutate( totN=n() ) %>%
  ungroup()

t <-
  d.baseline %>%
  group_by( ntmaj12, gender ) %>%
  summarize(
    n=n(),
    AVE.COMP=round( median( TOTCOMP2_adj2020 ), 0 ),
    hire.female=round( mean( type == "F-F" | type == "M-F" ), 2 )
  ) %>%
  group_by( ntmaj12 ) %>%
  mutate(
    gender.prop=round( n/sum(n), 2 ),
    PAY.GAP=round( min(AVE.COMP)/max(AVE.COMP), 2 )
  ) %>%
  dplyr::select( ntmaj12, gender, AVE.COMP, PAY.GAP, n, gender.prop, hire.female )

t$AVE.COMP <- dollarize( t$AVE.COMP )

# 12 MAJOR NTEE CATEGORIES (ntmaj12)
# Expands 10 Major NTEE categories into 12 categories, with Higher Education
# (B4 & B5) separate from other education organizations (B), and Hospitals
# (E2) separate from other health organizations (E).
# NTEE                     Code  Description
# A                        AR    Arts, culture, and humanities
# B4,B5                    BH    Higher education
# B (other than B4,B5)     ED    Education (other)
# C,D                      EN    Environment
# E2                       EH    Hospitals
# E (other than E2),F,G,H  HE    Health (other)
# I,J,K,L,M,N,O,P          HU    Human services
# Q                        IN    International
# R,S,T,U,V,W              PU    Public and societal benefit
# X                        RE    Religion
# Y                        MU    Mutual benefit
# Z                        UN    Unknown

t$ntmaj12[ t$ntmaj12 == "AR" ] <- "ARTS"
t$ntmaj12[ t$ntmaj12 == "BH" ] <- "HIGHER ED"
t$ntmaj12[ t$ntmaj12 == "ED" ] <- "EDUCATION"
t$ntmaj12[ t$ntmaj12 == "EN" ] <- "ENVIRONMENT"
t$ntmaj12[ t$ntmaj12 == "EH" ] <- "HOSPITALS"
t$ntmaj12[ t$ntmaj12 == "HE" ] <- "HEALTH"
t$ntmaj12[ t$ntmaj12 == "HU" ] <- "HUMAN SERVICES"
t$ntmaj12[ t$ntmaj12 == "IN" ] <- "INTERNATIONAL"
t$ntmaj12[ t$ntmaj12 == "PU" ] <- "PUBLIC BENEFIT"
t$ntmaj12[ t$ntmaj12 == "RE" ] <- "RELIGION"
t$ntmaj12[ t$ntmaj12 == "MU" ] <- "ASSOCIATIONS"

t %>% pander()
write.csv( t, "results/baseline-subsector-differences-digitized-ceo.csv", row.names=FALSE )

# ---- Pass 6: same inputs as pass 5, run again ----

dollarize <- function(x) {
  paste0("$", format( round( x, 0 ), big.mark="," ) )
}

d.baseline <- filter( dig, pre1 == 1 & type %in% c("F-F","M-F","M-M","F-M") )

d.baseline <-
  d.baseline %>%
  group_by( ntmaj12 ) %>%
  mutate( totN=n() ) %>%
  ungroup()

t <-
  d.baseline %>%
  group_by( ntmaj12, gender ) %>%
  summarize(
    n=n(),
    AVE.COMP=round( median( TOTCOMP2_adj2020 ), 0 ),
    hire.female=round( mean( type == "F-F" | type == "M-F" ), 2 )
  ) %>%
  group_by( ntmaj12 ) %>%
  mutate(
    gender.prop=round( n/sum(n), 2 ),
    PAY.GAP=round( min(AVE.COMP)/max(AVE.COMP), 2 )
  ) %>%
  dplyr::select( ntmaj12, gender, AVE.COMP, PAY.GAP, n, gender.prop, hire.female )

t$AVE.COMP <- dollarize( t$AVE.COMP )

# 12 MAJOR NTEE CATEGORIES (ntmaj12)
# Expands 10 Major NTEE categories into 12 categories, with Higher Education
# (B4 & B5) separate from other education organizations (B), and Hospitals
# (E2) separate from other health organizations (E).
# NTEE                     Code  Description
# A                        AR    Arts, culture, and humanities
# B4,B5                    BH    Higher education
# B (other than B4,B5)     ED    Education (other)
# C,D                      EN    Environment
# E2                       EH    Hospitals
# E (other than E2),F,G,H  HE    Health (other)
# I,J,K,L,M,N,O,P          HU    Human services
# Q                        IN    International
# R,S,T,U,V,W              PU    Public and societal benefit
# X                        RE    Religion
# Y                        MU    Mutual benefit
# Z                        UN    Unknown

t$ntmaj12[ t$ntmaj12 == "AR" ] <- "ARTS"
t$ntmaj12[ t$ntmaj12 == "BH" ] <- "HIGHER ED"
t$ntmaj12[ t$ntmaj12 == "ED" ] <- "EDUCATION"
t$ntmaj12[ t$ntmaj12 == "EN" ] <- "ENVIRONMENT"
t$ntmaj12[ t$ntmaj12 == "EH" ] <- "HOSPITALS"
t$ntmaj12[ t$ntmaj12 == "HE" ] <- "HEALTH"
t$ntmaj12[ t$ntmaj12 == "HU" ] <- "HUMAN SERVICES"
t$ntmaj12[ t$ntmaj12 == "IN" ] <- "INTERNATIONAL"
t$ntmaj12[ t$ntmaj12 == "PU" ] <- "PUBLIC BENEFIT"
t$ntmaj12[ t$ntmaj12 == "RE" ] <- "RELIGION"
t$ntmaj12[ t$ntmaj12 == "MU" ] <- "ASSOCIATIONS"

t %>% pander()
write.csv( t, "results/baseline-subsector-differences-digitized-ceo.csv", row.names=FALSE )

summary( dig$TOTCOMP2_adj2020 )
head( dig )

# ---- Sample restriction ----

# Restrict a data frame to the rows that pass the three filters below.
#
# Keeps rows whose `type` is one of "M-M", "M-F", "F-M", "F-F", drops rows
# whose `ntmaj12` is NA or "UN", and then keeps only rows belonging to an
# `ein` whose minimum `TOTCOMP2_adj2020` is above 80000. min() is called
# without na.rm, so an `ein` with a missing compensation value gets an NA
# minimum and is dropped by the final filter.
#
# d: data frame with columns type, ntmaj12, ein and TOTCOMP2_adj2020.
# Returns a plain data.frame that also carries the helper column `min.comp`.
apply_paper_sample <- function( d ) {
  d <-
    d %>%
    filter( type %in% c("M-M","M-F","F-M","F-F") ) %>%
    filter( ! ( is.na( ntmaj12 ) | ntmaj12 == "UN" ) ) %>%
    group_by( ein ) %>%
    mutate( min.comp = min( TOTCOMP2_adj2020 ) ) %>%
    filter( min.comp > 80000 ) %>%
    ungroup()
  d <- as.data.frame(d)
  return( d )
}

dig <- apply_paper_sample( dig )

# ---- Pass 7: summary recomputed on the restricted sample ----

dollarize <- function(x) {
  paste0("$", format( round( x, 0 ), big.mark="," ) )
}

# `dig` has already been limited to these four `type` values by
# apply_paper_sample(), so only the pre1 condition removes rows here.
d.baseline <- filter( dig, pre1 == 1 & type %in% c("F-F","M-F","M-M","F-M") )

d.baseline <-
  d.baseline %>%
  group_by( ntmaj12 ) %>%
  mutate( totN=n() ) %>%
  ungroup()

t <-
  d.baseline %>%
  group_by( ntmaj12, gender ) %>%
  summarize(
    n=n(),
    AVE.COMP=round( median( TOTCOMP2_adj2020 ), 0 ),
    hire.female=round( mean( type == "F-F" | type == "M-F" ), 2 )
  ) %>%
  group_by( ntmaj12 ) %>%
  mutate(
    gender.prop=round( n/sum(n), 2 ),
    PAY.GAP=round( min(AVE.COMP)/max(AVE.COMP), 2 )
  ) %>%
  dplyr::select( ntmaj12, gender, AVE.COMP, PAY.GAP, n, gender.prop, hire.female )

t$AVE.COMP <- dollarize( t$AVE.COMP )

# 12 MAJOR NTEE CATEGORIES (ntmaj12)
# Expands 10 Major NTEE categories into 12 categories, with Higher Education
# (B4 & B5) separate from other education organizations (B), and Hospitals
# (E2) separate from other health organizations (E).
# NTEE                     Code  Description
# A                        AR    Arts, culture, and humanities
# B4,B5                    BH    Higher education
# B (other than B4,B5)     ED    Education (other)
# C,D                      EN    Environment
# E2                       EH    Hospitals
# E (other than E2),F,G,H  HE    Health (other)
# I,J,K,L,M,N,O,P          HU    Human services
# Q                        IN    International
# R,S,T,U,V,W              PU    Public and societal benefit
# X                        RE    Religion
# Y                        MU    Mutual benefit
# Z                        UN    Unknown

# Replace the two-letter subsector codes in `t` with display labels.
# "UN" has no replacement and is left as is.
t$ntmaj12[ t$ntmaj12 == "AR" ] <- "ARTS"
t$ntmaj12[ t$ntmaj12 == "BH" ] <- "HIGHER ED"
t$ntmaj12[ t$ntmaj12 == "ED" ] <- "EDUCATION"
t$ntmaj12[ t$ntmaj12 == "EN" ] <- "ENVIRONMENT"
t$ntmaj12[ t$ntmaj12 == "EH" ] <- "HOSPITALS"
t$ntmaj12[ t$ntmaj12 == "HE" ] <- "HEALTH"
t$ntmaj12[ t$ntmaj12 == "HU" ] <- "HUMAN SERVICES"
t$ntmaj12[ t$ntmaj12 == "IN" ] <- "INTERNATIONAL"
t$ntmaj12[ t$ntmaj12 == "PU" ] <- "PUBLIC BENEFIT"
t$ntmaj12[ t$ntmaj12 == "RE" ] <- "RELIGION"
t$ntmaj12[ t$ntmaj12 == "MU" ] <- "ASSOCIATIONS"

t %>% pander()
write.csv( t, "results/baseline-subsector-differences-digitized-ceo.csv", row.names=FALSE )

# ---- Medium titles data: load and inspect ----

URL <- "https://raw.githubusercontent.com/DS4PS/cpp-527-fall-2020/master/labs/data/medium-data-utf8-v2.csv"
d <- read.csv( URL )
head( d )
head( d$title, 10 )
names( d )
head( d )

title <- d$title
head( title, 10 )
nrow( d )

# Titles containing "Why Do": case-sensitive, case-insensitive, then anchored
# to the start of the title.
grep( "Why Do", title, value=TRUE )
args( grep )
grep( "Why Do", title, value=TRUE, ignore.case=TRUE )
grep( "^Why Do", title, value=TRUE, ignore.case=TRUE )

# ---- Arithmetic mean vs. mean on the log scale ----

x1 <- c(1,1,1,1,20)
x2 <- c(5,5,5,5,5)
mean( x1 )
mean( x2 )

x1 <- c(1,1,1,1,50)
x2 <- c(5,5,5,5,5)
mean( x1 )
mean( x2 )

x1 <- c(1,1,1,1,50)
x2 <- c(5,5,5,5,5)
mean( x1 )
mean( x2 )

log.x1 <- log(x1)
log.x2 <- log(x2)
mean( log.x1 )
mean( log.x2 )
# Back-transform the mean of the logs.
exp( mean( log.x1 ) )
exp( mean( log.x2 ) )

# ---- First word of each title ----

# Step through a single title by hand.
i <- 1
d$title <- tolower( d$title )
head( d$title )
ccv <- d$title[i]
ccv
word.list <- strsplit( ccv, " " ) # split title x into words
word.list
word.vector <- unlist( word.list ) # unlist results
word.vector
word.vector[1]

# Same steps for every title. tolower() is vectorised, so it is applied once
# before the loop rather than on each iteration. `results` is allocated up
# front, and seq_along() keeps the loop from running when there are no titles.
# Bare expressions print nothing inside a loop, so they are left out here.
# NOTE(review): line breaks in this region were reconstructed; the `###` and
# `##` lines are read as commented-out code, the only reading under which the
# loop runs over every title.
d$title <- tolower( d$title )
results <- character( length( d$title ) )

for( i in seq_along( d$title ) ) {
  ### i <- 1
  ccv <- d$title[i]
  word.list <- strsplit( ccv, " " ) # split title x into words
  word.vector <- unlist( word.list ) # unlist results
  results[i] <- word.vector[1]
  ### results[i] <- split(word.vector, " ")[[i]][i]
  ## i <- i+1 ## you don't need counters in for loops
  ## return(results) ## you don't return from a loop
}

# The loop body run once more at top level, using whatever `i` currently holds.
### i <- 1
d$title <- tolower( d$title )
head( d$title )
ccv <- d$title[i]
ccv
word.list <- strsplit( ccv, " " ) # split title x into words
word.list
word.vector <- unlist( word.list ) # unlist results
word.vector
word.vector[1]
results[i] <- word.vector[1]
###

# ---- Regular expressions: anchors and character classes ----

strings <- c("abcd", "cdab", "cabd", "c abd", "da", "dab" )
grep( "ab", strings, value=TRUE )
grep( "da", strings, value=TRUE )

strings <- c("abcd", "cdab", "cabd", "c abd", "da", "dab", "bada" )
grep( "da", strings, value=TRUE )
grep( "da", strings, value=TRUE )
grep( "^da", strings, value=TRUE )
grep( "da$", strings, value=TRUE )
grep( "^da$", strings, value=TRUE )

strings <- c("abcd", "cdab", "cabd", "c abd", "da", "ad", "dab", "bada" )
grep( "^da$", strings, value=TRUE )
grep( "^[da]$", strings, value=TRUE )
# Inside [ ] the | is an ordinary character, not alternation.
grep( "^[d|a]$", strings, value=TRUE )
grep( "[da]", strings, value=TRUE )
grep( "^[a-d]$", strings, value=TRUE )
grep( "^[a-d]+$", strings, value=TRUE )
grep( "^[ad]+$", strings, value=TRUE )
grep( "^[ad][ad]$", strings, value=TRUE )
grep( "^[ad][ad]", strings, value=TRUE )
grep( "^[ad]+$", strings, value=TRUE )
strings <- c("abcd", "cdab", "cabd", "c abd", "da", "ad", "dab", "bada", "dada" )

strings <- c("abcd", "cdab", "cabd", "c abd", "da", "ad", "dab", "bada", "dada" )
grep( "^[ad][ad]", strings, value=TRUE )
grep( "^[ad][ad]$", strings, value=TRUE )
grep( "^[ad]+$", strings, value=TRUE )
grep( "^[ad]{2}$", strings, value=TRUE )
grep( "^[ad]{3}", strings, value=TRUE )
grep( "^[ad]{2}.{1}$", strings, value=TRUE )
grep( "^[ad]{2}.{2}$", strings, value=TRUE )

# ---- Regular expressions: case, wildcards and quantifiers ----

strings <- c("dead","dad","deed")
grep( "dead", strings, value=TRUE )
grep( "[dea]+", strings, value=TRUE )

strings <- c("Dead","dad","deed")
grep( "dead", strings, value=TRUE )
"d" == "d"
"d" == "D"
grep( "dead", strings, value=TRUE, ignore.case=TRUE )
grep( "de.d", strings, value=TRUE )

strings <- c("dead","dad","deed")
grep( "de.d", strings, value=TRUE )
grep( "de*d", strings, value=TRUE )
grep( "d*d", strings, value=TRUE )
grep( "da*d", strings, value=TRUE )
grep( "dea*d", strings, value=TRUE )
grep( "de?ad", strings, value=TRUE )

strings <- c("ht","hot","hoot","hooot")
grep("h*t", strings, value = TRUE)
grep("h.t", strings, value = TRUE)
grep("ho+t", strings, value = TRUE)
grep("ho?t", strings, value = TRUE)
grep("ho{1,2}t", strings, value = TRUE)
grep("ho{2,}t", strings, value = TRUE)
grep( "dea*d", strings, value=TRUE )

strings <- c("dead","dad","deed")
grep( "dea*d", strings, value=TRUE )
grep( "de.d", strings, value=TRUE )

strings <- c("ded", "dead","dad","deed")
grep( "de.d", strings, value=TRUE )

strings <- c("ded", "dead","dad","deed", "deaad" )
grep( "dea?d", strings, value=TRUE )
grep( "dea*d", strings, value=TRUE )

strings <- c("ded", "dead","dad","deed", "deaad", "deeed" )
grep( "de.{1,}d", strings, value=TRUE )
grep( "de*d", strings, value=TRUE )
grep( "dea*d", strings, value=TRUE )
grep( "dea{1,}d", strings, value=TRUE )
grep( "dea{0,}d", strings, value=TRUE )
grep( "dea?d", strings, value=TRUE )
grep( "dea{0,1}d", strings, value=TRUE )

# ---- Regular expressions: alternation ----

grep( "^dee|ad$", strings, value=TRUE )
grep( "^de[e|a]d$", strings, value=TRUE )
grep( "^de[ea]d$", strings, value=TRUE )
grep( "^de|ad$", strings, value=TRUE )
grep( "ded|dad", strings, value=TRUE )

# Building an alternation pattern from a vector: `sep` goes between the
# arguments of one paste() call, `collapse` joins the elements of the result.
days <- c("monday","tuesday","wednesday")
paste( days, sep="|" )
paste( days, sep="//|" )
paste( days, "|" )
paste( days, "|", sep="" )
paste( days, collapse="|" )
paste( days, collapse="|" )

# ---- Substitution ----

gsub( "land", "LAND", c("finland", "iceland", "michael landon") )
gsub( "land$", "LAND", c("finland", "iceland", "michael landon") )

# ---- Combining logical match vectors ----

strings
grepl( "dea{0,1}d", strings )
grepl( "^de|ad$", strings )
grepl( "^da*d$", strings )

results1 <- grepl( "dea{0,1}d", strings )
results2 <- grepl( "^da*d$", strings )
results1 + results2
cbind( results1, results2 )
results1 + results2

x1 <- c(TRUE,FALSE,FALSE)
x2 <- c(TRUE,FALSE,TRUE)
x1 + x2
x1
x2
x1 | x2
( x1 + x2 ) > 0
x1 & x2

args( grep )
help( grep )