d.baseline %>%
group_by( ntmaj12 ) %>%
mutate( totN=n() ) %>%
ungroup()
# Subsector-by-gender summary table built from d.baseline.
# One row per ntmaj12 x gender: row count, median totcomp, and the share of
# rows whose `type` is "F-F" or "M-F"; then, within each subsector, each
# gender's share of rows and the ratio of the smaller to the larger median
# (PAY.GAP).
t <-
  d.baseline %>%
  group_by( ntmaj12, gender ) %>%
  summarize( n=n(),
             AVE.COMP=round( median( totcomp ), 0 ),
             hire.female=round( mean( type == "F-F" | type == "M-F" ), 2 ) ) %>%
  group_by( ntmaj12 ) %>%
  mutate( gender.prop=round( n/sum(n), 2 ),
          PAY.GAP=round( min(AVE.COMP)/max(AVE.COMP), 2 ) ) %>%
  ungroup() %>%   # do not leave `t` grouped for the steps that follow
  dplyr::select( ntmaj12, gender, AVE.COMP, PAY.GAP, n, gender.prop, hire.female )

# Show compensation as dollar strings (AVE.COMP becomes character).
t$AVE.COMP <- dollarize( t$AVE.COMP )
# 12 MAJOR NTEE CATEGORIES (ntmaj12)
# Expands 10 Major NTEE categories into 12 categories, with Higher Education (B4 & B5) separate from other education organizations (B), and Hospitals (E2) separate from other health organizations (E).
# NTEE    Code    Description
# A                       AR  Arts, culture, and humanities
# B4,B5                   BH  Higher education
# B (other than B4,B5)    ED  Education (other)
# C,D                     EN  Environment
# E2                      EH  Hospitals
# E (other than E2),F,G,H HE  Health (other)
# I,J,K,L,M,N,O,P         HU  Human services
# Q                       IN  International
# R,S,T,U,V,W             PU  Public and societal benefit
# X                       RE  Religion
# Y                       MU  Mutual benefit
# Z                       UN  Unknown
# Add a display-label column NTMAJ12 alongside the two-letter ntmaj12 code.
# The column is created in one step instead of being filled element-wise
# before it exists. Codes without an entry below (e.g. "UN") and missing
# values get NA.
t$NTMAJ12 <- local({
  labels <- c( AR="ARTS",           BH="HIGHER ED",     ED="EDUCATION",
               EN="ENVIRONMENT",    EH="HOSPITALS",     HE="HEALTH",
               HU="HUMAN SERVICES", IN="INTERNATIONAL", PU="PUBLIC BENEFIT",
               RE="RELIGION",       MU="ASSOCIATIONS" )
  unname( labels[ as.character( t$ntmaj12 ) ] )
})

# Print the table, then save it.
t %>% pander()
# FALSE rather than F: F is an ordinary variable that can be reassigned.
write.csv( t, "results/baseline-subsector-differences-digitized-ceo.csv", row.names=FALSE )
# Format numbers as whole-dollar strings, e.g. 1234567 -> "$1,234,567".
#   x: numeric vector of amounts.
# Returns a character vector the same length as x.
dollarize <- function(x)
{
  # paste0() would turn a zero-length input into a single "$".
  if( length( x ) == 0 ) return( character( 0 ) )
  # trim=TRUE stops format() left-padding to a common width ("$    500");
  # scientific=FALSE stops round values such as 1e5 rendering as "$1e+05".
  paste0( "$", format( round( x, 0 ), big.mark=",", trim=TRUE, scientific=FALSE ) )
}
# Baseline sample: rows of `dig` with pre1 == 1.
d.baseline <- filter( dig, pre1 == 1 )

# Attach each subsector's row count (totN) to every row.
d.baseline <-
  d.baseline %>%
  group_by( ntmaj12 ) %>%
  mutate( totN=n() ) %>%
  ungroup()

# One row per ntmaj12 x gender: row count, median totcomp, and the share of
# rows whose `type` is "F-F" or "M-F"; then, within each subsector, each
# gender's share of rows and the ratio of the smaller to the larger median
# (PAY.GAP).
t <-
  d.baseline %>%
  group_by( ntmaj12, gender ) %>%
  summarize( n=n(),
             AVE.COMP=round( median( totcomp ), 0 ),
             hire.female=round( mean( type == "F-F" | type == "M-F" ), 2 ) ) %>%
  group_by( ntmaj12 ) %>%
  mutate( gender.prop=round( n/sum(n), 2 ),
          PAY.GAP=round( min(AVE.COMP)/max(AVE.COMP), 2 ) ) %>%
  ungroup() %>%   # do not leave `t` grouped for the steps that follow
  dplyr::select( ntmaj12, gender, AVE.COMP, PAY.GAP, n, gender.prop, hire.female )

# Show compensation as dollar strings (AVE.COMP becomes character).
t$AVE.COMP <- dollarize( t$AVE.COMP )
# 12 MAJOR NTEE CATEGORIES (ntmaj12)
# Expands 10 Major NTEE categories into 12 categories, with Higher Education (B4 & B5) separate from other education organizations (B), and Hospitals (E2) separate from other health organizations (E).
# NTEE    Code    Description
# A                       AR  Arts, culture, and humanities
# B4,B5                   BH  Higher education
# B (other than B4,B5)    ED  Education (other)
# C,D                     EN  Environment
# E2                      EH  Hospitals
# E (other than E2),F,G,H HE  Health (other)
# I,J,K,L,M,N,O,P         HU  Human services
# Q                       IN  International
# R,S,T,U,V,W             PU  Public and societal benefit
# X                       RE  Religion
# Y                       MU  Mutual benefit
# Z                       UN  Unknown
# Swap the two-letter ntmaj12 codes for display labels. Codes without an entry
# below (e.g. "UN") and missing values are left as they are.
t$ntmaj12 <- local({
  labels <- c( AR="ARTS",           BH="HIGHER ED",     ED="EDUCATION",
               EN="ENVIRONMENT",    EH="HOSPITALS",     HE="HEALTH",
               HU="HUMAN SERVICES", IN="INTERNATIONAL", PU="PUBLIC BENEFIT",
               RE="RELIGION",       MU="ASSOCIATIONS" )
  code  <- as.character( t$ntmaj12 )   # also copes with a factor column
  known <- code %in% names( labels )
  code[ known ] <- labels[ code[ known ] ]
  code
})

# Print the table, then save it.
t %>% pander()
# FALSE rather than F: F is an ordinary variable that can be reassigned.
write.csv( t, "results/baseline-subsector-differences-digitized-ceo.csv", row.names=FALSE )
# Load the digitized CEO Stata file and take a first look at it.
dig <- read.dta13( "DigitizedDataCEO&CFO/CEO_Digitized.dta" )
dput( dig %>% names() %>% sort() )   # alphabetised column names, paste-ready
dig %>% head()
# Format numbers as whole-dollar strings, e.g. 1234567 -> "$1,234,567".
#   x: numeric vector of amounts.
# Returns a character vector the same length as x.
dollarize <- function(x)
{
  # paste0() would turn a zero-length input into a single "$".
  if( length( x ) == 0 ) return( character( 0 ) )
  # trim=TRUE stops format() left-padding to a common width ("$    500");
  # scientific=FALSE stops round values such as 1e5 rendering as "$1e+05".
  paste0( "$", format( round( x, 0 ), big.mark=",", trim=TRUE, scientific=FALSE ) )
}
# Baseline sample: rows of `dig` with pre1 == 1.
d.baseline <- filter( dig, pre1 == 1 )

# Attach each subsector's row count (totN) to every row.
d.baseline <-
  d.baseline %>%
  group_by( ntmaj12 ) %>%
  mutate( totN=n() ) %>%
  ungroup()

# One row per ntmaj12 x gender: row count, median TOTCOMP2_adj2020, and the
# share of rows whose `type` is "F-F" or "M-F"; then, within each subsector,
# each gender's share of rows and the ratio of the smaller to the larger
# median (PAY.GAP).
t <-
  d.baseline %>%
  group_by( ntmaj12, gender ) %>%
  summarize( n=n(),
             AVE.COMP=round( median( TOTCOMP2_adj2020 ), 0 ),
             hire.female=round( mean( type == "F-F" | type == "M-F" ), 2 ) ) %>%
  group_by( ntmaj12 ) %>%
  mutate( gender.prop=round( n/sum(n), 2 ),
          PAY.GAP=round( min(AVE.COMP)/max(AVE.COMP), 2 ) ) %>%
  ungroup() %>%   # do not leave `t` grouped for the steps that follow
  dplyr::select( ntmaj12, gender, AVE.COMP, PAY.GAP, n, gender.prop, hire.female )

# Show compensation as dollar strings (AVE.COMP becomes character).
t$AVE.COMP <- dollarize( t$AVE.COMP )
# 12 MAJOR NTEE CATEGORIES (ntmaj12)
# Expands 10 Major NTEE categories into 12 categories, with Higher Education (B4 & B5) separate from other education organizations (B), and Hospitals (E2) separate from other health organizations (E).
# NTEE    Code    Description
# A                       AR  Arts, culture, and humanities
# B4,B5                   BH  Higher education
# B (other than B4,B5)    ED  Education (other)
# C,D                     EN  Environment
# E2                      EH  Hospitals
# E (other than E2),F,G,H HE  Health (other)
# I,J,K,L,M,N,O,P         HU  Human services
# Q                       IN  International
# R,S,T,U,V,W             PU  Public and societal benefit
# X                       RE  Religion
# Y                       MU  Mutual benefit
# Z                       UN  Unknown
# Swap the two-letter ntmaj12 codes for display labels. Codes without an entry
# below (e.g. "UN") and missing values are left as they are.
t$ntmaj12 <- local({
  labels <- c( AR="ARTS",           BH="HIGHER ED",     ED="EDUCATION",
               EN="ENVIRONMENT",    EH="HOSPITALS",     HE="HEALTH",
               HU="HUMAN SERVICES", IN="INTERNATIONAL", PU="PUBLIC BENEFIT",
               RE="RELIGION",       MU="ASSOCIATIONS" )
  code  <- as.character( t$ntmaj12 )   # also copes with a factor column
  known <- code %in% names( labels )
  code[ known ] <- labels[ code[ known ] ]
  code
})

# Print the table, then save it. The table was previously written and printed
# twice in a row with no change in between; once is enough.
t %>% pander()
# FALSE rather than F: F is an ordinary variable that can be reassigned.
write.csv( t, "results/baseline-subsector-differences-digitized-ceo.csv", row.names=FALSE )
# Format numbers as whole-dollar strings, e.g. 1234567 -> "$1,234,567".
#   x: numeric vector of amounts.
# Returns a character vector the same length as x.
dollarize <- function(x)
{
  # paste0() would turn a zero-length input into a single "$".
  if( length( x ) == 0 ) return( character( 0 ) )
  # trim=TRUE stops format() left-padding to a common width ("$    500");
  # scientific=FALSE stops round values such as 1e5 rendering as "$1e+05".
  paste0( "$", format( round( x, 0 ), big.mark=",", trim=TRUE, scientific=FALSE ) )
}
# Baseline sample: rows of `dig` with pre1 == 1 whose `type` is "F-F" or "M-F".
d.baseline <- filter( dig, pre1 == 1 & ( type == "F-F" | type == "M-F" ) )

# Attach each subsector's row count (totN) to every row.
d.baseline <-
  d.baseline %>%
  group_by( ntmaj12 ) %>%
  mutate( totN=n() ) %>%
  ungroup()

# One row per ntmaj12 x gender: row count, median TOTCOMP2_adj2020, and the
# share of rows whose `type` is "F-F" or "M-F"; then, within each subsector,
# each gender's share of rows and the ratio of the smaller to the larger
# median (PAY.GAP).
# NOTE(review): the filter above keeps only "F-F"/"M-F" rows, so hire.female
# is 1 in every group here.
t <-
  d.baseline %>%
  group_by( ntmaj12, gender ) %>%
  summarize( n=n(),
             AVE.COMP=round( median( TOTCOMP2_adj2020 ), 0 ),
             hire.female=round( mean( type == "F-F" | type == "M-F" ), 2 ) ) %>%
  group_by( ntmaj12 ) %>%
  mutate( gender.prop=round( n/sum(n), 2 ),
          PAY.GAP=round( min(AVE.COMP)/max(AVE.COMP), 2 ) ) %>%
  ungroup() %>%   # do not leave `t` grouped for the steps that follow
  dplyr::select( ntmaj12, gender, AVE.COMP, PAY.GAP, n, gender.prop, hire.female )

# Show compensation as dollar strings (AVE.COMP becomes character).
t$AVE.COMP <- dollarize( t$AVE.COMP )
# 12 MAJOR NTEE CATEGORIES (ntmaj12)
# Expands 10 Major NTEE categories into 12 categories, with Higher Education (B4 & B5) separate from other education organizations (B), and Hospitals (E2) separate from other health organizations (E).
# NTEE    Code    Description
# A                       AR  Arts, culture, and humanities
# B4,B5                   BH  Higher education
# B (other than B4,B5)    ED  Education (other)
# C,D                     EN  Environment
# E2                      EH  Hospitals
# E (other than E2),F,G,H HE  Health (other)
# I,J,K,L,M,N,O,P         HU  Human services
# Q                       IN  International
# R,S,T,U,V,W             PU  Public and societal benefit
# X                       RE  Religion
# Y                       MU  Mutual benefit
# Z                       UN  Unknown
# Swap the two-letter ntmaj12 codes for display labels. Codes without an entry
# below (e.g. "UN") and missing values are left as they are.
t$ntmaj12 <- local({
  labels <- c( AR="ARTS",           BH="HIGHER ED",     ED="EDUCATION",
               EN="ENVIRONMENT",    EH="HOSPITALS",     HE="HEALTH",
               HU="HUMAN SERVICES", IN="INTERNATIONAL", PU="PUBLIC BENEFIT",
               RE="RELIGION",       MU="ASSOCIATIONS" )
  code  <- as.character( t$ntmaj12 )   # also copes with a factor column
  known <- code %in% names( labels )
  code[ known ] <- labels[ code[ known ] ]
  code
})

# Print the table, then save it.
t %>% pander()
# FALSE rather than F: F is an ordinary variable that can be reassigned.
write.csv( t, "results/baseline-subsector-differences-digitized-ceo.csv", row.names=FALSE )

# List the distinct values that `type` takes in the full data set.
unique( dig$type )
# Format numbers as whole-dollar strings, e.g. 1234567 -> "$1,234,567".
#   x: numeric vector of amounts.
# Returns a character vector the same length as x.
dollarize <- function(x)
{
  # paste0() would turn a zero-length input into a single "$".
  if( length( x ) == 0 ) return( character( 0 ) )
  # trim=TRUE stops format() left-padding to a common width ("$    500");
  # scientific=FALSE stops round values such as 1e5 rendering as "$1e+05".
  paste0( "$", format( round( x, 0 ), big.mark=",", trim=TRUE, scientific=FALSE ) )
}
# Baseline sample: rows of `dig` with pre1 == 1 whose `type` is one of
# "F-F", "M-F", "M-M", "F-M".
d.baseline <- filter( dig, pre1 == 1 & type %in% c("F-F","M-F","M-M","F-M") )

# Attach each subsector's row count (totN) to every row.
d.baseline <-
  d.baseline %>%
  group_by( ntmaj12 ) %>%
  mutate( totN=n() ) %>%
  ungroup()

# One row per ntmaj12 x gender: row count, median TOTCOMP2_adj2020, and the
# share of rows whose `type` is "F-F" or "M-F"; then, within each subsector,
# each gender's share of rows and the ratio of the smaller to the larger
# median (PAY.GAP).
t <-
  d.baseline %>%
  group_by( ntmaj12, gender ) %>%
  summarize( n=n(),
             AVE.COMP=round( median( TOTCOMP2_adj2020 ), 0 ),
             hire.female=round( mean( type == "F-F" | type == "M-F" ), 2 ) ) %>%
  group_by( ntmaj12 ) %>%
  mutate( gender.prop=round( n/sum(n), 2 ),
          PAY.GAP=round( min(AVE.COMP)/max(AVE.COMP), 2 ) ) %>%
  ungroup() %>%   # do not leave `t` grouped for the steps that follow
  dplyr::select( ntmaj12, gender, AVE.COMP, PAY.GAP, n, gender.prop, hire.female )

# Show compensation as dollar strings (AVE.COMP becomes character).
t$AVE.COMP <- dollarize( t$AVE.COMP )
# 12 MAJOR NTEE CATEGORIES (ntmaj12)
# Expands 10 Major NTEE categories into 12 categories, with Higher Education (B4 & B5) separate from other education organizations (B), and Hospitals (E2) separate from other health organizations (E).
# NTEE    Code    Description
# A                       AR  Arts, culture, and humanities
# B4,B5                   BH  Higher education
# B (other than B4,B5)    ED  Education (other)
# C,D                     EN  Environment
# E2                      EH  Hospitals
# E (other than E2),F,G,H HE  Health (other)
# I,J,K,L,M,N,O,P         HU  Human services
# Q                       IN  International
# R,S,T,U,V,W             PU  Public and societal benefit
# X                       RE  Religion
# Y                       MU  Mutual benefit
# Z                       UN  Unknown
# Swap the two-letter ntmaj12 codes for display labels. Codes without an entry
# below (e.g. "UN") and missing values are left as they are.
t$ntmaj12 <- local({
  labels <- c( AR="ARTS",           BH="HIGHER ED",     ED="EDUCATION",
               EN="ENVIRONMENT",    EH="HOSPITALS",     HE="HEALTH",
               HU="HUMAN SERVICES", IN="INTERNATIONAL", PU="PUBLIC BENEFIT",
               RE="RELIGION",       MU="ASSOCIATIONS" )
  code  <- as.character( t$ntmaj12 )   # also copes with a factor column
  known <- code %in% names( labels )
  code[ known ] <- labels[ code[ known ] ]
  code
})

# Print the table, then save it.
t %>% pander()
# FALSE rather than F: F is an ordinary variable that can be reassigned.
write.csv( t, "results/baseline-subsector-differences-digitized-ceo.csv", row.names=FALSE )
# Format numbers as whole-dollar strings, e.g. 1234567 -> "$1,234,567".
#   x: numeric vector of amounts.
# Returns a character vector the same length as x.
dollarize <- function(x)
{
  # paste0() would turn a zero-length input into a single "$".
  if( length( x ) == 0 ) return( character( 0 ) )
  # trim=TRUE stops format() left-padding to a common width ("$    500");
  # scientific=FALSE stops round values such as 1e5 rendering as "$1e+05".
  paste0( "$", format( round( x, 0 ), big.mark=",", trim=TRUE, scientific=FALSE ) )
}
# Baseline sample: rows of `dig` with pre1 == 1 whose `type` is one of
# "F-F", "M-F", "M-M", "F-M".
d.baseline <- filter( dig, pre1 == 1 & type %in% c("F-F","M-F","M-M","F-M") )

# Attach each subsector's row count (totN) to every row.
d.baseline <-
  d.baseline %>%
  group_by( ntmaj12 ) %>%
  mutate( totN=n() ) %>%
  ungroup()

# One row per ntmaj12 x gender: row count, median TOTCOMP2_adj2020, and the
# share of rows whose `type` is "F-F" or "M-F"; then, within each subsector,
# each gender's share of rows and the ratio of the smaller to the larger
# median (PAY.GAP).
t <-
  d.baseline %>%
  group_by( ntmaj12, gender ) %>%
  summarize( n=n(),
             AVE.COMP=round( median( TOTCOMP2_adj2020 ), 0 ),
             hire.female=round( mean( type == "F-F" | type == "M-F" ), 2 ) ) %>%
  group_by( ntmaj12 ) %>%
  mutate( gender.prop=round( n/sum(n), 2 ),
          PAY.GAP=round( min(AVE.COMP)/max(AVE.COMP), 2 ) ) %>%
  ungroup() %>%   # do not leave `t` grouped for the steps that follow
  dplyr::select( ntmaj12, gender, AVE.COMP, PAY.GAP, n, gender.prop, hire.female )

# Show compensation as dollar strings (AVE.COMP becomes character).
t$AVE.COMP <- dollarize( t$AVE.COMP )
# 12 MAJOR NTEE CATEGORIES (ntmaj12)
# Expands 10 Major NTEE categories into 12 categories, with Higher Education (B4 & B5) separate from other education organizations (B), and Hospitals (E2) separate from other health organizations (E).
# NTEE    Code    Description
# A                       AR  Arts, culture, and humanities
# B4,B5                   BH  Higher education
# B (other than B4,B5)    ED  Education (other)
# C,D                     EN  Environment
# E2                      EH  Hospitals
# E (other than E2),F,G,H HE  Health (other)
# I,J,K,L,M,N,O,P         HU  Human services
# Q                       IN  International
# R,S,T,U,V,W             PU  Public and societal benefit
# X                       RE  Religion
# Y                       MU  Mutual benefit
# Z                       UN  Unknown
# Swap the two-letter ntmaj12 codes for display labels. Codes without an entry
# below (e.g. "UN") and missing values are left as they are.
t$ntmaj12 <- local({
  labels <- c( AR="ARTS",           BH="HIGHER ED",     ED="EDUCATION",
               EN="ENVIRONMENT",    EH="HOSPITALS",     HE="HEALTH",
               HU="HUMAN SERVICES", IN="INTERNATIONAL", PU="PUBLIC BENEFIT",
               RE="RELIGION",       MU="ASSOCIATIONS" )
  code  <- as.character( t$ntmaj12 )   # also copes with a factor column
  known <- code %in% names( labels )
  code[ known ] <- labels[ code[ known ] ]
  code
})

# Print the table, then save it.
t %>% pander()
# FALSE rather than F: F is an ordinary variable that can be reassigned.
write.csv( t, "results/baseline-subsector-differences-digitized-ceo.csv", row.names=FALSE )
# Summarise the TOTCOMP2_adj2020 column, then preview the first rows of dig.
dig$TOTCOMP2_adj2020 %>% summary()
dig %>% head()
# Restrict a data frame to the analysis sample.
#   d: data frame with columns type, ntmaj12, ein and TOTCOMP2_adj2020.
# Keeps rows whose `type` is "M-M", "M-F", "F-M" or "F-F" and whose ntmaj12
# is neither missing nor "UN"; then keeps only the eins whose smallest
# TOTCOMP2_adj2020 among those rows is above 80000.
# Returns a plain data.frame that also carries the helper column min.comp.
apply_paper_sample <- function( d )
{
  sample.rows <-
    d %>%
    filter( type %in% c("M-M","M-F","F-M","F-F"),
            ! is.na( ntmaj12 ) & ntmaj12 != "UN" ) %>%
    group_by( ein ) %>%
    mutate( min.comp = min( TOTCOMP2_adj2020 ) ) %>%
    filter( min.comp > 80000 ) %>%
    ungroup()

  as.data.frame( sample.rows )
}

# Apply the sample restriction to the working data set in place.
dig <- dig %>% apply_paper_sample()
# Format numbers as whole-dollar strings, e.g. 1234567 -> "$1,234,567".
#   x: numeric vector of amounts.
# Returns a character vector the same length as x.
dollarize <- function(x)
{
  # paste0() would turn a zero-length input into a single "$".
  if( length( x ) == 0 ) return( character( 0 ) )
  # trim=TRUE stops format() left-padding to a common width ("$    500");
  # scientific=FALSE stops round values such as 1e5 rendering as "$1e+05".
  paste0( "$", format( round( x, 0 ), big.mark=",", trim=TRUE, scientific=FALSE ) )
}
# Baseline sample: rows of `dig` with pre1 == 1 whose `type` is one of
# "F-F", "M-F", "M-M", "F-M".
d.baseline <- filter( dig, pre1 == 1 & type %in% c("F-F","M-F","M-M","F-M") )

# Attach each subsector's row count (totN) to every row.
d.baseline <-
  d.baseline %>%
  group_by( ntmaj12 ) %>%
  mutate( totN=n() ) %>%
  ungroup()

# One row per ntmaj12 x gender: row count, median TOTCOMP2_adj2020, and the
# share of rows whose `type` is "F-F" or "M-F"; then, within each subsector,
# each gender's share of rows and the ratio of the smaller to the larger
# median (PAY.GAP).
t <-
  d.baseline %>%
  group_by( ntmaj12, gender ) %>%
  summarize( n=n(),
             AVE.COMP=round( median( TOTCOMP2_adj2020 ), 0 ),
             hire.female=round( mean( type == "F-F" | type == "M-F" ), 2 ) ) %>%
  group_by( ntmaj12 ) %>%
  mutate( gender.prop=round( n/sum(n), 2 ),
          PAY.GAP=round( min(AVE.COMP)/max(AVE.COMP), 2 ) ) %>%
  ungroup() %>%   # do not leave `t` grouped for the steps that follow
  dplyr::select( ntmaj12, gender, AVE.COMP, PAY.GAP, n, gender.prop, hire.female )

# Show compensation as dollar strings (AVE.COMP becomes character).
t$AVE.COMP <- dollarize( t$AVE.COMP )
# 12 MAJOR NTEE CATEGORIES (ntmaj12)
# Expands 10 Major NTEE categories into 12 categories, with Higher Education (B4 & B5) separate from other education organizations (B), and Hospitals (E2) separate from other health organizations (E).
# NTEE    Code    Description
# A                       AR  Arts, culture, and humanities
# B4,B5                   BH  Higher education
# B (other than B4,B5)    ED  Education (other)
# C,D                     EN  Environment
# E2                      EH  Hospitals
# E (other than E2),F,G,H HE  Health (other)
# I,J,K,L,M,N,O,P         HU  Human services
# Q                       IN  International
# R,S,T,U,V,W             PU  Public and societal benefit
# X                       RE  Religion
# Y                       MU  Mutual benefit
# Z                       UN  Unknown
# Swap the two-letter ntmaj12 codes for display labels. Codes without an entry
# below (e.g. "UN") and missing values are left as they are.
t$ntmaj12 <- local({
  labels <- c( AR="ARTS",           BH="HIGHER ED",     ED="EDUCATION",
               EN="ENVIRONMENT",    EH="HOSPITALS",     HE="HEALTH",
               HU="HUMAN SERVICES", IN="INTERNATIONAL", PU="PUBLIC BENEFIT",
               RE="RELIGION",       MU="ASSOCIATIONS" )
  code  <- as.character( t$ntmaj12 )   # also copes with a factor column
  known <- code %in% names( labels )
  code[ known ] <- labels[ code[ known ] ]
  code
})

# Print the table, then save it.
t %>% pander()
# FALSE rather than F: F is an ordinary variable that can be reassigned.
write.csv( t, "results/baseline-subsector-differences-digitized-ceo.csv", row.names=FALSE )
# Download the lab data set and look it over.
URL <- "https://raw.githubusercontent.com/DS4PS/cpp-527-fall-2020/master/labs/data/medium-data-utf8-v2.csv"
d <- read.csv( file=URL )
head( d )
head( d$title, n=10 )
names( d )
head( d )

# Work with the title column on its own.
title <- d$title
head( title, n=10 )
nrow( d )

# Titles containing "Why Do": case-sensitive, case-insensitive, and finally
# only those that start with it (^ anchors the match at the beginning).
grep( pattern="Why Do", x=title, value=TRUE )
args( grep )
grep( pattern="Why Do", x=title, ignore.case=TRUE, value=TRUE )
grep( pattern="^Why Do", x=title, ignore.case=TRUE, value=TRUE )
# x1 is mostly 1s with a single large value; x2 is constant at 5.
x1 <- c( rep( 1, 4 ), 20 )
x2 <- rep( 5, 5 )
mean( x1 )
mean( x2 )

# Same comparison with a larger outlier in x1.
x1 <- c( rep( 1, 4 ), 50 )
x2 <- rep( 5, 5 )
mean( x1 )
mean( x2 )

# Repeated as in the original session.
x1 <- c( rep( 1, 4 ), 50 )
x2 <- rep( 5, 5 )
mean( x1 )
mean( x2 )

# Averaging on the log scale and transforming back gives the geometric mean.
log.x1 <- log( x1 )
log.x2 <- log( x2 )
mean( log.x1 )
mean( log.x2 )
exp( mean( log.x1 ) )
exp( mean( log.x2 ) )
# Walk through the first-word extraction on a single title (the i-th one).
# Wrapping an assignment in parentheses stores the value and prints it.
i <- 1
d$title <- tolower( d$title )
head( d$title )
( ccv <- d$title[i] )
( word.list <- strsplit( ccv, split=" " ) )   # split the title into words
( word.vector <- unlist( word.list ) )        # flatten the list to a vector
word.vector[1]
# Collect the first word of every title into `results`.
# The loop index is supplied by for() itself, so it must not be reset inside
# the body (doing so wrote every result into results[1]), and a loop does not
# return() anything.
d$title <- tolower( d$title )                # lower-case once, outside the loop
results <- character( length( d$title ) )    # preallocate one slot per title
for( i in seq_along( d$title ) )             # seq_along() is safe when there are no titles
{
  word.vector <- unlist( strsplit( d$title[i], " " ) )   # split title i into words
  results[i] <- word.vector[1]                            # NA when the title has no words
}
### Single-title walk-through again, this time storing the first word.
# Wrapping an assignment in parentheses stores the value and prints it.
i <- 1
d$title <- tolower( d$title )
head( d$title )
( ccv <- d$title[i] )
( word.list <- strsplit( ccv, split=" " ) )   # split the title into words
( word.vector <- unlist( word.list ) )        # flatten the list to a vector
word.vector[1]
results[i] <- word.vector[1]
###
# Plain substring search: the pattern may sit anywhere in the string.
strings <- c( "abcd", "cdab", "cabd", "c abd", "da", "dab" )
invisible( lapply(
  c( "ab", "da" ),
  function( rx ) print( grep( rx, strings, value=TRUE ) ) ) )

# Add "bada", then anchor the pattern: ^ is start of string, $ is end.
strings <- c( strings, "bada" )
invisible( lapply(
  c( "da",      # anywhere in the string
     "da",
     "^da",     # begins with "da"
     "da$",     # ends with "da"
     "^da$" ),  # is exactly "da"
  function( rx ) print( grep( rx, strings, value=TRUE ) ) ) )
# Character classes: [..] matches exactly one character from the set.
strings <- c("abcd", "cdab", "cabd", "c abd", "da", "ad", "dab", "bada" )
grep( "^da$", strings, value=TRUE )        # exactly "da"
grep( "^[da]$", strings, value=TRUE )      # a one-character string: "d" or "a"
grep( "^[d|a]$", strings, value=TRUE )     # inside [ ] the | is a literal character
grep( "[da]", strings, value=TRUE )        # contains a "d" or an "a" somewhere
grep( "^[a-d]$", strings, value=TRUE )     # a one-character string in the range a-d
grep( "^[a-d]+$", strings, value=TRUE )    # made up only of the letters a-d
grep( "^[ad]+$", strings, value=TRUE )     # made up only of "a" and "d"
grep( "^[ad][ad]$", strings, value=TRUE )  # exactly two characters, each "a" or "d"
grep( "^[ad][ad]", strings, value=TRUE )   # starts with two such characters
grep( "^[ad]+$", strings, value=TRUE )

# The next assignment used to be fused onto the end of the grep() call above,
# which is a syntax error; it now stands as its own statement, written once.
strings <- c("abcd", "cdab", "cabd",
             "c abd", "da", "ad", "dab",
             "bada", "dada" )

# Counted repetition: {n} repeats the preceding item exactly n times.
grep( "^[ad][ad]", strings, value=TRUE )
grep( "^[ad][ad]$", strings, value=TRUE )
grep( "^[ad]+$", strings, value=TRUE )
grep( "^[ad]{2}$", strings, value=TRUE )      # same as ^[ad][ad]$
grep( "^[ad]{3}", strings, value=TRUE )       # starts with three a/d characters
grep( "^[ad]{2}.{1}$", strings, value=TRUE )  # two a/d characters, then any one character
grep( "^[ad]{2}.{2}$", strings, value=TRUE )  # two a/d characters, then any two characters
# Literal match versus a character class repeated one or more times.
strings <- c( "dead", "dad", "deed" )
invisible( lapply(
  c( "dead", "[dea]+" ),
  function( rx ) print( grep( rx, strings, value=TRUE ) ) ) )

# Matching is case-sensitive unless ignore.case=TRUE is given.
strings[1] <- "Dead"
grep( pattern="dead", x=strings, value=TRUE )
identical( "d", "d" )
identical( "d", "D" )
grep( pattern="dead", x=strings, ignore.case=TRUE, value=TRUE )
grep( pattern="de.d", x=strings, value=TRUE )

# Back to all lower case. "." is any single character, "*" is zero or more of
# the preceding item, "?" is zero or one of the preceding item.
strings[1] <- "dead"
invisible( lapply(
  c( "de.d", "de*d", "d*d", "da*d", "dea*d", "de?ad" ),
  function( rx ) print( grep( rx, strings, value=TRUE ) ) ) )
# Quantifiers applied to the "o" (or, in the first pattern, to the "h"):
# * zero or more, + one or more, ? zero or one, {m,n} between m and n,
# {m,} at least m. The last pattern is a leftover from the dead/dad/deed set.
strings <- c( "ht", "hot", "hoot", "hooot" )
invisible( lapply(
  c( "h*t", "h.t", "ho+t", "ho?t", "ho{1,2}t", "ho{2,}t", "dea*d" ),
  function( rx ) print( grep( rx, strings, value=TRUE ) ) ) )
# Grow the word list step by step and watch how each pattern responds.
strings <- c( "dead", "dad", "deed" )
invisible( lapply(
  c( "dea*d", "de.d" ),
  function( rx ) print( grep( rx, strings, value=TRUE ) ) ) )

strings <- c( "ded", strings )
grep( pattern="de.d", x=strings, value=TRUE )

strings <- c( strings, "deaad" )
invisible( lapply(
  c( "dea?d", "dea*d" ),
  function( rx ) print( grep( rx, strings, value=TRUE ) ) ) )

# {1,} is the same as +, {0,} the same as *, {0,1} the same as ?.
# "|" separates alternatives; inside [ ] it is just another character.
strings <- c( strings, "deeed" )
invisible( lapply(
  c( "de.{1,}d",
     "de*d",
     "dea*d",
     "dea{1,}d",
     "dea{0,}d",
     "dea?d",
     "dea{0,1}d",
     "^dee|ad$",
     "^de[e|a]d$",
     "^de[ea]d$",
     "^de|ad$",
     "ded|dad" ),
  function( rx ) print( grep( rx, strings, value=TRUE ) ) ) )
# Building an alternation pattern from a vector of words.
# sep goes between the arguments of one paste() call, so with a single vector
# it changes nothing; collapse joins the elements into one string.
days <- c( "monday", "tuesday", "wednesday" )
paste( days, sep="|" )
paste( days, sep="//|" )
paste( days, "|" )
paste0( days, "|" )
paste( days, collapse="|" )
paste( days, collapse="|" )

# Replace "land" everywhere, then only at the end of the string.
gsub( pattern="land", replacement="LAND", x=c("finland", "iceland", "michael landon") )
gsub( pattern="land$", replacement="LAND", x=c("finland", "iceland", "michael landon") )
# grepl() returns TRUE/FALSE for each element instead of the matching values.
print( strings )
grepl( pattern="dea{0,1}d", x=strings )
grepl( pattern="^de|ad$", x=strings )
grepl( pattern="^da*d$", x=strings )

# Keep two of the tests and combine them: adding logicals counts how many of
# the patterns each string matched.
results1 <- grepl( pattern="dea{0,1}d", x=strings )
results2 <- grepl( pattern="^da*d$", x=strings )
results1 + results2
cbind( results1, results2 )
results1 + results2
# Combining logical vectors: + counts the TRUEs element by element,
# | is element-wise OR, & is element-wise AND.
x1 <- c( TRUE, FALSE, FALSE )
x2 <- c( TRUE, FALSE, TRUE )
x1 + x2
print( x1 )
print( x2 )
x1 | x2
( x1 + x2 ) > 0    # same result as x1 | x2
x1 & x2

# Look up grep's arguments and its help page.
args( grep )
help( "grep" )