---
title: "Accession QA Report"
author: "NOAA-NFMS-OHC-DSCRTP: Robert.McGuinn@NOAA.gov"
date: "Date of report: `r Sys.Date()`"
output: word_document
#prettydoc::html_pretty:
# theme: hpstr
#highlight: github
always_allow_html: yes
---
```{r load_packs, echo=F, message=FALSE, warning=F}
library(tidyverse)
library(knitr)
library(flextable)
library(sp)
library(rgdal)
library(maps)
library(extrafont)
library(googlesheets4)
library(leaflet)
library(RColorBrewer)
#font_import()
# library(marmap)
# library(googlesheets)
# library(googledrive)
# library(openxlsx)
# library(scales)
# library(extrafont)
# library(RColorBrewer)
# library(rgdal)
# library(sp)
# library(marmap)
# library(worrms)
# library(worms)
```
```{r setup, include=FALSE}
opts_chunk$set(fig.width=7,
fig.height=3.5,
dpi = 300,
fig.align = 'left',
message=FALSE,
warning=FALSE)
```
```{r load_NDB, eval=T, echo=F, message=FALSE, warning=FALSE, cache=TRUE}
##### input: latest version of NDB #####
setwd("C:/rworking/deepseatools/indata")
indata <- read_csv("DSCRTP_NatDB_20200710-2.csv")
filt <- indata %>%
filter(Flag == "0", is.na(Phylum) == F)
```
```{r data_and_subset, echo=FALSE, message=FALSE, warning=FALSE}
setwd("C:/rworking/deepseatools/indata")
## x is defined in the runner script
# or you could define it here like this
# x <- "20200701-0_NOAA_NEFSC_Connecticut_ISIS2_Towcam_Packer_2015_2015"
sub <- read.csv(paste(x,'.csv', sep = ''), header = T)
flagged <- sub %>% filter(Flag == "1")
```
``` {r schema, echo=FALSE, cache = FALSE, warning=FALSE, message=FALSE}
##### download Google Sheet version of schema for use in R documents #####
# Register and download Google Sheet using googlesheets4::read_sheet
s <- read_sheet('1YDskzxY8OF-34Q8aI04tZvlRbhGZqBSysuie39kYHoI')
## checking
# s %>% filter(FieldName == 'VernacularNameCategory') %>% pull(ValidValues)
```
```{r mapit, include = F, eval=F}
# sub <- sub %>% filter(grepl('horizontal', FlagReason))
m <- leaflet()
m <- addProviderTiles(m, "Esri.OceanBasemap")
m <- addCircleMarkers(m, data=sub,
radius=5,
weight=0,
fillColor= "red",
fillOpacity=.5,
popup = paste(
"","Flag:","", sub$Flag, "
",
"","Catalog Number:","", sub$CatalogNumber, "
",
"","Record Type:","", sub$RecordType, "
",
"","DatasetID:","", sub$DatasetID, "
",
"","AccessionID:","", sub$AccessionID, "
",
"","DataProvider:","", sub$DataProvider, "
",
"","ObservationYear:","", sub$ObservationYear, "
",
"","Vessel:","", sub$Vessel, "
",
"","Locality:","", sub$Locality, "
",
"","Scientific Name:","", sub$ScientificName, "
",
"","Depth (meters):","", sub$DepthInMeters, "
",
"","Survey ID:","", sub$SurveyID, "
",
"","Event ID:","", sub$EventID, "
",
"","Latitude:","", sub$Latitude, "
",
"","Longitude:","", sub$Longitude, "
",
"","Image:","",sub$ImageURL))
m
```
```{r echo = F}
##### fix implicit NA #####
sub$Family <- forcats::fct_explicit_na(sub$Family)
```
```{r echo = F, cache=T, eval=T}
##### load NDB #####
# setwd("C:/rworking/deepseatools/indata")
# indata<-read.csv("DSCRTP_NatDB_20200408-1.csv", header = T)
# filt <- indata %>%
# filter(Flag == "0")
# rm(indata)
```
# AccessionID:
*`r x`*
# DatasetID: *`r unique(factor(sub$DatasetID))`*
Purpose: `r unique(factor(sub$Purpose))`
SurveyComments: `r unique(factor(sub$SurveyComments))`
* Number of records: *`r length(sub$CatalogNumber)`*
* Number of records flagged: *`r length(flagged$CatalogNumber)`*
* Minimum depth: *`r min(sub[sub$DepthInMeters != "-999",]$DepthInMeters)`*
* Maximum depth: *`r max(sub[sub$DepthInMeters != "-999",]$DepthInMeters)`*
* Number of dives or tows recorded: *`r length(unique(sub[sub$EventID != "NA" | is.na(sub$EventID) == F,]$EventID))`*
* Time frame: *`r min(as.numeric(min(sub$ObservationYear)))` to `r min(as.numeric(max(sub$ObservationYear)))`*
* Number of ImageURL populated: `r sub %>% filter(is.na(ImageURL) == F) %>% pull(ImageURL) %>% length()`
* Number of ImageFilePath Populated: `r sub %>% filter(is.na(ImageFilePath) == F) %>% pull(ImageURL) %>% length()`
# Citation
``` {r Citation, echo=FALSE}
sum_tbl <-
sub %>%
group_by(Citation) %>%
dplyr::summarize(Records=n())
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
# Citation Maker
``` {r CitationMaker, echo = FALSE, eval = F}
sub$CitationMaker <- paste(sub$DataProvider,'. ','Observation date range: ',
min(sub$ObservationYear[sub$ObservationYear != "-999"]),' to ',
max(sub$ObservationYear[sub$ObservationYear != "-999"]),'. ',
'Coral or sponge occurrence observations submitted to the NOAA National Database for Deep Sea Corals and Sponges (www.deepseacoraldata.noaa.gov)', '. ',
#'DSCRTP Accession ID: ',sub$AccessionID, '. ',
#'Record type: ', sub$RecordType, '. ',
#'Vessel(s): ', sub$Vessel,'. ',
#'Sampling vehicle: ', sub$VehicleName,'. ',
#'Survey ID: ', sub$SurveyID,'. ',
'DSCRTP Dataset ID: ', sub$DatasetID, '. ',
'Principal investigator: ', sub$PI,'. ',
'Reporter: ', sub$Reporter, '. ',
'Database version: ', unique(filt$DatabaseVersion), '. ',
#'Data contact: ', sub$DataContact,'. ',
#'Reporter: ', sub$Reporter,'. ',
#'Repository: ', sub$Repository,'. ',
# 'Web site [last accessed on YYYY-MM-DD]: ', sub$WebSite,'.',
sep = '')
unique(sub$CitationMaker)
```
# Web site
``` {r Website, echo=FALSE}
sum_tbl <-
sub %>%
group_by(WebSite) %>%
dplyr::summarize(Records=n())
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
# Data Origin
``` {r SourceSummary, echo=FALSE, warnings = F}
sum_tbl <-
sub %>%
group_by(DatasetID) %>%
dplyr::summarize(
DataProvider = paste(unique(DataProvider), collapse=" | "),
PI = paste(unique(PI), collapse=" | "),
Reporter = paste(unique(Reporter), collapse=" | "),
ReporterEmail = paste(unique(ReporterEmail), collapse=" | "),
DataContact = paste(unique(DataContact), collapse=" | "),
Records = n()
)
ft <- flextable(sum_tbl) %>%
theme_zebra() %>% #booktabs, vanilla, box, tron, tron_legacy, vader
#set_header_labels(Genus_n = "# of Genera", n = "# of Records") %>%
colformat_num(j = 7,
digits = 0,
big.mark = ",",
na_str = "NA") %>%
font(fontname = "Cambria", part = "all") %>% #header, body, all
fontsize(size = 7, part = "all") %>%
align(j = 1:6, align = 'left', part = 'all') %>%
align(j = 7, align = 'right', part = 'all')# %>%
#autofit()
ft
```
# Static Maps
## Locator Map
```{r, map, echo = FALSE, message = FALSE, warning=FALSE, dpi=300, fig.width=8, fig.height=4.1, eval=TRUE, fig.align='center'}
# # ##### Using ggmap libary #####
# #getting rid of missing lat/long data
#library(ggmap)
sub2 <- sub %>%
filter(Latitude != '-999' , Longitude != '-999', Longitude != '-887.22488')
#
# # use this when not crossing dateline
# xpos <- sub2$Longitude
# # also use this additional calculation to transforming longitudes to 0-360 if crossing dateline as in Western AK and HI
# xpos <- ifelse(xpos < 0, (180 + xpos) + 180, xpos)
#
# myLocation <- c(lon = mean(xpos), lat = mean(sub2$Latitude))
# #manually set center longitude if in an area that crosses dateline
# myLocation <- c(lon = 180, lat = mean(sub2$Latitude))
#
# p <- get_map(location=myLocation, maptype = "watercolor", source = 'stamen', crop=FALSE, zoom=2)
# # # put transLong below if
# ggmap(p)+
# geom_point(aes(x = xpos, y = Latitude), data = sub2,
# alpha = .5, color="darkred", size = 3)
#
#
# #> converting bounding box to center/zoom specification. (experimental)
# #> Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=34.75309,-119.751995&zoom=16&size=640x640&scale=2&maptype=satellite&language=en-EN&sensor=false
# ggmap(sq_map) + geom_point(data = sub2, mapping = aes(x = Longitude, y = Latitude), color = "red")
# #> Warning: Removed 3 rows containing missing values (geom_point).
###### this will make a simple maps if you have problems with the method above #####
# library(dplyr)
# library(ggplot2)
gis<-sub2
coordinates(gis) <- c("Longitude", "Latitude", "DepthInMeters")
proj4string(gis) <- "+proj=longlat +ellps=WGS84 +datum=WGS84"
x<-bbox(gis)
m <- map_data("world") #, xlim = c(x[1,1]-1,x[1,2]+1), ylim = c(x[2,1]-1,x[2,2]+1))
z <- ggplot(m, aes(x = long, y = lat, group = group)) +
geom_polygon()
z + geom_point(data=sub2,
color = "red",
size = 2,
aes(x=Longitude, y = Latitude), inherit.aes = FALSE)
```
## Area of Interest Map
```{r mapzoomed, echo=FALSE, message = FALSE, warning=FALSE, dpi=300, fig.width=8, fig.height=4.1, eval=FALSE, fig.align='center'}
##### using marmap and ggplot2 to make a map #####
# create a spatial points data frame
spdf<-sub2
coordinates(spdf) <- c("Longitude", "Latitude", "DepthInMeters")
proj4string(spdf) <- "+proj=longlat +ellps=WGS84 +datum=WGS84"
x<-bbox(spdf)
# install.packages('marmap')
# library(marmap)
zoom <- 2 # as number gets bigger you achieve a wider extent to your download
cont <- getNOAA.bathy(lon1 = x[1,1]-zoom, lon2 = x[1,2]+zoom,
lat1 = x[2,1]-zoom, lat2 = x[2,2]+zoom, resolution = 2,
keep = FALSE, antimeridian = FALSE)
##### if you have linear stuff you want to map use this #####
# begin.coord <- data.frame(lon=sub$StartLongitude, lat=sub$StartLatitude)
# end.coord <- data.frame(lon=sub$EndLongitude, lat=sub$EndLatitude)
#
# l <- vector("list", nrow(begin.coord))
# library(sp)
# for (i in seq_along(l)) {
# l[[i]] <- Lines(list(Line(rbind(begin.coord[i, ], end.coord[i,]))), as.character(i))
# }
# lines<-SpatialLines(l)
# class(lines)
#
# linesdf <- SpatialLinesDataFrame(lines, data = begin.coord)
#
# lines_fortify <- fortify(linesdf)
##### make ggplot based map #####
# topographical color scale, see ?scale_fill_etopo
g <- autoplot(cont, geom=c("raster", "contour")) +
scale_fill_gradient2(low="dodgerblue4", mid="gainsboro", high="darkgreen") +
labs(x = 'Longitude') +
labs(y = 'Latitude')
# add sampling locations
g + geom_point(aes(x=Longitude, y=Latitude), data=sub2, alpha=0.5, color = 'red', size = 2)
```
## 3D Bounding Box
```{r bounding box, echo=FALSE, echo = FALSE, message = FALSE, warning=FALSE, dpi=300, fig.width=8, fig.height=4.1, eval=TRUE, fig.align='center'}
spdf<-sub2
coordinates(spdf) <- c("Longitude", "Latitude", "DepthInMeters")
proj4string(spdf) <- "+proj=longlat +ellps=WGS84 +datum=WGS84"
x<-bbox(spdf)
x
```
# Expedition Details
``` {r source, echo=FALSE}
sum_tbl <-
sub %>%
group_by(SurveyID) %>%
dplyr::summarize(
Vessel = paste(unique(Vessel), collapse=" | "),
VehicleName = paste(unique(VehicleName), collapse=" | "),
SamplingEquipment = paste(unique(SamplingEquipment), collapse=" | "),
NavType = paste(unique(NavType), collapse=" | "),
LocationAccuracy = paste(unique(LocationAccuracy), collapse=" | "),
ObservationDates = paste(unique(ObservationDate), collapse=" | "),
Records = n()
)
colkeys <- c("Records")
ft <- flextable(sum_tbl) %>%
theme_zebra() %>% #booktabs, vanilla, box, tron, tron_legacy, vader
#set_header_labels(Genus_n = "# of Genera", n = "# of Records") %>%
colformat_num(j = 8, digits = 0, big.mark = ",", na_str = "NA") %>%
font(fontname = "Cambria", part = "all") %>% #header, body, all
fontsize(size = 7, part = "all") %>%
#autofit() %>%
width(j = 6, 1.5) %>%
width(j = 7, 1.5)
ft
```
# Taxonomy Details
``` {r TaxDetails, echo=FALSE }
sum_tbl <-
sub %>%
# filter(RecordType == 'specimen') %>%
group_by(IdentifiedBy, RecordType) %>%
dplyr::summarize(
IdentificationQualifier = paste(unique(IdentificationQualifier), collapse=" | "),
#Identification_Date = toString(unique(IdentificationDate)),
Records = n()) %>%
arrange(desc(Records))
ft <- flextable(sum_tbl) %>%
theme_zebra() %>% #booktabs, vanilla, box, tron, tron_legacy, vader
#set_header_labels(Genus_n = "# of Genera", n = "# of Records") %>%
colformat_num(j=4, digits = 0, big.mark = ",", na_str = "NA") %>%
font(fontname = "Cambria", part = "all") %>% #header, body, all
fontsize(size = 7, part = "all")
ft
```
# List of ScientificNames (Coral) - Grouped by Family
``` {r ByScientificName, echo=FALSE }
sum_tbl <-
sub %>%
arrange(ScientificName) %>%
filter(Phylum == 'Cnidaria') %>%
group_by(Family) %>%
dplyr::summarize(
ScientificName = paste(unique(ScientificName), collapse=" | "),
TaxonRank = paste(unique(TaxonRank), collapse=" | "),
UnFlagged = length(Flag[Flag == "0"]),
Flagged = length(Flag[Flag == "1"]),
FlagReason = paste(unique(FlagReason), collapse=" | "),
Records = n())
ft <- flextable(sum_tbl) %>%
theme_zebra() %>% #booktabs, vanilla, box, tron, tron_legacy, vader
colformat_num(j = 4, digits = 0, big.mark = ",", na_str = "NA") %>%
colformat_num(j = 7, digits = 0, big.mark = ",", na_str = "NA") %>%
font(fontname = "Cambria", part = "all") %>% #header, body, all
fontsize(size = 7, part = "all") %>%
autofit() %>%
width(j = 2, 2)
ft
```
# List of ScientificNames (Sponges) - Grouped by Family
``` {r ByScientificNameSponges, echo=FALSE }
sum_tbl <-
sub %>%
arrange(ScientificName) %>%
filter(Phylum == 'Porifera') %>%
group_by(Family) %>%
dplyr::summarize(
ScientificName = paste(unique(ScientificName), collapse=" | "),
TaxonRank = paste(unique(TaxonRank), collapse=" | "),
UnFlagged = length(Flag[Flag == "0"]),
Flagged = length(Flag[Flag == "1"]),
FlagReason = paste(unique(FlagReason), collapse=" | "),
Records = n())
colkeys <- names(sum_tbl[,-1])
ft <- flextable(sum_tbl) %>%
theme_zebra() %>% #booktabs, vanilla, box, tron, tron_legacy, vader
colformat_num(j = 4, digits = 0, big.mark = ",", na_str = "NA") %>%
colformat_num(j = 7, digits = 0, big.mark = ",", na_str = "NA") %>%
font(fontname = "Cambria", part = "all") %>% #header, body, all
fontsize(size = 7, part = "all") %>%
autofit() %>%
width(j = 2, 2)
ft
```
# Location
## Localities and EventIDs (Dives/Tows/Hauls)
``` {r Locality, echo=FALSE}
sum_tbl <-
sub %>%
arrange(Locality, EventID) %>%
#filter(Phylum == "Porifera") %>%
group_by(Locality) %>%
dplyr::summarize(
#min.Depth = min(as.numeric(MinimumDepthInMeters)),
#max.Depth = max(as.numeric(MinimumDepthInMeters)),
#mean.Depth = mean(as.numeric(MinimumDepthInMeters)),
#mean.DepthGIS = mean(as.numeric(gisDepth)),
EventID = paste(unique(EventID), collapse=" | "),
n = n()) %>%
arrange(desc(n))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
## NavType
``` {r NavType, echo=FALSE}
sum_tbl <-
sub %>%
#filter(Phylum == "Porifera") %>%
group_by(NavType) %>%
dplyr::summarize(
#min.Depth = min(as.numeric(MinimumDepthInMeters)),
#max.Depth = max(as.numeric(MinimumDepthInMeters)),
#mean.Depth = mean(as.numeric(MinimumDepthInMeters)),
#mean.DepthGIS = mean(as.numeric(gisDepth)),
n = n()) %>%
arrange(desc(n))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
## LocationAccuracy
``` {r LocationAccuracy, echo=FALSE}
sum_tbl <-
sub %>%
#filter(Phylum == "Porifera") %>%
group_by(LocationAccuracy) %>%
dplyr::summarize(
#min.Depth = min(as.numeric(MinimumDepthInMeters)),
#max.Depth = max(as.numeric(MinimumDepthInMeters)),
#mean.Depth = mean(as.numeric(MinimumDepthInMeters)),
#mean.DepthGIS = mean(as.numeric(gisDepth)),
n = n()) %>%
arrange(desc(n))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
## Oceans
``` {r Ocean, echo=FALSE}
sum_tbl <-
sub %>%
#filter(Phylum == "Porifera") %>%
group_by(Ocean) %>%
dplyr::summarize(
#min.Depth = min(as.numeric(MinimumDepthInMeters)),
#max.Depth = max(as.numeric(MinimumDepthInMeters)),
#mean.Depth = mean(as.numeric(MinimumDepthInMeters)),
#mean.DepthGIS = mean(as.numeric(gisDepth)),
n = n()) %>%
arrange(desc(n))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
## Countries
``` {r Country, echo=FALSE}
sum_tbl <-
sub %>%
#filter(Phylum == "Porifera") %>%
group_by(Country) %>%
dplyr::summarize(
#min.Depth = min(as.numeric(MinimumDepthInMeters)),
#max.Depth = max(as.numeric(MinimumDepthInMeters)),
#mean.Depth = mean(as.numeric(MinimumDepthInMeters)),
#mean.DepthGIS = mean(as.numeric(gisDepth)),
n = n()) %>%
arrange(desc(n))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
## FishCouncilRegion(s)
``` {r FishCouncilRegions, echo=FALSE}
sum_tbl <-
sub %>%
#filter(Phylum == "Porifera") %>%
group_by(FishCouncilRegion) %>%
dplyr::summarize(
#min.Depth = min(as.numeric(MinimumDepthInMeters)),
#max.Depth = max(as.numeric(MinimumDepthInMeters)),
#mean.Depth = mean(as.numeric(MinimumDepthInMeters)),
#mean.DepthGIS = mean(as.numeric(gisDepth)),
n = n()) %>%
arrange(desc(n))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
## Marine Eco-Regions of the World
``` {r MEOW, echo=FALSE}
sum_tbl <-
sub %>%
#filter(Phylum == "Porifera") %>%
group_by(gisMEOW) %>%
dplyr::summarize(
#min.Depth = min(as.numeric(MinimumDepthInMeters)),
#max.Depth = max(as.numeric(MinimumDepthInMeters)),
#mean.Depth = mean(as.numeric(MinimumDepthInMeters)),
#mean.DepthGIS = mean(as.numeric(gisDepth)),
n = n()) %>%
arrange(desc(n))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
## Large Marine Ecosystem
``` {r LME, echo=FALSE}
sum_tbl <-
sub %>%
#filter(Phylum == "Porifera") %>%
group_by(gisLME) %>%
dplyr::summarize(
#min.Depth = min(as.numeric(MinimumDepthInMeters)),
#max.Depth = max(as.numeric(MinimumDepthInMeters)),
#mean.Depth = mean(as.numeric(MinimumDepthInMeters)),
#mean.DepthGIS = mean(as.numeric(gisDepth)),
n = n()) %>%
arrange(desc(n))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
# Size, Abundance, and Condition
## Individual Count Summary
Nothing will appear here if no 'IndividualCount' values are present
``` {r IndividualCount, echo=FALSE}
sub2 <- sub %>%
filter(
# is.na(Temperature) == F|
as.character(IndividualCount) != '-999'
# as.character(Temperature) != 'NA' |
# as.numeric(Temperature) > 0
)
options(scipen=10000)
g <- ggplot(sub2, aes(IndividualCount,
#fill = Phylum
)) +
geom_histogram(binwidth = 1) +
ylab("Number of Records") +
theme_bw(base_size = 15, base_family = "Cambria")
#display.brewer.all(colorblindFriendly=TRUE)
g + scale_fill_manual(values = brewer.pal(12, "Paired")[c(10,9)])
```
### IndividualCount
`r options(digits = 3)`
* min: `r min(sub2$IndividualCount, digits = 3)`
* median: `r median(sub2$IndividualCount, digits = 3)`
* max: `r max(sub2$IndividualCount)`
`r options(digits = 5)`
## Categorical Abundance Values
Nothing will appear here if no 'CategoricalAbundance' values are present
``` {r CatAbundance, echo=FALSE}
sum_tbl <-
sub %>%
#filter(Phylum == "Porifera") %>%
group_by(CategoricalAbundance) %>%
dplyr::summarize(
n = n()) %>%
arrange(desc(CategoricalAbundance))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
## Density Values (per square meter)
Nothing will appear here if 'Density' values are not present
``` {r Density, echo=FALSE}
sub %>% filter(Density != -999) %>% ggplot(aes(x=Density)) +
geom_histogram(binwidth = .2) +
ylab("Number of Records") +
theme_bw(base_size = 15, base_family = "Cambria")
```
## Cover Values (%)
Arranged in descending order of median 'Cover' values. Nothing will appear here if no 'Cover' values are present
``` {r PercentCover, echo=FALSE, message=F, warning = F}
sum_tbl <-
sub %>%
filter(
Cover != "-999" |
is.na(Cover) == F
) %>%
group_by(ScientificName) %>%
dplyr::summarize(
minCover = min(Cover),
maxCover = max(Cover),
medianCover = median(Cover),
n = n()) %>%
arrange(desc(medianCover))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 5)
sum_tbl
```
## Minimum and Maximum Size Values by Genus
This table will be empty if no size values present.
``` {r SizeSummary, echo=FALSE, warning = F}
sum_tbl <-
sub %>%
filter(
VerbatimSize != '-999'
) %>%
group_by(Genus) %>%
dplyr::summarize(
VerbatimSize_values = toString(unique(VerbatimSize)),
min.Size = min(as.numeric(MinimumSize)),
max.Size = max(as.numeric(MaximumSize)),
#mean.Depth = mean(as.numeric(MinimumDepthInMeters)),
#mean.DepthGIS = mean(as.numeric(gisDepth)),
n = n()) %>%
arrange(desc(Genus))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
## Weight Values
Table will be empty if no weight values are given.
``` {r WeightInKg, echo=FALSE,warning = F}
sum_tbl <-
sub %>%
filter(WeightInKg != "-999") %>%
group_by(WeightInKg) %>%
dplyr::summarize(
#min.Depth = min(as.numeric(MinimumDepthInMeters)),
#max.Depth = max(as.numeric(MinimumDepthInMeters)),
#mean.Depth = mean(as.numeric(MinimumDepthInMeters)),
#mean.DepthGIS = mean(as.numeric(gisDepth)),
n = n()) %>%
arrange(desc(WeightInKg))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
## Condition Values
``` {r Condition, echo=FALSE}
sum_tbl <-
sub %>%
#filter(is.na(Condition) == F) %>%
group_by(Condition) %>%
dplyr::summarize(
#min.Depth = min(as.numeric(MinimumDepthInMeters)),
#max.Depth = max(as.numeric(MinimumDepthInMeters)),
#mean.Depth = mean(as.numeric(MinimumDepthInMeters)),
#mean.DepthGIS = mean(as.numeric(gisDepth)),
n = n()) %>%
arrange(desc(Condition))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
# Environment
## Habitat and Substrate
``` {r HabSub, echo=FALSE, warning = F}
sum_tbl <-
sub %>%
#filter(Phylum == "Porifera") %>%
group_by(Habitat, Substrate) %>%
dplyr::summarize(
#Species = toString(unique(ScientificName)),
# min.Depth = min(as.numeric(DepthInMeters)),
# mean.Depth = mean(as.numeric(DepthInMeters)),
# max.Depth = max(as.numeric(DepthInMeters)),
n = n()) %>%
arrange(desc(n))
sum_tbl <- sum_tbl %>% filter(n > 100)
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
## Temperature, Salinity, Oxygen
Histogram of Temperature Values (Celcius) (empty if no Temperature values)
``` {r TemperatureHisto, echo=FALSE}
# ordering factors
sub2 <- sub %>%
filter(
# is.na(Temperature) == F|
as.character(Temperature) != '-999'
# as.character(Temperature) != 'NA' |
# as.numeric(Temperature) > 0
)
options(scipen=10000)
g <- ggplot(sub2, aes(Temperature,
#fill = Phylum
)) +
geom_histogram(binwidth = .1) +
ylab("Number of Records") +
theme_bw(base_size = 15, base_family = "Cambria")
#display.brewer.all(colorblindFriendly=TRUE)
g + scale_fill_manual(values = brewer.pal(12, "Paired")[c(10,9)])
```
Histogram of Salinity Values (PSU) (empty if no Salinity values)
``` {r SalinityHisto, echo=FALSE}
# ordering factors
sub2 <- sub %>%
filter(
is.na(Salinity) == F,
Salinity != '-999'
)
options(scipen=10000)
g <- ggplot(sub2, aes(Salinity
#fill = Phylum
)) +
geom_histogram(binwidth = .01) +
ylab("Number of Records") +
theme_bw(base_size = 15, base_family = "Cambria")
#display.brewer.all(colorblindFriendly=TRUE)
g + scale_fill_manual(values = brewer.pal(12, "Paired")[c(10,9)])
```
Histogram of Oxygen Values (ml/L) (empty if no Oxygen values)
``` {r OxygenHisto, echo=FALSE}
# ordering factors
sub2 <- sub %>%
filter(
is.na(Oxygen) == F,
Oxygen != '-999'
)
options(scipen=10000)
g <- ggplot(sub2, aes(Oxygen, fill = Phylum)) +
geom_histogram(binwidth = .1) +
ylab("Number of Records") +
theme_bw(base_size = 15, base_family = "Cambria")
#display.brewer.all(colorblindFriendly=TRUE)
g + scale_fill_manual(values = brewer.pal(12, "Paired"))#[c(10,9)])
```
## pH Values
``` {r pH, echo=FALSE}
sum_tbl <- sub %>%
#filter(Phylum == "Porifera") %>%
group_by(pH) %>%
dplyr::summarize(
#min.Depth = min(as.numeric(MinimumDepthInMeters)),
#max.Depth = max(as.numeric(MinimumDepthInMeters)),
#mean.Depth = mean(as.numeric(MinimumDepthInMeters)),
#mean.DepthGIS = mean(as.numeric(gisDepth)),
n = n()) %>%
arrange(desc(pH))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
## pHscale Values
``` {r pHscale, echo=FALSE}
sum_tbl <- sub %>%
#filter(Phylum == "Porifera") %>%
group_by(pHscale) %>%
dplyr::summarize(
#min.Depth = min(as.numeric(MinimumDepthInMeters)),
#max.Depth = max(as.numeric(MinimumDepthInMeters)),
#mean.Depth = mean(as.numeric(MinimumDepthInMeters)),
#mean.DepthGIS = mean(as.numeric(gisDepth)),
n = n()) %>%
arrange(desc(pHscale))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
## pCO2 Values
``` {r pHCO2, echo=FALSE}
sum_tbl <- sub %>%
#filter(Phylum == "Porifera") %>%
group_by(pCO2) %>%
dplyr::summarize(
#min.Depth = min(as.numeric(MinimumDepthInMeters)),
#max.Depth = max(as.numeric(MinimumDepthInMeters)),
#mean.Depth = mean(as.numeric(MinimumDepthInMeters)),
#mean.DepthGIS = mean(as.numeric(gisDepth)),
n = n()) %>%
arrange(desc(pCO2))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
## TA Values
``` {r TA, echo=FALSE}
sum_tbl <- sub %>%
#filter(Phylum == "Porifera") %>%
group_by(TA) %>%
dplyr::summarize(
#min.Depth = min(as.numeric(MinimumDepthInMeters)),
#max.Depth = max(as.numeric(MinimumDepthInMeters)),
#mean.Depth = mean(as.numeric(MinimumDepthInMeters)),
#mean.DepthGIS = mean(as.numeric(gisDepth)),
n = n()) %>%
arrange(desc(TA))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
## DIC Values
``` {r DIC, echo=FALSE}
sum_tbl <- sub %>%
#filter(Phylum == "Porifera") %>%
group_by(DIC) %>%
dplyr::summarize(
#min.Depth = min(as.numeric(MinimumDepthInMeters)),
#max.Depth = max(as.numeric(MinimumDepthInMeters)),
#mean.Depth = mean(as.numeric(MinimumDepthInMeters)),
#mean.DepthGIS = mean(as.numeric(gisDepth)),
n = n()) %>%
arrange(desc(DIC))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
# Temporal
## Observation Time Frame
`r min(as.numeric(sub$ObservationYear[sub$ObservationYear != "-999"]), na.rm=TRUE)` to `r max(sub$ObservationYear, na.rm=TRUE)`
## Observation Dates
Number of ObservationDate values recorded:
`r x <- sub %>%
filter(
is.na(ObservationDate) == F |
ObservationDate != '-999'
) %>%
group_by(DatasetID) %>%
dplyr::summarize(
n = n()
)
sum(x$n)`
ObservationDate values grouped by SurveyID:
`r x <- sub %>%
filter(
is.na(ObservationDate) == F |
ObservationDate != '-999'
) %>%
group_by(SurveyID) %>%
dplyr::summarize(ObservationDates = paste(unique(ObservationDate), collapse = ' | '),
n = n()
)
kable(head(x, n=10), row.names = F, digits = 2)`
## Observation Time
`r x <- sub %>%
filter(
is.na(ObservationTime) == F |
ObservationTime != '-999'
) %>%
group_by(DatasetID) %>%
dplyr::summarize(
n = n()
)`
Number of ObservationTime values recorded: `r sum(x$n)`
View of some of the ObservationTime values:
`r x <- sub %>%
filter(
is.na(ObservationTime) == F |
ObservationTime != '-999'
) %>%
group_by(ObservationTime) %>%
dplyr::summarize(
n = n()
)
kable(head(x, n=10), row.names = F, digits = 2)`
## Modified Dates
``` {r Modified, echo=FALSE}
sum_tbl <-
sub %>%
#filter(Phylum == "Porifera") %>%
group_by(Modified) %>%
dplyr::summarize(
#min.Depth = min(as.numeric(MinimumDepthInMeters)),
#max.Depth = max(as.numeric(MinimumDepthInMeters)),
#mean.Depth = mean(as.numeric(MinimumDepthInMeters)),
#mean.DepthGIS = mean(as.numeric(gisDepth)),
n = n()) %>%
arrange(desc(Modified))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
## Entry Dates
``` {r EntryDate, echo=FALSE}
sum_tbl <-
sub %>%
#filter(Phylum == "Porifera") %>%
group_by(EntryDate) %>%
dplyr::summarize(
#min.Depth = min(as.numeric(MinimumDepthInMeters)),
#max.Depth = max(as.numeric(MinimumDepthInMeters)),
#mean.Depth = mean(as.numeric(MinimumDepthInMeters)),
#mean.DepthGIS = mean(as.numeric(gisDepth)),
n = n()) %>%
arrange(desc(EntryDate))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
## Entry Update
``` {r EntryUpdate, echo=FALSE}
sum_tbl <-
sub %>%
#filter(Phylum == "Porifera") %>%
group_by(EntryUpdate) %>%
dplyr::summarize(
#min.Depth = min(as.numeric(MinimumDepthInMeters)),
#max.Depth = max(as.numeric(MinimumDepthInMeters)),
#mean.Depth = mean(as.numeric(MinimumDepthInMeters)),
#mean.DepthGIS = mean(as.numeric(gisDepth)),
n = n()) %>%
arrange(desc(EntryUpdate))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
# Flagged Records
``` {r Flags and CatalogNumbers, echo=FALSE, warning=FALSE}
sum_tbl <-
sub %>%
filter(Flag == "1") %>%
group_by(FlagReason) %>%
dplyr::summarize(
#CatalogNumbers = toString(CatalogNumber),
taxa = paste(unique(ScientificName), collapse=" | "),
n = n()
) %>%
arrange(desc(n))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
# Depth
* Minimum Depth: `r min(sub[sub$DepthInMeters != '-999',]$DepthInMeters)`
* Median Depth: `r median(sub[sub$DepthInMeters != '-999',]$DepthInMeters)`
* Maximum Depth: `r max(sub[sub$DepthInMeters != '-999',]$DepthInMeters)`
## Histogram of Depth Distribution
``` {r Coral_Depth_Summary_Boxplots, echo=FALSE, message=FALSE, warning=FALSE,fig.width=7, fig.height=3.5, dpi=300, cache=FALSE}
sub3 <- sub %>% filter(Flag == '0')
g <- ggplot(sub3, aes(DepthInMeters, fill = Phylum)) +
geom_histogram(alpha = 0.5) +
xlim(0,7000) +
ylab("Number of Records") +
xlab('DepthInMeters') +
scale_x_reverse() +
coord_flip() +
theme_bw(base_size = 15, base_family = "Cambria")
#display.brewer.all(colorblindFriendly=TRUE)
g + scale_fill_manual(values = brewer.pal(12, "Paired")[c(10,9,8,7,6,5,4,3,2,1)])
# library(ggplot2)
# g <- ggplot(sub, aes(DepthInMeters)) +
# geom_histogram(binwidth = 1) +
# theme(text = element_text(size=20)) +
# scale_x_reverse() +
# coord_flip() +
# ylab("Number of Records") +
# xlab("DepthInMeters") +
# theme_bw(base_size = 15, base_family = "Cambria")
#
# g
```
# DepthCheck
``` {r Depth_Check, echo=FALSE, message=FALSE, warning=FALSE,fig.width=7, fig.height=3.5, dpi=300, eval = TRUE, cache=FALSE}
##### setting thresholds for difference and creating flags#####
t <- 1000
z <- 500
percrm <- 30
perother <- 60
##### building selection conditions for each depth value #####
#finding depth difference for CRM
sub$CRMDepthDiff <- abs(sub$DepthInMeters - sub$gisCRMDepth)
sub$CRMDepthDiff <- (sub$CRMDepthDiff/sub$gisCRMDepth) * 100
#crm depth flags
crm <- sub %>%
filter(CRMDepthDiff > percrm & gisCRMDepth != "-999") %>%
dplyr::select(CatalogNumber, DatasetID, SampleID, TrackingID, ScientificName, Locality, DepthInMeters, gisEtopoDepth,
gisGEBCODepth, gisCRMDepth, MinimumDepthInMeters, MaximumDepthInMeters,
NavType, LocationAccuracy, LocationComments, Flag, FlagReason, SurveyID,
Vessel, VehicleName, ObservationYear, Latitude, Longitude)
#finding depth difference for Etopo
sub$EtopoDepthDiff <- abs(sub$DepthInMeters - sub$gisEtopoDepth)
sub$EtopoDepthDiff <- (sub$EtopoDepthDiff/sub$gisEtopoDepth) * 100
#etopo depth flags
etopo <- sub %>%
filter(EtopoDepthDiff > perother & gisEtopoDepth != "-999") %>%
dplyr::select(CatalogNumber, DatasetID, SampleID, TrackingID, ScientificName, Locality, DepthInMeters, gisEtopoDepth,
gisGEBCODepth, gisCRMDepth, MinimumDepthInMeters, MaximumDepthInMeters,
NavType, LocationAccuracy, LocationComments, Flag, FlagReason, SurveyID,
Vessel, VehicleName, ObservationYear, Latitude, Longitude)
#finding depth difference for GEBCO
sub$GEBCODepthDiff <- abs(sub$DepthInMeters - sub$gisGEBCODepth)
sub$GEBCODepthDiff <- (sub$GEBCODepthDiff/sub$gisGEBCODepth) * 100
#GEBCO depth flags
gebco <- sub %>%
filter(GEBCODepthDiff > perother & gisGEBCODepth != "-999") %>%
dplyr::select(CatalogNumber, DatasetID, SampleID, TrackingID, ScientificName, Locality, DepthInMeters, gisEtopoDepth,
gisGEBCODepth, gisCRMDepth, MinimumDepthInMeters, MaximumDepthInMeters,
NavType, LocationAccuracy, LocationComments, Flag, FlagReason, SurveyID,
Vessel, VehicleName, ObservationYear, Latitude, Longitude)
#depth flags from reported values
depth <- sub %>%
filter(as.numeric(DepthInMeters) == -999 | as.numeric(DepthInMeters) < 30) %>%
dplyr::select(CatalogNumber, DatasetID, SampleID, TrackingID, ScientificName, Locality, DepthInMeters, gisEtopoDepth,
gisGEBCODepth, gisCRMDepth, MinimumDepthInMeters, MaximumDepthInMeters,
NavType, LocationAccuracy, LocationComments, Flag, FlagReason, SurveyID,
Vessel, VehicleName, ObservationYear, Latitude, Longitude)
##### building joint file of all depth probs and de-dup
dx <- rbind(crm,etopo,gebco,depth)
##### testing for duplicated records #####
dx <- dx[!duplicated(dx), ]
```
## Depth Summary by ScientificName (Corals)
Table is sorted by median depth in ascending order.
``` {r DepthSumByScientificNameCorals, echo=FALSE}
sum_tbl <-
sub %>%
filter(Phylum == "Cnidaria") %>%
group_by(ScientificName) %>%
dplyr::summarize(
min.Depth = min(as.numeric(DepthInMeters)),
median.Depth = median(as.numeric(DepthInMeters)),
max.Depth = max(as.numeric(DepthInMeters)),
n = n()) %>%
arrange(median.Depth)
sum_tbl <- kable(sum_tbl, row.names = F, digits = 0)
sum_tbl
```
## Depth Summary by ScientificName (Sponges)
Table is sorted in alpabetical order.
``` {r DepthSumByScientificNameSponges, echo=FALSE}
sum_tbl <-
sub %>%
filter(Phylum == "Porifera") %>%
group_by(ScientificName) %>%
dplyr::summarize(
min.Depth = min(as.numeric(DepthInMeters)),
median.Depth = median(as.numeric(DepthInMeters)),
max.Depth = max(as.numeric(DepthInMeters)),
n = n()) %>%
arrange(ScientificName)
sum_tbl <- kable(sum_tbl, row.names = F, digits = 0)
sum_tbl
```
## Points > `r percrm` percent different than Coastal Relief Model
Figure will not show points if none meet the criteria.
``` {r Depth_CRM, echo=FALSE, message=FALSE, warning=FALSE,fig.width=7, fig.height=3.5, dpi=300, eval = TRUE, cache=FALSE}
p <- ggplot(crm, aes(DepthInMeters, gisCRMDepth))
p +
geom_point(aes(shape = factor(Flag), size = as.numeric(Flag)), size = 2) +
scale_shape_manual(values=c(1,4)) +
geom_vline(aes(xintercept = 50)) +
geom_hline(aes(yintercept = 50)) +
geom_abline(col = "gray60") +
theme_bw(base_size = 15, base_family = "Cambria")
```
``` {r crmtable, echo=FALSE, eval = F}
# ## Table of Points > `r percrm` percent different than Coastal Relief Model
# Nothing will show here if criteria is not met
sum_tbl <-
crm %>%
#filter(Flag == "1") %>%
group_by(CatalogNumber) %>%
dplyr::summarize(
Scientific_Name = ScientificName,
Depth = DepthInMeters,
Etopo = gisEtopoDepth,
GEBCO = gisGEBCODepth,
CRM = gisCRMDepth,
Flagged = unique(Flag)) %>%
arrange(desc(Flagged))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
## Points > `r perother` percent different than ETOPO1
Figure will not show points if none meet the criteria.
``` {r Depth_ETOPO, echo=FALSE, message=FALSE, warning=FALSE,fig.width=7, fig.height=3.5, dpi=300, eval = TRUE, cache=FALSE}
p <- ggplot(etopo, aes(DepthInMeters, gisEtopoDepth))
p +
geom_point(aes(shape = factor(Flag), size = as.numeric(Flag)), size = 2) +
scale_shape_manual(values=c(1,4)) +
geom_vline(aes(xintercept = 50)) +
geom_hline(aes(yintercept = 50)) +
geom_abline(col = "gray60") +
theme_bw(base_size = 15, base_family = "Cambria")
```
``` {r etopotable, echo=FALSE, eval = F}
# ## Table of Points > `r perother` percent different than ETOPO
# Nothing will show here if criteria is not met
sum_tbl <-
etopo %>%
#filter(Phylum == "Porifera") %>%
group_by(CatalogNumber) %>%
dplyr::summarize(
Scientific_Name = ScientificName,
Depth = DepthInMeters,
Etopo = gisEtopoDepth,
GEBCO = gisGEBCODepth,
CRM = gisCRMDepth,
Flagged = unique(Flag)) %>%
arrange(desc(Flagged))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
## Points > `r perother` percent different than GEBCO
Figure will not show points if none meet the criteria.
``` {r Depth_GEBCO, echo=FALSE, message=FALSE, warning=FALSE,fig.width=7, fig.height=3.5, dpi=300, eval = TRUE, cache=FALSE}
p <- ggplot(gebco, aes(DepthInMeters, gisGEBCODepth))
p +
geom_point(aes(shape = factor(Flag), size = as.numeric(Flag)), size = 2) +
scale_shape_manual(values=c(1,4)) +
geom_vline(aes(xintercept = 50)) +
geom_hline(aes(yintercept = 50)) +
geom_abline(col = "gray60") +
theme_bw(base_size = 15, base_family = "Cambria")
```
``` {r gebcotable, echo=FALSE, eval = F}
# ## Table of Points > `r perother` percent different than GEBCO
# Nothing will show here if criteria is not met## Table of Points
sum_tbl <-
gebco %>%
#filter(Phylum == "Porifera") %>%
group_by(CatalogNumber) %>%
dplyr::summarize(
Scientific_Name = ScientificName,
Depth = DepthInMeters,
Etopo = gisEtopoDepth,
GEBCO = gisGEBCODepth,
CRM = gisCRMDepth,
Flagged = unique(Flag)) %>%
arrange(desc(Flagged))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
## Points < 30m depth
Figure will not show points if none meet the criteria.
``` {r Depth_Shallow, echo=FALSE, message=FALSE, warning=FALSE,fig.width=7, fig.height=3.5, dpi=300, eval = TRUE, cache=FALSE}
p <- ggplot(depth, aes(DepthInMeters, gisGEBCODepth))
p +
geom_point(aes(shape = factor(Flag), size = as.numeric(Flag)), size = 2) +
scale_shape_manual(values=c(1,4)) +
geom_vline(aes(xintercept = 30)) +
geom_hline(aes(yintercept = 30)) +
geom_abline(col = "gray60") +
theme_bw(base_size = 15, base_family = "Cambria")
```
## Table of Points < 30m depth
Nothing will show here if criteria is not met
``` {r shallowtable, echo=FALSE}
sum_tbl <-
depth %>%
#filter(Phylum == "Porifera") %>%
group_by(CatalogNumber) %>%
dplyr::summarize(
Scientific_Name = ScientificName,
Depth = DepthInMeters,
Etopo = gisEtopoDepth,
GEBCO = gisGEBCODepth,
CRM = gisCRMDepth) %>%
arrange(desc(Depth))
sum_tbl <- kable(sum_tbl, row.names = F, digits = 2)
sum_tbl
```
# Standardized Variable Checks
``` {r standardized_table_checks, echo=FALSE}
Vessel <- setdiff(unique(sub$Vessel), unique(filt$Vessel))
VehicleName <- setdiff(unique(sub$VehicleName), unique(filt$VehicleName))
PI <- setdiff(unique(sub$PI), unique(filt$PI))
PIAffiliation <- setdiff(unique(sub$PIAffiliation), unique(filt$PIAffiliation))
Repository <- setdiff(unique(sub$Repository), unique(filt$Repository))
IdentifiedBy <- setdiff(unique(sub$IdentifiedBy), unique(filt$IdentifiedBy))
IdentificationQualifier <- setdiff(unique(sub$IdentificationQualifier), unique(filt$IdentificationQualifier))
DataProvider <- setdiff(unique(sub$DataProvider), unique(filt$DataProvider))
DatasetID <- setdiff(unique(sub$DatasetID), unique(filt$DatasetID))
SurveyID <- setdiff(unique(sub$SurveyID), unique(filt$SurveyID))
```
If name appears below after variable, then it does not match the standard name. That means it needs to be changed to match, or it is actually a new name that we have not encountered before.
* Vessel: **`r Vessel`**
* VehicleName: **`r VehicleName`**
* PI: **`r PI`**
* PIAffiliation: **`r PIAffiliation`**
* Repository: **`r Repository`**
* IdentifiedBy: **`r IdentifiedBy`**
* IdentificationQualifier: **`r IdentificationQualifier`**
* DataProvider: **`r DataProvider`**
* DatasetID: **`r DatasetID`**
* SurveyID: **`r SurveyID`**
# Compare our current holdings for the same 'Vessel'
``` {r standardized_SurveyID_check, echo=FALSE, cache = FALSE, warning = F}
SurveyIDPattern <- filt %>%
filter(Vessel %in% unique(sub$Vessel)) %>%
group_by(Vessel) %>%
dplyr::summarise(
SurveyIDs = paste(unique(SurveyID), collapse = ' | '),
DatasetIDs = paste(unique(DatasetID), collapse = ' | '),
DataProviders = paste(unique(DataProvider), collapse = ' | '),
Repository = paste(unique(Repository), collapse = ' | '),
#ObservationDates = paste(unique(ObservationDate), collapse = ' | '),
PIs = paste(unique(PI), collapse = ' | '),
DataContacts = paste(unique(DataContact), collapse = ' | '),
#Reporters = paste(unique(Reporter), collapse = ' | ')
)
sum_tbl <- kable(SurveyIDPattern, row.names = F, digits = 2)
sum_tbl
```